/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "dwarf2out.h"

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return the index of the given mode in the multiply and divide cost
   tables.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
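
/* For example, a per-mode multiply cost can then be looked up as
   cost->mult_init[MODE_INDEX (mode)] (illustrative use only; any mode other
   than QImode..DImode selects the "other" slot, index 4).  */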

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
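
/* Worked example under the assumption above: a 2-byte add costs
   COSTS_N_BYTES (2) == 4, the same value as COSTS_N_INSNS (1), so the
   size-tuned table below stays on a scale comparable to the speed-tuned
   tables.  */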

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
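
/* Sketch of the layout used below: each memcpy/memset entry in a cost table
   is a pair of stringop_algs descriptors, and DUMMY_STRINGOP_ALGS fills the
   member of the pair that a given tuning never consults, falling back to a
   plain libcall.  */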

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/*                               HI */
   COSTS_N_BYTES (3),			/*                               SI */
   COSTS_N_BYTES (3),			/*                               DI */
   COSTS_N_BYTES (5)},			/*                            other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/*                          HI */
   COSTS_N_BYTES (3),			/*                          SI */
   COSTS_N_BYTES (3),			/*                          DI */
   COSTS_N_BYTES (5)},			/*                       other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  1,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  1,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/*                               HI */
   COSTS_N_INSNS (6),			/*                               SI */
   COSTS_N_INSNS (6),			/*                               DI */
   COSTS_N_INSNS (6)},			/*                               other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*                          HI */
   COSTS_N_INSNS (23),			/*                          SI */
   COSTS_N_INSNS (23),			/*                          DI */
   COSTS_N_INSNS (23)},			/*                          other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/*                               HI */
   COSTS_N_INSNS (12),			/*                               SI */
   COSTS_N_INSNS (12),			/*                               DI */
   COSTS_N_INSNS (12)},			/*                               other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/*                          HI */
   COSTS_N_INSNS (40),			/*                          SI */
   COSTS_N_INSNS (40),			/*                          DI */
   COSTS_N_INSNS (40)},			/*                          other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/*                               HI */
   COSTS_N_INSNS (11),			/*                               SI */
   COSTS_N_INSNS (11),			/*                               DI */
   COSTS_N_INSNS (11)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/*                          HI */
   COSTS_N_INSNS (25),			/*                          SI */
   COSTS_N_INSNS (25),			/*                          DI */
   COSTS_N_INSNS (25)},			/*                          other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (4),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (4)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/*                          HI */
   COSTS_N_INSNS (17),			/*                          SI */
   COSTS_N_INSNS (17),			/*                          DI */
   COSTS_N_INSNS (17)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache  */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (7),			/*                               SI */
   COSTS_N_INSNS (7),			/*                               DI */
   COSTS_N_INSNS (7)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*                          HI */
   COSTS_N_INSNS (39),			/*                          SI */
   COSTS_N_INSNS (39),			/*                          DI */
   COSTS_N_INSNS (39)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (3),			/*                               DI */
   COSTS_N_INSNS (3)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/*                          HI */
   COSTS_N_INSNS (18),			/*                          SI */
   COSTS_N_INSNS (18),			/*                          DI */
   COSTS_N_INSNS (18)},			/*                          other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/*                               HI */
   COSTS_N_INSNS (5),			/*                               SI */
   COSTS_N_INSNS (5),			/*                               DI */
   COSTS_N_INSNS (5)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*                          HI */
   COSTS_N_INSNS (42),			/*                          SI */
   COSTS_N_INSNS (74),			/*                          DI */
   COSTS_N_INSNS (74)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (4),			/*                               DI */
   COSTS_N_INSNS (5)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*                          HI */
   COSTS_N_INSNS (42),			/*                          SI */
   COSTS_N_INSNS (74),			/*                          DI */
   COSTS_N_INSNS (74)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches completely unlimited, as
     their execution also takes some time).  */
  100,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for very
     small blocks it is better to use a loop.  For large blocks, a libcall can
     do non-temporal accesses and beat the inline expansion considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar load_cost.  */
  2,                                    /* scalar_store_cost.  */
  5,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  3,                                    /* vec_unalign_load_cost.  */
  3,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  2,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),                  /*                          HI */
   COSTS_N_INSNS (51),                  /*                          SI */
   COSTS_N_INSNS (83),                  /*                          DI */
   COSTS_N_INSNS (83)},                 /*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					    MOVD reg64, xmmreg	Double	FSTORE 4
					    MOVD reg32, xmmreg	Double	FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg	Double	FADD 3
								1/1  1/1
					    MOVD reg32, xmmreg	Double	FADD 3
								1/1  1/1 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches completely unlimited, as
     their execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat the inline expansion
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar load_cost.  */
  2,                                    /* scalar_store_cost.  */
  6,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  2,                                    /* vec_store_cost.  */
  2,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/*                               HI */
   COSTS_N_INSNS (15),			/*                               SI */
   COSTS_N_INSNS (15),			/*                               DI */
   COSTS_N_INSNS (15)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/*                          HI */
   COSTS_N_INSNS (56),			/*                          SI */
   COSTS_N_INSNS (56),			/*                          DI */
   COSTS_N_INSNS (56)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/*                               HI */
   COSTS_N_INSNS (10),			/*                               SI */
   COSTS_N_INSNS (10),			/*                               DI */
   COSTS_N_INSNS (10)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/*                          HI */
   COSTS_N_INSNS (66),			/*                          SI */
   COSTS_N_INSNS (66),			/*                          DI */
   COSTS_N_INSNS (66)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  1024,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/*                               HI */
   COSTS_N_INSNS (3),			/*                               SI */
   COSTS_N_INSNS (3),			/*                               DI */
   COSTS_N_INSNS (3)},			/*                               other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),			/*                          HI */
   COSTS_N_INSNS (22),			/*                          SI */
   COSTS_N_INSNS (22),			/*                          DI */
   COSTS_N_INSNS (22)},			/*                          other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  16,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {6, 6, 6},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {6, 6},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {6, 6, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),			/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};
1038
1039static const
1040struct processor_costs atom_cost = {
1041  COSTS_N_INSNS (1),			/* cost of an add instruction */
1042  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
1043  COSTS_N_INSNS (1),			/* variable shift costs */
1044  COSTS_N_INSNS (1),			/* constant shift costs */
1045  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1046   COSTS_N_INSNS (4),			/*                               HI */
1047   COSTS_N_INSNS (3),			/*                               SI */
1048   COSTS_N_INSNS (4),			/*                               DI */
1049   COSTS_N_INSNS (2)},			/*                               other */
1050  0,					/* cost of multiply per each bit set */
1051  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
1052   COSTS_N_INSNS (26),			/*                          HI */
1053   COSTS_N_INSNS (42),			/*                          SI */
1054   COSTS_N_INSNS (74),			/*                          DI */
1055   COSTS_N_INSNS (74)},			/*                          other */
1056  COSTS_N_INSNS (1),			/* cost of movsx */
1057  COSTS_N_INSNS (1),			/* cost of movzx */
1058  8,					/* "large" insn */
1059  17,					/* MOVE_RATIO */
1060  2,					/* cost for loading QImode using movzbl */
1061  {4, 4, 4},				/* cost of loading integer registers
1062					   in QImode, HImode and SImode.
1063					   Relative to reg-reg move (2).  */
1064  {4, 4, 4},				/* cost of storing integer registers */
1065  4,					/* cost of reg,reg fld/fst */
1066  {12, 12, 12},				/* cost of loading fp registers
1067					   in SFmode, DFmode and XFmode */
1068  {6, 6, 8},				/* cost of storing fp registers
1069					   in SFmode, DFmode and XFmode */
1070  2,					/* cost of moving MMX register */
1071  {8, 8},				/* cost of loading MMX registers
1072					   in SImode and DImode */
1073  {8, 8},				/* cost of storing MMX registers
1074					   in SImode and DImode */
1075  2,					/* cost of moving SSE register */
1076  {8, 8, 8},				/* cost of loading SSE registers
1077					   in SImode, DImode and TImode */
1078  {8, 8, 8},				/* cost of storing SSE registers
1079					   in SImode, DImode and TImode */
1080  5,					/* MMX or SSE register to integer */
1081  32,					/* size of l1 cache.  */
1082  256,					/* size of l2 cache.  */
1083  64,					/* size of prefetch block */
1084  6,					/* number of parallel prefetches */
1085  3,					/* Branch cost */
1086  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
1087  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
1088  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
1089  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
1090  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
1091  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
1092  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1093   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1094          {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1095  {{libcall, {{8, loop}, {15, unrolled_loop},
1096          {2048, rep_prefix_4_byte}, {-1, libcall}}},
1097   {libcall, {{24, loop}, {32, unrolled_loop},
1098          {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099  1,                                    /* scalar_stmt_cost.  */
1100  1,                                    /* scalar load_cost.  */
1101  1,                                    /* scalar_store_cost.  */
1102  1,                                    /* vec_stmt_cost.  */
1103  1,                                    /* vec_to_scalar_cost.  */
1104  1,                                    /* scalar_to_vec_cost.  */
1105  1,                                    /* vec_align_load_cost.  */
1106  2,                                    /* vec_unalign_load_cost.  */
1107  1,                                    /* vec_store_cost.  */
1108  3,                                    /* cond_taken_branch_cost.  */
1109  1,                                    /* cond_not_taken_branch_cost.  */
1110};
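
/* A note on the memcpy/memset descriptors that end each cost table (an
   illustrative reading, assuming the stringop_algs layout from i386.h):
   the leading algorithm is used when the block size is not known at
   compile time, and each following {max, alg} pair selects ALG for known
   sizes up to MAX bytes, with -1 meaning no upper bound.  The first
   descriptor of each pair is used for 32-bit code, the second for 64-bit
   code.  For instance

     {libcall, {{32, loop}, {64, rep_prefix_4_byte},
                {8192, rep_prefix_8_byte}, {-1, libcall}}}

   means: unknown size -> libcall, up to 32 bytes -> inline loop, up to 64
   bytes -> rep movsl, up to 8192 bytes -> rep movsq, anything larger ->
   libcall.  */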
1111
1112/* Generic64 should produce code tuned for Nocona and K8.  */
1113static const
1114struct processor_costs generic64_cost = {
1115  COSTS_N_INSNS (1),			/* cost of an add instruction */
1116  /* On all chips taken into consideration, lea is 2 cycles or more.  With
1117     this cost, however, our current implementation of synth_mult ends up
1118     using unnecessary temporary registers, causing regressions on several
1119     SPECfp benchmarks.  */
1120  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
1121  COSTS_N_INSNS (1),			/* variable shift costs */
1122  COSTS_N_INSNS (1),			/* constant shift costs */
1123  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1124   COSTS_N_INSNS (4),			/*                               HI */
1125   COSTS_N_INSNS (3),			/*                               SI */
1126   COSTS_N_INSNS (4),			/*                               DI */
1127   COSTS_N_INSNS (2)},			/*                               other */
1128  0,					/* cost of multiply per each bit set */
1129  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
1130   COSTS_N_INSNS (26),			/*                          HI */
1131   COSTS_N_INSNS (42),			/*                          SI */
1132   COSTS_N_INSNS (74),			/*                          DI */
1133   COSTS_N_INSNS (74)},			/*                          other */
1134  COSTS_N_INSNS (1),			/* cost of movsx */
1135  COSTS_N_INSNS (1),			/* cost of movzx */
1136  8,					/* "large" insn */
1137  17,					/* MOVE_RATIO */
1138  4,					/* cost for loading QImode using movzbl */
1139  {4, 4, 4},				/* cost of loading integer registers
1140					   in QImode, HImode and SImode.
1141					   Relative to reg-reg move (2).  */
1142  {4, 4, 4},				/* cost of storing integer registers */
1143  4,					/* cost of reg,reg fld/fst */
1144  {12, 12, 12},				/* cost of loading fp registers
1145					   in SFmode, DFmode and XFmode */
1146  {6, 6, 8},				/* cost of storing fp registers
1147					   in SFmode, DFmode and XFmode */
1148  2,					/* cost of moving MMX register */
1149  {8, 8},				/* cost of loading MMX registers
1150					   in SImode and DImode */
1151  {8, 8},				/* cost of storing MMX registers
1152					   in SImode and DImode */
1153  2,					/* cost of moving SSE register */
1154  {8, 8, 8},				/* cost of loading SSE registers
1155					   in SImode, DImode and TImode */
1156  {8, 8, 8},				/* cost of storing SSE registers
1157					   in SImode, DImode and TImode */
1158  5,					/* MMX or SSE register to integer */
1159  32,					/* size of l1 cache.  */
1160  512,					/* size of l2 cache.  */
1161  64,					/* size of prefetch block */
1162  6,					/* number of parallel prefetches */
1163  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1164     value is increased to the perhaps more appropriate value of 5.  */
1165  3,					/* Branch cost */
1166  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
1167  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
1168  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
1169  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
1170  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
1171  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
1172  {DUMMY_STRINGOP_ALGS,
1173   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1174  {DUMMY_STRINGOP_ALGS,
1175   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1176  1,                                    /* scalar_stmt_cost.  */
1177  1,                                    /* scalar_load_cost.  */
1178  1,                                    /* scalar_store_cost.  */
1179  1,                                    /* vec_stmt_cost.  */
1180  1,                                    /* vec_to_scalar_cost.  */
1181  1,                                    /* scalar_to_vec_cost.  */
1182  1,                                    /* vec_align_load_cost.  */
1183  2,                                    /* vec_unalign_load_cost.  */
1184  1,                                    /* vec_store_cost.  */
1185  3,                                    /* cond_taken_branch_cost.  */
1186  1,                                    /* cond_not_taken_branch_cost.  */
1187};
1188
1189/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8.  */
1190static const
1191struct processor_costs generic32_cost = {
1192  COSTS_N_INSNS (1),			/* cost of an add instruction */
1193  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
1194  COSTS_N_INSNS (1),			/* variable shift costs */
1195  COSTS_N_INSNS (1),			/* constant shift costs */
1196  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1197   COSTS_N_INSNS (4),			/*                               HI */
1198   COSTS_N_INSNS (3),			/*                               SI */
1199   COSTS_N_INSNS (4),			/*                               DI */
1200   COSTS_N_INSNS (2)},			/*                               other */
1201  0,					/* cost of multiply per each bit set */
1202  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
1203   COSTS_N_INSNS (26),			/*                          HI */
1204   COSTS_N_INSNS (42),			/*                          SI */
1205   COSTS_N_INSNS (74),			/*                          DI */
1206   COSTS_N_INSNS (74)},			/*                          other */
1207  COSTS_N_INSNS (1),			/* cost of movsx */
1208  COSTS_N_INSNS (1),			/* cost of movzx */
1209  8,					/* "large" insn */
1210  17,					/* MOVE_RATIO */
1211  4,					/* cost for loading QImode using movzbl */
1212  {4, 4, 4},				/* cost of loading integer registers
1213					   in QImode, HImode and SImode.
1214					   Relative to reg-reg move (2).  */
1215  {4, 4, 4},				/* cost of storing integer registers */
1216  4,					/* cost of reg,reg fld/fst */
1217  {12, 12, 12},				/* cost of loading fp registers
1218					   in SFmode, DFmode and XFmode */
1219  {6, 6, 8},				/* cost of storing fp registers
1220					   in SFmode, DFmode and XFmode */
1221  2,					/* cost of moving MMX register */
1222  {8, 8},				/* cost of loading MMX registers
1223					   in SImode and DImode */
1224  {8, 8},				/* cost of storing MMX registers
1225					   in SImode and DImode */
1226  2,					/* cost of moving SSE register */
1227  {8, 8, 8},				/* cost of loading SSE registers
1228					   in SImode, DImode and TImode */
1229  {8, 8, 8},				/* cost of storing SSE registers
1230					   in SImode, DImode and TImode */
1231  5,					/* MMX or SSE register to integer */
1232  32,					/* size of l1 cache.  */
1233  256,					/* size of l2 cache.  */
1234  64,					/* size of prefetch block */
1235  6,					/* number of parallel prefetches */
1236  3,					/* Branch cost */
1237  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
1238  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
1239  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
1240  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
1241  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
1242  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
1243  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1244   DUMMY_STRINGOP_ALGS},
1245  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1246   DUMMY_STRINGOP_ALGS},
1247  1,                                    /* scalar_stmt_cost.  */
1248  1,                                    /* scalar_load_cost.  */
1249  1,                                    /* scalar_store_cost.  */
1250  1,                                    /* vec_stmt_cost.  */
1251  1,                                    /* vec_to_scalar_cost.  */
1252  1,                                    /* scalar_to_vec_cost.  */
1253  1,                                    /* vec_align_load_cost.  */
1254  2,                                    /* vec_unalign_load_cost.  */
1255  1,                                    /* vec_store_cost.  */
1256  3,                                    /* cond_taken_branch_cost.  */
1257  1,                                    /* cond_not_taken_branch_cost.  */
1258};
1259
1260const struct processor_costs *ix86_cost = &pentium_cost;
1261
1262/* Processor feature/optimization bitmasks.  */
1263#define m_386 (1<<PROCESSOR_I386)
1264#define m_486 (1<<PROCESSOR_I486)
1265#define m_PENT (1<<PROCESSOR_PENTIUM)
1266#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1267#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
1268#define m_NOCONA  (1<<PROCESSOR_NOCONA)
1269#define m_CORE2  (1<<PROCESSOR_CORE2)
1270#define m_ATOM  (1<<PROCESSOR_ATOM)
1271
1272#define m_GEODE  (1<<PROCESSOR_GEODE)
1273#define m_K6  (1<<PROCESSOR_K6)
1274#define m_K6_GEODE  (m_K6 | m_GEODE)
1275#define m_K8  (1<<PROCESSOR_K8)
1276#define m_ATHLON  (1<<PROCESSOR_ATHLON)
1277#define m_ATHLON_K8  (m_K8 | m_ATHLON)
1278#define m_AMDFAM10  (1<<PROCESSOR_AMDFAM10)
1279#define m_AMD_MULTIPLE  (m_K8 | m_ATHLON | m_AMDFAM10)
1280
1281#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1282#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1283
1284/* Generic instruction choice should be common subset of supported CPUs
1285   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1286#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1287
1288/* Feature tests against the various tunings.  */
1289unsigned char ix86_tune_features[X86_TUNE_LAST];
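
/* These are derived from the masks below; roughly (a sketch of what
   override_options does later in this file):

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   so each entry records whether the selected -mtune CPU has the
   corresponding tuning enabled.  */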
1290
1291/* Feature tests against the various tunings used to create ix86_tune_features
1292   based on the processor mask.  */
1293static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1294  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1295     negatively, so enabling it for Generic64 seems like a good code size
1296     tradeoff.  We can't enable it for 32bit generic because it does not
1297     work well with PPro based chips.  */
1298  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1299
1300  /* X86_TUNE_PUSH_MEMORY */
1301  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1302  | m_NOCONA | m_CORE2 | m_GENERIC,
1303
1304  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1305  m_486 | m_PENT,
1306
1307  /* X86_TUNE_UNROLL_STRLEN */
1308  m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1309  | m_CORE2 | m_GENERIC,
1310
1311  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1312  m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1313
1314  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1315     on simulation results, but after P4 shipped no performance benefit
1316     was observed from them, and they also increase code size.  As a
1317     result, icc never generates branch hints.  */
1318  0,
1319
1320  /* X86_TUNE_DOUBLE_WITH_ADD */
1321  ~m_386,
1322
1323  /* X86_TUNE_USE_SAHF */
1324  m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1325  | m_NOCONA | m_CORE2 | m_GENERIC,
1326
1327  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1328     partial dependencies.  */
1329  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1330  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1331
1332  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1333     register stalls on the Generic32 compilation setting as well.  However,
1334     in the current implementation partial register stalls are not eliminated
1335     very well: they can be introduced via subregs synthesized by combine
1336     and can happen in caller/callee saving sequences.  Because this option
1337     pays back little on PPro based chips and conflicts with the partial
1338     register dependencies used by Athlon/P4 based chips, it is better to
1339     leave it off for generic32 for now.  */
1340  m_PPRO,
1341
1342  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1343  m_CORE2 | m_GENERIC,
1344
1345  /* X86_TUNE_USE_HIMODE_FIOP */
1346  m_386 | m_486 | m_K6_GEODE,
1347
1348  /* X86_TUNE_USE_SIMODE_FIOP */
1349  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1350
1351  /* X86_TUNE_USE_MOV0 */
1352  m_K6,
1353
1354  /* X86_TUNE_USE_CLTD */
1355  ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1356
1357  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
1358  m_PENT4,
1359
1360  /* X86_TUNE_SPLIT_LONG_MOVES */
1361  m_PPRO,
1362
1363  /* X86_TUNE_READ_MODIFY_WRITE */
1364  ~m_PENT,
1365
1366  /* X86_TUNE_READ_MODIFY */
1367  ~(m_PENT | m_PPRO),
1368
1369  /* X86_TUNE_PROMOTE_QIMODE */
1370  m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1371  | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1372
1373  /* X86_TUNE_FAST_PREFIX */
1374  ~(m_PENT | m_486 | m_386),
1375
1376  /* X86_TUNE_SINGLE_STRINGOP */
1377  m_386 | m_PENT4 | m_NOCONA,
1378
1379  /* X86_TUNE_QIMODE_MATH */
1380  ~0,
1381
1382  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1383     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
1384     might be considered for Generic32 if our scheme for avoiding partial
1385     stalls was more effective.  */
1386  ~m_PPRO,
1387
1388  /* X86_TUNE_PROMOTE_QI_REGS */
1389  0,
1390
1391  /* X86_TUNE_PROMOTE_HI_REGS */
1392  m_PPRO,
1393
1394  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
1395  m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1396  | m_CORE2 | m_GENERIC,
1397
1398  /* X86_TUNE_ADD_ESP_8 */
1399  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1400  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401
1402  /* X86_TUNE_SUB_ESP_4 */
1403  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1404  | m_GENERIC,
1405
1406  /* X86_TUNE_SUB_ESP_8 */
1407  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1408  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409
1410  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1411     for DFmode copies */
1412  ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1413    | m_GENERIC | m_GEODE),
1414
1415  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1416  m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417
1418  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1419     conflict here between PPro/Pentium4 based chips, which treat 128bit
1420     SSE registers as single units, and K8 based chips, which split SSE
1421     registers into two 64bit halves.  This knob promotes all store
1422     destinations to 128bit to allow register renaming on 128bit SSE units,
1423     but usually results in one extra microop on 64bit SSE units.
1424     Experimental results show that disabling this option on P4 causes an
1425     over 20% SPECfp regression, while enabling it on K8 causes a roughly
1426     2.4% regression that can be partly masked by careful scheduling of moves.  */
1427  m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1428  | m_AMDFAM10,
1429
1430  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1431  m_AMDFAM10,
1432
1433  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1434     are resolved on SSE register parts instead of whole registers, so we may
1435     maintain just the lower part of scalar values in the proper format,
1436     leaving the upper part undefined.  */
1437  m_ATHLON_K8,
1438
1439  /* X86_TUNE_SSE_TYPELESS_STORES */
1440  m_AMD_MULTIPLE,
1441
1442  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1443  m_PPRO | m_PENT4 | m_NOCONA,
1444
1445  /* X86_TUNE_MEMORY_MISMATCH_STALL */
1446  m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447
1448  /* X86_TUNE_PROLOGUE_USING_MOVE */
1449  m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450
1451  /* X86_TUNE_EPILOGUE_USING_MOVE */
1452  m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453
1454  /* X86_TUNE_SHIFT1 */
1455  ~m_486,
1456
1457  /* X86_TUNE_USE_FFREEP */
1458  m_AMD_MULTIPLE,
1459
1460  /* X86_TUNE_INTER_UNIT_MOVES */
1461  ~(m_AMD_MULTIPLE | m_GENERIC),
1462
1463  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1464  ~(m_AMDFAM10),
1465
1466  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1467     than 4 branch instructions in the 16 byte window.  */
1468  m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1469  | m_GENERIC,
1470
1471  /* X86_TUNE_SCHEDULE */
1472  m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1473  | m_GENERIC,
1474
1475  /* X86_TUNE_USE_BT */
1476  m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1477
1478  /* X86_TUNE_USE_INCDEC */
1479  ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1480
1481  /* X86_TUNE_PAD_RETURNS */
1482  m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483
1484  /* X86_TUNE_EXT_80387_CONSTANTS */
1485  m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1486  | m_CORE2 | m_GENERIC,
1487
1488  /* X86_TUNE_SHORTEN_X87_SSE */
1489  ~m_K8,
1490
1491  /* X86_TUNE_AVOID_VECTOR_DECODE */
1492  m_K8 | m_GENERIC64,
1493
1494  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
1495     HImode and SImode multiply, but 386 and 486 do HImode multiply faster.  */
1496  ~(m_386 | m_486),
1497
1498  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1499     vector path on AMD machines.  */
1500  m_K8 | m_GENERIC64 | m_AMDFAM10,
1501
1502  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503     machines.  */
1504  m_K8 | m_GENERIC64 | m_AMDFAM10,
1505
1506  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1507     than a MOV.  */
1508  m_PENT,
1509
1510  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1511     but one byte longer.  */
1512  m_PENT,
1513
1514  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1515     operand that cannot be represented using a modRM byte.  The XOR
1516     replacement is long decoded, so this split helps here as well.  */
1517  m_K6,
1518
1519  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520     from FP to FP. */
1521  m_AMDFAM10 | m_GENERIC,
1522
1523  /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1524     from integer to FP. */
1525  m_AMDFAM10,
1526
1527  /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1528     with a subsequent conditional jump instruction into a single
1529     compare-and-branch uop.  */
1530  m_CORE2,
1531
1532  /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1533     will impact LEA instruction selection. */
1534  m_ATOM,
1535};
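
/* Individual tunings are normally consulted through TARGET_* wrappers
   defined in i386.h rather than by indexing the array directly, e.g.
   (illustrative, assuming the usual definition):

     #define TARGET_USE_LEAVE ix86_tune_features[X86_TUNE_USE_LEAVE]

   which lets the code generators simply test "if (TARGET_USE_LEAVE)".  */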
1536
1537/* Feature tests against the various architecture variations.  */
1538unsigned char ix86_arch_features[X86_ARCH_LAST];
1539
1540/* Feature tests against the various architecture variations, used to create
1541   ix86_arch_features based on the processor mask.  */
1542static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1543  /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
1544  ~(m_386 | m_486 | m_PENT | m_K6),
1545
1546  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
1547  ~m_386,
1548
1549  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1550  ~(m_386 | m_486),
1551
1552  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
1553  ~m_386,
1554
1555  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
1556  ~m_386,
1557};
1558
1559static const unsigned int x86_accumulate_outgoing_args
1560  = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1561    | m_GENERIC;
1562
1563static const unsigned int x86_arch_always_fancy_math_387
1564  = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1565    | m_NOCONA | m_CORE2 | m_GENERIC;
1566
1567static enum stringop_alg stringop_alg = no_stringop;
1568
1569/* If the average insn count for a single function invocation is
1570   lower than this constant, emit fast (but longer) prologue and
1571   epilogue code.  */
1572#define FAST_PROLOGUE_INSN_COUNT 20
1573
1574/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
1575static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1576static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1577static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1578
1579/* Array of the smallest class containing reg number REGNO, indexed by
1580   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
1581
1582enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1583{
1584  /* ax, dx, cx, bx */
1585  AREG, DREG, CREG, BREG,
1586  /* si, di, bp, sp */
1587  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1588  /* FP registers */
1589  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1590  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1591  /* arg pointer */
1592  NON_Q_REGS,
1593  /* flags, fpsr, fpcr, frame */
1594  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1595  /* SSE registers */
1596  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1597  SSE_REGS, SSE_REGS,
1598  /* MMX registers */
1599  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1600  MMX_REGS, MMX_REGS,
1601  /* REX registers */
1602  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1604  /* SSE REX registers */
1605  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1606  SSE_REGS, SSE_REGS,
1607};
1608
1609/* The "default" register map used in 32bit mode.  */
1610
1611int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1612{
1613  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
1614  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
1615  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
1616  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
1617  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
1618  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
1619  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
1620};
1621
1622/* The "default" register map used in 64bit mode.  */
1623
1624int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1625{
1626  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
1627  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
1628  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
1629  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
1630  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
1631  8,9,10,11,12,13,14,15,		/* extended integer registers */
1632  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
1633};
1634
1635/* Define the register numbers to be used in Dwarf debugging information.
1636   The SVR4 reference port C compiler uses the following register numbers
1637   in its Dwarf output code:
1638	0 for %eax (gcc regno = 0)
1639	1 for %ecx (gcc regno = 2)
1640	2 for %edx (gcc regno = 1)
1641	3 for %ebx (gcc regno = 3)
1642	4 for %esp (gcc regno = 7)
1643	5 for %ebp (gcc regno = 6)
1644	6 for %esi (gcc regno = 4)
1645	7 for %edi (gcc regno = 5)
1646   The following three DWARF register numbers are never generated by
1647   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1648   believes these numbers have these meanings.
1649	8  for %eip    (no gcc equivalent)
1650	9  for %eflags (gcc regno = 17)
1651	10 for %trapno (no gcc equivalent)
1652   It is not at all clear how we should number the FP stack registers
1653   for the x86 architecture.  If the version of SDB on x86/svr4 were
1654   a bit less brain dead with respect to floating-point then we would
1655   have a precedent to follow with respect to DWARF register numbers
1656   for x86 FP registers, but the SDB on x86/svr4 is so completely
1657   broken with respect to FP registers that it is hardly worth thinking
1658   of it as something to strive for compatibility with.
1659   The version of x86/svr4 SDB I have at the moment does (partially)
1660   seem to believe that DWARF register number 11 is associated with
1661   the x86 register %st(0), but that's about all.  Higher DWARF
1662   register numbers don't seem to be associated with anything in
1663   particular, and even for DWARF regno 11, SDB only seems to under-
1664   stand that it should say that a variable lives in %st(0) (when
1665   asked via an `=' command) if we said it was in DWARF regno 11,
1666   but SDB still prints garbage when asked for the value of the
1667   variable in question (via a `/' command).
1668   (Also note that the labels SDB prints for various FP stack regs
1669   when doing an `x' command are all wrong.)
1670   Note that these problems generally don't affect the native SVR4
1671   C compiler because it doesn't allow the use of -O with -g and
1672   because when it is *not* optimizing, it allocates a memory
1673   location for each floating-point variable, and the memory
1674   location is what gets described in the DWARF AT_location
1675   attribute for the variable in question.
1676   Regardless of the severe mental illness of the x86/svr4 SDB, we
1677   do something sensible here and we use the following DWARF
1678   register numbers.  Note that these are all stack-top-relative
1679   numbers.
1680	11 for %st(0) (gcc regno = 8)
1681	12 for %st(1) (gcc regno = 9)
1682	13 for %st(2) (gcc regno = 10)
1683	14 for %st(3) (gcc regno = 11)
1684	15 for %st(4) (gcc regno = 12)
1685	16 for %st(5) (gcc regno = 13)
1686	17 for %st(6) (gcc regno = 14)
1687	18 for %st(7) (gcc regno = 15)
1688*/
1689int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1690{
1691  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
1692  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
1693  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
1694  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
1695  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
1696  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
1697  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
1698};
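
/* For example, gcc regno 4 is %esi, so svr4_dbx_register_map[4] == 6 as
   listed above, and gcc regno 7 (%esp) maps to DWARF register 4; the -1
   entries mark registers that have no number in this mapping.  */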
1699
1700/* Test and compare insns in i386.md store the information needed to
1701   generate branch and scc insns here.  */
1702
1703rtx ix86_compare_op0 = NULL_RTX;
1704rtx ix86_compare_op1 = NULL_RTX;
1705
1706/* Define parameter passing and return registers.  */
1707
1708static int const x86_64_int_parameter_registers[6] =
1709{
1710  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1711};
1712
1713static int const x86_64_ms_abi_int_parameter_registers[4] =
1714{
1715  CX_REG, DX_REG, R8_REG, R9_REG
1716};
1717
1718static int const x86_64_int_return_registers[4] =
1719{
1720  AX_REG, DX_REG, DI_REG, SI_REG
1721};
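
/* Illustratively, for a call f (int a, int b, int c) in 64-bit code the
   SysV ABI passes a, b and c in %edi, %esi and %edx, while the MS ABI
   passes them in %ecx, %edx and %r8d; integer return values come back in
   %rax (with %rdx used for the second word when needed).  */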
1722
1723/* Define the structure for the machine field in struct function.  */
1724
1725struct GTY(()) stack_local_entry {
1726  unsigned short mode;
1727  unsigned short n;
1728  rtx rtl;
1729  struct stack_local_entry *next;
1730};
1731
1732/* Structure describing stack frame layout.
1733   Stack grows downward:
1734
1735   [arguments]
1736					      <- ARG_POINTER
1737   saved pc
1738
1739   saved frame pointer if frame_pointer_needed
1740					      <- HARD_FRAME_POINTER
1741   [saved regs]
1742
1743   [padding0]
1744
1745   [saved SSE regs]
1746
1747   [padding1]          \
1748		        )
1749   [va_arg registers]  (
1750		        > to_allocate	      <- FRAME_POINTER
1751   [frame]	       (
1752		        )
1753   [padding2]	       /
1754  */
1755struct ix86_frame
1756{
1757  int padding0;
1758  int nsseregs;
1759  int nregs;
1760  int padding1;
1761  int va_arg_size;
1762  HOST_WIDE_INT frame;
1763  int padding2;
1764  int outgoing_arguments_size;
1765  int red_zone_size;
1766
1767  HOST_WIDE_INT to_allocate;
1768  /* The offsets relative to ARG_POINTER.  */
1769  HOST_WIDE_INT frame_pointer_offset;
1770  HOST_WIDE_INT hard_frame_pointer_offset;
1771  HOST_WIDE_INT stack_pointer_offset;
1772
1773  /* When save_regs_using_mov is set, emit prologue using
1774     move instead of push instructions.  */
1775  bool save_regs_using_mov;
1776};
1777
1778/* Code model option.  */
1779enum cmodel ix86_cmodel;
1780/* Asm dialect.  */
1781enum asm_dialect ix86_asm_dialect = ASM_ATT;
1782/* TLS dialects.  */
1783enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1784
1785/* Which unit we are generating floating point math for.  */
1786enum fpmath_unit ix86_fpmath;
1787
1788/* Which cpu we are scheduling for.  */
1789enum attr_cpu ix86_schedule;
1790
1791/* Which cpu we are optimizing for.  */
1792enum processor_type ix86_tune;
1793
1794/* Which instruction set architecture to use.  */
1795enum processor_type ix86_arch;
1796
1797/* True if the SSE prefetch instruction is not a NOOP.  */
1798int x86_prefetch_sse;
1799
1800/* ix86_regparm_string as a number */
1801static int ix86_regparm;
1802
1803/* -mstackrealign option */
1804extern int ix86_force_align_arg_pointer;
1805static const char ix86_force_align_arg_pointer_string[]
1806  = "force_align_arg_pointer";
1807
1808static rtx (*ix86_gen_leave) (void);
1809static rtx (*ix86_gen_pop1) (rtx);
1810static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1811static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1812static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1813static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1814static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1815static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1816
1817/* Preferred alignment for stack boundary in bits.  */
1818unsigned int ix86_preferred_stack_boundary;
1819
1820/* Alignment for incoming stack boundary in bits specified at
1821   command line.  */
1822static unsigned int ix86_user_incoming_stack_boundary;
1823
1824/* Default alignment for incoming stack boundary in bits.  */
1825static unsigned int ix86_default_incoming_stack_boundary;
1826
1827/* Alignment for incoming stack boundary in bits.  */
1828unsigned int ix86_incoming_stack_boundary;
1829
1830/* The ABI used by the target.  */
1831enum calling_abi ix86_abi;
1832
1833/* Values 1-5: see jump.c */
1834int ix86_branch_cost;
1835
1836/* Calling abi specific va_list type nodes.  */
1837static GTY(()) tree sysv_va_list_type_node;
1838static GTY(()) tree ms_va_list_type_node;
1839
1840/* Variables which are this size or smaller are put in the data/bss
1841   or ldata/lbss sections.  */
1842
1843int ix86_section_threshold = 65536;
1844
1845/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
1846char internal_label_prefix[16];
1847int internal_label_prefix_len;
1848
1849/* Fence to use after loop using movnt.  */
1850tree x86_mfence;
1851
1852/* Register class used for passing a given 64bit part of the argument.
1853   These represent classes as documented by the psABI, except that the
1854   SSESF and SSEDF classes are basically the SSE class, for which gcc uses
1855   SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1856
1857   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
1858   moves whenever possible (the upper half does contain padding).  */
1859enum x86_64_reg_class
1860  {
1861    X86_64_NO_CLASS,
1862    X86_64_INTEGER_CLASS,
1863    X86_64_INTEGERSI_CLASS,
1864    X86_64_SSE_CLASS,
1865    X86_64_SSESF_CLASS,
1866    X86_64_SSEDF_CLASS,
1867    X86_64_SSEUP_CLASS,
1868    X86_64_X87_CLASS,
1869    X86_64_X87UP_CLASS,
1870    X86_64_COMPLEX_X87_CLASS,
1871    X86_64_MEMORY_CLASS
1872  };
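
/* Some illustrative psABI classifications (sketches, not the full rules,
   which live in the argument-classification code below):

     struct { double x; double y; }  -> SSEDF, SSEDF   (two SSE registers)
     struct { long l;  double d; }   -> INTEGER, SSEDF (one GPR, one SSE reg)
     long double                     -> X87, X87UP     (passed in memory)

   Aggregates that do not fit the register classes classify as MEMORY and
   are passed on the stack.  */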
1873
1874#define MAX_CLASSES 4
1875
1876/* Table of constants used by fldpi, fldln2, etc....  */
1877static REAL_VALUE_TYPE ext_80387_constants_table [5];
1878static bool ext_80387_constants_init = 0;
1879
1880
1881static struct machine_function * ix86_init_machine_status (void);
1882static rtx ix86_function_value (const_tree, const_tree, bool);
1883static rtx ix86_static_chain (const_tree, bool);
1884static int ix86_function_regparm (const_tree, const_tree);
1885static void ix86_compute_frame_layout (struct ix86_frame *);
1886static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1887						 rtx, rtx, int);
1888static void ix86_add_new_builtins (int);
1889static rtx ix86_expand_vec_perm_builtin (tree);
1890
1891enum ix86_function_specific_strings
1892{
1893  IX86_FUNCTION_SPECIFIC_ARCH,
1894  IX86_FUNCTION_SPECIFIC_TUNE,
1895  IX86_FUNCTION_SPECIFIC_FPMATH,
1896  IX86_FUNCTION_SPECIFIC_MAX
1897};
1898
1899static char *ix86_target_string (int, int, const char *, const char *,
1900				 const char *, bool);
1901static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1902static void ix86_function_specific_save (struct cl_target_option *);
1903static void ix86_function_specific_restore (struct cl_target_option *);
1904static void ix86_function_specific_print (FILE *, int,
1905					  struct cl_target_option *);
1906static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1907static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1908static bool ix86_can_inline_p (tree, tree);
1909static void ix86_set_current_function (tree);
1910static unsigned int ix86_minimum_incoming_stack_boundary (bool);
1911
1912static enum calling_abi ix86_function_abi (const_tree);
1913
1914
1915#ifndef SUBTARGET32_DEFAULT_CPU
1916#define SUBTARGET32_DEFAULT_CPU "i386"
1917#endif
1918
1919/* The svr4 ABI for the i386 says that records and unions are returned
1920   in memory.  */
1921#ifndef DEFAULT_PCC_STRUCT_RETURN
1922#define DEFAULT_PCC_STRUCT_RETURN 1
1923#endif
1924
1925/* Whether -mtune= or -march= were specified */
1926static int ix86_tune_defaulted;
1927static int ix86_arch_specified;
1928
1929/* Bit flags that specify the ISA we are compiling for.  */
1930int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1931
1932/* A mask of ix86_isa_flags that includes bit X if X
1933   was set or cleared on the command line.  */
1934static int ix86_isa_flags_explicit;
1935
1936/* Define a set of ISAs which are available when a given ISA is
1937   enabled.  MMX and SSE ISAs are handled separately.  */
1938
1939#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1940#define OPTION_MASK_ISA_3DNOW_SET \
1941  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1942
1943#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1944#define OPTION_MASK_ISA_SSE2_SET \
1945  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1946#define OPTION_MASK_ISA_SSE3_SET \
1947  (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1948#define OPTION_MASK_ISA_SSSE3_SET \
1949  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1950#define OPTION_MASK_ISA_SSE4_1_SET \
1951  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1952#define OPTION_MASK_ISA_SSE4_2_SET \
1953  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1954#define OPTION_MASK_ISA_AVX_SET \
1955  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1956#define OPTION_MASK_ISA_FMA_SET \
1957  (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1958
1959/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1960   as -msse4.2.  */
1961#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1962
1963#define OPTION_MASK_ISA_SSE4A_SET \
1964  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1965#define OPTION_MASK_ISA_FMA4_SET \
1966  (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
1967   | OPTION_MASK_ISA_AVX_SET)
1968#define OPTION_MASK_ISA_XOP_SET \
1969  (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
1970#define OPTION_MASK_ISA_LWP_SET \
1971  OPTION_MASK_ISA_LWP
1972
1973/* AES and PCLMUL need SSE2 because they use xmm registers */
1974#define OPTION_MASK_ISA_AES_SET \
1975  (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1976#define OPTION_MASK_ISA_PCLMUL_SET \
1977  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1978
1979#define OPTION_MASK_ISA_ABM_SET \
1980  (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1981
1982#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1983#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1984#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1985#define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1986#define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
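
/* The effect is that enabling one ISA enables everything it depends on,
   for example:

     ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;

   also sets the SSE4.1, SSSE3, SSE3, SSE2 and SSE bits, which is how a
   plain -msse4.2 turns on all earlier SSE generations as well.  */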
1987
1988/* Define a set of ISAs which aren't available when a given ISA is
1989   disabled.  MMX and SSE ISAs are handled separately.  */
1990
1991#define OPTION_MASK_ISA_MMX_UNSET \
1992  (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1993#define OPTION_MASK_ISA_3DNOW_UNSET \
1994  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1995#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1996
1997#define OPTION_MASK_ISA_SSE_UNSET \
1998  (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1999#define OPTION_MASK_ISA_SSE2_UNSET \
2000  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2001#define OPTION_MASK_ISA_SSE3_UNSET \
2002  (OPTION_MASK_ISA_SSE3 \
2003   | OPTION_MASK_ISA_SSSE3_UNSET \
2004   | OPTION_MASK_ISA_SSE4A_UNSET )
2005#define OPTION_MASK_ISA_SSSE3_UNSET \
2006  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2007#define OPTION_MASK_ISA_SSE4_1_UNSET \
2008  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2009#define OPTION_MASK_ISA_SSE4_2_UNSET \
2010  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2011#define OPTION_MASK_ISA_AVX_UNSET \
2012  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2013   | OPTION_MASK_ISA_FMA4_UNSET)
2014#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2015
2016/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
2017   as -mno-sse4.1.  */
2018#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2019
2020#define OPTION_MASK_ISA_SSE4A_UNSET \
2021  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2022
2023#define OPTION_MASK_ISA_FMA4_UNSET \
2024  (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2025#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2026#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2027
2028#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2029#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2030#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2031#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2032#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2033#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2034#define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2035#define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
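
/* The UNSET masks cascade the other way, so disabling an ISA also
   disables everything that depends on it, for example:

     ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;

   clears SSE2 together with SSE3, SSSE3, SSE4.1, SSE4.2, AVX, FMA, SSE4A,
   FMA4 and XOP, which is what -mno-sse2 relies on.  */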
2036
2037/* Vectorization library interface and handlers.  */
2038tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2039static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2040static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2041
2042/* Processor target table, indexed by processor number */
2043struct ptt
2044{
2045  const struct processor_costs *cost;		/* Processor costs */
2046  const int align_loop;				/* Default alignments.  */
2047  const int align_loop_max_skip;
2048  const int align_jump;
2049  const int align_jump_max_skip;
2050  const int align_func;
2051};
2052
2053static const struct ptt processor_target_table[PROCESSOR_max] =
2054{
2055  {&i386_cost, 4, 3, 4, 3, 4},
2056  {&i486_cost, 16, 15, 16, 15, 16},
2057  {&pentium_cost, 16, 7, 16, 7, 16},
2058  {&pentiumpro_cost, 16, 15, 16, 10, 16},
2059  {&geode_cost, 0, 0, 0, 0, 0},
2060  {&k6_cost, 32, 7, 32, 7, 32},
2061  {&athlon_cost, 16, 7, 16, 7, 16},
2062  {&pentium4_cost, 0, 0, 0, 0, 0},
2063  {&k8_cost, 16, 7, 16, 7, 16},
2064  {&nocona_cost, 0, 0, 0, 0, 0},
2065  {&core2_cost, 16, 10, 16, 10, 16},
2066  {&generic32_cost, 16, 7, 16, 7, 16},
2067  {&generic64_cost, 16, 10, 16, 10, 16},
2068  {&amdfam10_cost, 32, 24, 32, 7, 32},
2069  {&atom_cost, 16, 7, 16, 7, 16}
2070};
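
/* Reading an entry, e.g. {&k8_cost, 16, 7, 16, 7, 16} for K8: use the k8
   cost table, align loops and jumps to 16 bytes while skipping at most 7
   bytes of padding, and align function starts to 16 bytes.  */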
2071
2072static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2073{
2074  "generic",
2075  "i386",
2076  "i486",
2077  "pentium",
2078  "pentium-mmx",
2079  "pentiumpro",
2080  "pentium2",
2081  "pentium3",
2082  "pentium4",
2083  "pentium-m",
2084  "prescott",
2085  "nocona",
2086  "core2",
2087  "atom",
2088  "geode",
2089  "k6",
2090  "k6-2",
2091  "k6-3",
2092  "athlon",
2093  "athlon-4",
2094  "k8",
2095  "amdfam10"
2096};
2097
2098/* Implement TARGET_HANDLE_OPTION.  */
2099
2100static bool
2101ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2102{
2103  switch (code)
2104    {
2105    case OPT_mmmx:
2106      if (value)
2107	{
2108	  ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2109	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2110	}
2111      else
2112	{
2113	  ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2114	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2115	}
2116      return true;
2117
2118    case OPT_m3dnow:
2119      if (value)
2120	{
2121	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2122	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2123	}
2124      else
2125	{
2126	  ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2127	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2128	}
2129      return true;
2130
2131    case OPT_m3dnowa:
2132      return false;
2133
2134    case OPT_msse:
2135      if (value)
2136	{
2137	  ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2138	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2139	}
2140      else
2141	{
2142	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2143	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2144	}
2145      return true;
2146
2147    case OPT_msse2:
2148      if (value)
2149	{
2150	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2151	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2152	}
2153      else
2154	{
2155	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2156	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2157	}
2158      return true;
2159
2160    case OPT_msse3:
2161      if (value)
2162	{
2163	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2164	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2165	}
2166      else
2167	{
2168	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2169	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2170	}
2171      return true;
2172
2173    case OPT_mssse3:
2174      if (value)
2175	{
2176	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2177	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2178	}
2179      else
2180	{
2181	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2182	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2183	}
2184      return true;
2185
2186    case OPT_msse4_1:
2187      if (value)
2188	{
2189	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2190	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2191	}
2192      else
2193	{
2194	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2195	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2196	}
2197      return true;
2198
2199    case OPT_msse4_2:
2200      if (value)
2201	{
2202	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2203	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2204	}
2205      else
2206	{
2207	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2208	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2209	}
2210      return true;
2211
2212    case OPT_mavx:
2213      if (value)
2214	{
2215	  ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2216	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2217	}
2218      else
2219	{
2220	  ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2221	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2222	}
2223      return true;
2224
2225    case OPT_mfma:
2226      if (value)
2227	{
2228	  ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2229	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2230	}
2231      else
2232	{
2233	  ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2234	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2235	}
2236      return true;
2237
2238    case OPT_msse4:
2239      ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2240      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2241      return true;
2242
2243    case OPT_mno_sse4:
2244      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2245      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2246      return true;
2247
2248    case OPT_msse4a:
2249      if (value)
2250	{
2251	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2252	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2253	}
2254      else
2255	{
2256	  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2257	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2258	}
2259      return true;
2260
2261    case OPT_mfma4:
2262      if (value)
2263	{
2264	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2265	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2266	}
2267      else
2268	{
2269	  ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2270	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2271	}
2272      return true;
2273
2274   case OPT_mxop:
2275      if (value)
2276	{
2277	  ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2278	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2279	}
2280      else
2281	{
2282	  ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2283	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2284	}
2285      return true;
2286
2287   case OPT_mlwp:
2288      if (value)
2289	{
2290	  ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2291	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2292	}
2293      else
2294	{
2295	  ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2296	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2297	}
2298      return true;
2299
2300    case OPT_mabm:
2301      if (value)
2302	{
2303	  ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2304	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2305	}
2306      else
2307	{
2308	  ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2309	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2310	}
2311      return true;
2312
2313    case OPT_mpopcnt:
2314      if (value)
2315	{
2316	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2317	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2318	}
2319      else
2320	{
2321	  ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2322	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2323	}
2324      return true;
2325
2326    case OPT_msahf:
2327      if (value)
2328	{
2329	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2330	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2331	}
2332      else
2333	{
2334	  ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2335	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2336	}
2337      return true;
2338
2339    case OPT_mcx16:
2340      if (value)
2341	{
2342	  ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2343	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2344	}
2345      else
2346	{
2347	  ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2348	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2349	}
2350      return true;
2351
2352    case OPT_mmovbe:
2353      if (value)
2354	{
2355	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2356	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2357	}
2358      else
2359	{
2360	  ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2361	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2362	}
2363      return true;
2364
2365    case OPT_mcrc32:
2366      if (value)
2367	{
2368	  ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2369	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2370	}
2371      else
2372	{
2373	  ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2374	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2375	}
2376      return true;
2377
2378    case OPT_maes:
2379      if (value)
2380	{
2381	  ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2382	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2383	}
2384      else
2385	{
2386	  ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2387	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2388	}
2389      return true;
2390
2391    case OPT_mpclmul:
2392      if (value)
2393	{
2394	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2395	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2396	}
2397      else
2398	{
2399	  ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2400	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2401	}
2402      return true;
2403
2404    default:
2405      return true;
2406    }
2407}
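
/* The pattern above serves two purposes: ix86_isa_flags holds what is
   currently enabled, while ix86_isa_flags_explicit records which bits the
   user set or cleared explicitly.  When -march= later implies extra ISAs,
   override_options only touches bits the user left alone, roughly (a
   sketch of that code):

     if ((processor_alias_table[i].flags & PTA_SSE2)
         && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
       ix86_isa_flags |= OPTION_MASK_ISA_SSE2;  */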
2408
2409/* Return a string that documents the current -m options.  The caller is
2410   responsible for freeing the string.  */
2411
2412static char *
2413ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2414		    const char *fpmath, bool add_nl_p)
2415{
2416  struct ix86_target_opts
2417  {
2418    const char *option;		/* option string */
2419    int mask;			/* isa mask options */
2420  };
2421
2422  /* This table is ordered so that options like -msse4.2, which imply
2423     preceding options, are matched first.  */
2424  static struct ix86_target_opts isa_opts[] =
2425  {
2426    { "-m64",		OPTION_MASK_ISA_64BIT },
2427    { "-mfma4",		OPTION_MASK_ISA_FMA4 },
2428    { "-mfma",		OPTION_MASK_ISA_FMA },
2429    { "-mxop",		OPTION_MASK_ISA_XOP },
2430    { "-mlwp",		OPTION_MASK_ISA_LWP },
2431    { "-msse4a",	OPTION_MASK_ISA_SSE4A },
2432    { "-msse4.2",	OPTION_MASK_ISA_SSE4_2 },
2433    { "-msse4.1",	OPTION_MASK_ISA_SSE4_1 },
2434    { "-mssse3",	OPTION_MASK_ISA_SSSE3 },
2435    { "-msse3",		OPTION_MASK_ISA_SSE3 },
2436    { "-msse2",		OPTION_MASK_ISA_SSE2 },
2437    { "-msse",		OPTION_MASK_ISA_SSE },
2438    { "-m3dnow",	OPTION_MASK_ISA_3DNOW },
2439    { "-m3dnowa",	OPTION_MASK_ISA_3DNOW_A },
2440    { "-mmmx",		OPTION_MASK_ISA_MMX },
2441    { "-mabm",		OPTION_MASK_ISA_ABM },
2442    { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
2443    { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
2444    { "-mcrc32",	OPTION_MASK_ISA_CRC32 },
2445    { "-maes",		OPTION_MASK_ISA_AES },
2446    { "-mpclmul",	OPTION_MASK_ISA_PCLMUL },
2447  };
2448
2449  /* Flag options.  */
2450  static struct ix86_target_opts flag_opts[] =
2451  {
2452    { "-m128bit-long-double",		MASK_128BIT_LONG_DOUBLE },
2453    { "-m80387",			MASK_80387 },
2454    { "-maccumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS },
2455    { "-malign-double",			MASK_ALIGN_DOUBLE },
2456    { "-mcld",				MASK_CLD },
2457    { "-mfp-ret-in-387",		MASK_FLOAT_RETURNS },
2458    { "-mieee-fp",			MASK_IEEE_FP },
2459    { "-minline-all-stringops",		MASK_INLINE_ALL_STRINGOPS },
2460    { "-minline-stringops-dynamically",	MASK_INLINE_STRINGOPS_DYNAMICALLY },
2461    { "-mms-bitfields",			MASK_MS_BITFIELD_LAYOUT },
2462    { "-mno-align-stringops",		MASK_NO_ALIGN_STRINGOPS },
2463    { "-mno-fancy-math-387",		MASK_NO_FANCY_MATH_387 },
2464    { "-mno-push-args",			MASK_NO_PUSH_ARGS },
2465    { "-mno-red-zone",			MASK_NO_RED_ZONE },
2466    { "-momit-leaf-frame-pointer",	MASK_OMIT_LEAF_FRAME_POINTER },
2467    { "-mrecip",			MASK_RECIP },
2468    { "-mrtd",				MASK_RTD },
2469    { "-msseregparm",			MASK_SSEREGPARM },
2470    { "-mstack-arg-probe",		MASK_STACK_PROBE },
2471    { "-mtls-direct-seg-refs",		MASK_TLS_DIRECT_SEG_REFS },
2472  };
2473
2474  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2475
2476  char isa_other[40];
2477  char target_other[40];
2478  unsigned num = 0;
2479  unsigned i, j;
2480  char *ret;
2481  char *ptr;
2482  size_t len;
2483  size_t line_len;
2484  size_t sep_len;
2485
2486  memset (opts, '\0', sizeof (opts));
2487
2488  /* Add -march= option.  */
2489  if (arch)
2490    {
2491      opts[num][0] = "-march=";
2492      opts[num++][1] = arch;
2493    }
2494
2495  /* Add -mtune= option.  */
2496  if (tune)
2497    {
2498      opts[num][0] = "-mtune=";
2499      opts[num++][1] = tune;
2500    }
2501
2502  /* Pick out the options in isa options.  */
2503  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2504    {
2505      if ((isa & isa_opts[i].mask) != 0)
2506	{
2507	  opts[num++][0] = isa_opts[i].option;
2508	  isa &= ~ isa_opts[i].mask;
2509	}
2510    }
2511
2512  if (isa && add_nl_p)
2513    {
2514      opts[num++][0] = isa_other;
2515      sprintf (isa_other, "(other isa: 0x%x)", isa);
2516    }
2517
2518  /* Add flag options.  */
2519  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2520    {
2521      if ((flags & flag_opts[i].mask) != 0)
2522	{
2523	  opts[num++][0] = flag_opts[i].option;
2524	  flags &= ~ flag_opts[i].mask;
2525	}
2526    }
2527
2528  if (flags && add_nl_p)
2529    {
2530      opts[num++][0] = target_other;
2531      sprintf (target_other, "(other flags: 0x%x)", flags);
2532    }
2533
2534  /* Add -fpmath= option.  */
2535  if (fpmath)
2536    {
2537      opts[num][0] = "-mfpmath=";
2538      opts[num++][1] = fpmath;
2539    }
2540
2541  /* Any options?  */
2542  if (num == 0)
2543    return NULL;
2544
2545  gcc_assert (num < ARRAY_SIZE (opts));
2546
2547  /* Size the string.  */
2548  len = 0;
2549  sep_len = (add_nl_p) ? 3 : 1;
2550  for (i = 0; i < num; i++)
2551    {
2552      len += sep_len;
2553      for (j = 0; j < 2; j++)
2554	if (opts[i][j])
2555	  len += strlen (opts[i][j]);
2556    }
2557
2558  /* Build the string.  */
2559  ret = ptr = (char *) xmalloc (len);
2560  line_len = 0;
2561
2562  for (i = 0; i < num; i++)
2563    {
2564      size_t len2[2];
2565
2566      for (j = 0; j < 2; j++)
2567	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2568
2569      if (i != 0)
2570	{
2571	  *ptr++ = ' ';
2572	  line_len++;
2573
2574	  if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2575	    {
2576	      *ptr++ = '\\';
2577	      *ptr++ = '\n';
2578	      line_len = 0;
2579	    }
2580	}
2581
2582      for (j = 0; j < 2; j++)
2583	if (opts[i][j])
2584	  {
2585	    memcpy (ptr, opts[i][j], len2[j]);
2586	    ptr += len2[j];
2587	    line_len += len2[j];
2588	  }
2589    }
2590
2591  *ptr = '\0';
2592  gcc_assert (ret + len >= ptr);
2593
2594  return ret;
2595}
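
/* As an illustration, a call such as

     ix86_target_string (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE,
                         MASK_80387, "i686", "generic", NULL, false);

   returns the xmalloc'ed string "-march=i686 -mtune=generic -msse2 -msse
   -m80387": -march=/-mtune= come first, then the matching ISA options in
   table order, then the matching flag options.  */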
2596
2597/* Function that is callable from the debugger to print the current
2598   options.  */
2599void
2600ix86_debug_options (void)
2601{
2602  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2603				   ix86_arch_string, ix86_tune_string,
2604				   ix86_fpmath_string, true);
2605
2606  if (opts)
2607    {
2608      fprintf (stderr, "%s\n\n", opts);
2609      free (opts);
2610    }
2611  else
2612    fputs ("<no options>\n\n", stderr);
2613
2614  return;
2615}
2616
2617/* Sometimes certain combinations of command options do not make
2618   sense on a particular target machine.  You can define a macro
2619   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
2620   defined, is executed once just after all the command options have
2621   been parsed.
2622
2623   Don't use this macro to turn on various extra optimizations for
2624   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
2625
2626void
2627override_options (bool main_args_p)
2628{
2629  int i;
2630  unsigned int ix86_arch_mask, ix86_tune_mask;
2631  const bool ix86_tune_specified = (ix86_tune_string != NULL);
2632  const char *prefix;
2633  const char *suffix;
2634  const char *sw;
2635
2636  /* Comes from final.c -- no real reason to change it.  */
2637#define MAX_CODE_ALIGN 16
2638
2639  enum pta_flags
2640    {
2641      PTA_SSE = 1 << 0,
2642      PTA_SSE2 = 1 << 1,
2643      PTA_SSE3 = 1 << 2,
2644      PTA_MMX = 1 << 3,
2645      PTA_PREFETCH_SSE = 1 << 4,
2646      PTA_3DNOW = 1 << 5,
2647      PTA_3DNOW_A = 1 << 6,
2648      PTA_64BIT = 1 << 7,
2649      PTA_SSSE3 = 1 << 8,
2650      PTA_CX16 = 1 << 9,
2651      PTA_POPCNT = 1 << 10,
2652      PTA_ABM = 1 << 11,
2653      PTA_SSE4A = 1 << 12,
2654      PTA_NO_SAHF = 1 << 13,
2655      PTA_SSE4_1 = 1 << 14,
2656      PTA_SSE4_2 = 1 << 15,
2657      PTA_AES = 1 << 16,
2658      PTA_PCLMUL = 1 << 17,
2659      PTA_AVX = 1 << 18,
2660      PTA_FMA = 1 << 19,
2661      PTA_MOVBE = 1 << 20,
2662      PTA_FMA4 = 1 << 21,
2663      PTA_XOP = 1 << 22,
2664      PTA_LWP = 1 << 23
2665    };
2666
2667  static struct pta
2668    {
2669      const char *const name;		/* processor name or nickname.  */
2670      const enum processor_type processor;
2671      const enum attr_cpu schedule;
2672      const unsigned /*enum pta_flags*/ flags;
2673    }
2674  const processor_alias_table[] =
2675    {
2676      {"i386", PROCESSOR_I386, CPU_NONE, 0},
2677      {"i486", PROCESSOR_I486, CPU_NONE, 0},
2678      {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2679      {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2680      {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2681      {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2682      {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2683      {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2684      {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2685      {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2686      {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2687      {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2688      {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2689	PTA_MMX | PTA_SSE},
2690      {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2691	PTA_MMX | PTA_SSE},
2692      {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2693	PTA_MMX | PTA_SSE | PTA_SSE2},
2694      {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2695	PTA_MMX | PTA_SSE | PTA_SSE2},
2696      {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2697	PTA_MMX | PTA_SSE | PTA_SSE2},
2698      {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2699	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2700      {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2701	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2702	| PTA_CX16 | PTA_NO_SAHF},
2703      {"core2", PROCESSOR_CORE2, CPU_CORE2,
2704	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2705	| PTA_SSSE3 | PTA_CX16},
2706      {"atom", PROCESSOR_ATOM, CPU_ATOM,
2707	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2708	| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2709      {"geode", PROCESSOR_GEODE, CPU_GEODE,
2710	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2711      {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2712      {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2713      {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2714      {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2715	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2716      {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2717	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2718      {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2719	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2720      {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2721	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2722      {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2723	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2724      {"x86-64", PROCESSOR_K8, CPU_K8,
2725	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2726      {"k8", PROCESSOR_K8, CPU_K8,
2727	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2728	| PTA_SSE2 | PTA_NO_SAHF},
2729      {"k8-sse3", PROCESSOR_K8, CPU_K8,
2730	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2731	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2732      {"opteron", PROCESSOR_K8, CPU_K8,
2733	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2734	| PTA_SSE2 | PTA_NO_SAHF},
2735      {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2736        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2737	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2738      {"athlon64", PROCESSOR_K8, CPU_K8,
2739	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2740	| PTA_SSE2 | PTA_NO_SAHF},
2741      {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2742	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2743	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2744      {"athlon-fx", PROCESSOR_K8, CPU_K8,
2745	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2746	| PTA_SSE2 | PTA_NO_SAHF},
2747      {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2748	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2749	| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2750      {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2751	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2752	| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2753      {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2754	0 /* flags are only used for -march switch.  */ },
2755      {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2756	PTA_64BIT /* flags are only used for -march switch.  */ },
2757    };
2758
2759  int const pta_size = ARRAY_SIZE (processor_alias_table);
2760
2761  /* Set up prefix/suffix so the error messages refer to either the command
2762     line argument, or the attribute(target).  */
2763  if (main_args_p)
2764    {
2765      prefix = "-m";
2766      suffix = "";
2767      sw = "switch";
2768    }
2769  else
2770    {
2771      prefix = "option(\"";
2772      suffix = "\")";
2773      sw = "attribute";
2774    }
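  /* With these settings a diagnostic such as
     "bad value (%s) for %stune=%s %s" prints, for example, as
     "bad value (foo) for -mtune= switch" in the command line case and as
     "bad value (foo) for option("tune=") attribute" in the attribute case.  */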
2775
2776#ifdef SUBTARGET_OVERRIDE_OPTIONS
2777  SUBTARGET_OVERRIDE_OPTIONS;
2778#endif
2779
2780#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2781  SUBSUBTARGET_OVERRIDE_OPTIONS;
2782#endif
2783
2784  /* -fPIC is the default for x86_64 Darwin.  */
2785  if (TARGET_MACHO && TARGET_64BIT)
2786    flag_pic = 2;
2787
2788  /* Set the default values for switches whose default depends on TARGET_64BIT
2789     in case they weren't overwritten by command line options.  */
2790  if (TARGET_64BIT)
2791    {
2792      /* Mach-O doesn't support omitting the frame pointer for now.  */
2793      if (flag_omit_frame_pointer == 2)
2794	flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2795      if (flag_asynchronous_unwind_tables == 2)
2796	flag_asynchronous_unwind_tables = 1;
2797      if (flag_pcc_struct_return == 2)
2798	flag_pcc_struct_return = 0;
2799    }
2800  else
2801    {
2802      if (flag_omit_frame_pointer == 2)
2803	flag_omit_frame_pointer = 0;
2804      if (flag_asynchronous_unwind_tables == 2)
2805	flag_asynchronous_unwind_tables = 0;
2806      if (flag_pcc_struct_return == 2)
2807	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2808    }
2809
2810  /* Need to check -mtune=generic first.  */
2811  if (ix86_tune_string)
2812    {
2813      if (!strcmp (ix86_tune_string, "generic")
2814	  || !strcmp (ix86_tune_string, "i686")
2815	  /* As special support for cross compilers we read -mtune=native
2816	     as -mtune=generic.  With native compilers we won't see the
2817	     -mtune=native, as it was changed by the driver.  */
2818	  || !strcmp (ix86_tune_string, "native"))
2819	{
2820	  if (TARGET_64BIT)
2821	    ix86_tune_string = "generic64";
2822	  else
2823	    ix86_tune_string = "generic32";
2824	}
2825      /* If this call is for setting the option attribute, allow the
2826	 generic32/generic64 that was previously set.  */
2827      else if (!main_args_p
2828	       && (!strcmp (ix86_tune_string, "generic32")
2829		   || !strcmp (ix86_tune_string, "generic64")))
2830	;
2831      else if (!strncmp (ix86_tune_string, "generic", 7))
2832        error ("bad value (%s) for %stune=%s %s",
2833	       ix86_tune_string, prefix, suffix, sw);
2834      else if (!strcmp (ix86_tune_string, "x86-64"))
2835        warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated.  Use "
2836                 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2837                 prefix, suffix, prefix, suffix, prefix, suffix);
2838    }
2839  else
2840    {
2841      if (ix86_arch_string)
2842	ix86_tune_string = ix86_arch_string;
2843      if (!ix86_tune_string)
2844	{
2845	  ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2846	  ix86_tune_defaulted = 1;
2847	}
2848
2849      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
2850	 need to use a sensible tune option.  */
2851      if (!strcmp (ix86_tune_string, "generic")
2852	  || !strcmp (ix86_tune_string, "x86-64")
2853	  || !strcmp (ix86_tune_string, "i686"))
2854	{
2855	  if (TARGET_64BIT)
2856	    ix86_tune_string = "generic64";
2857	  else
2858	    ix86_tune_string = "generic32";
2859	}
2860    }
2861
2862  if (ix86_stringop_string)
2863    {
2864      if (!strcmp (ix86_stringop_string, "rep_byte"))
2865	stringop_alg = rep_prefix_1_byte;
2866      else if (!strcmp (ix86_stringop_string, "libcall"))
2867	stringop_alg = libcall;
2868      else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2869	stringop_alg = rep_prefix_4_byte;
2870      else if (!strcmp (ix86_stringop_string, "rep_8byte")
2871	       && TARGET_64BIT)
2872	/* rep; movq isn't available in 32-bit code.  */
2873	stringop_alg = rep_prefix_8_byte;
2874      else if (!strcmp (ix86_stringop_string, "byte_loop"))
2875	stringop_alg = loop_1_byte;
2876      else if (!strcmp (ix86_stringop_string, "loop"))
2877	stringop_alg = loop;
2878      else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2879	stringop_alg = unrolled_loop;
2880      else
2881	error ("bad value (%s) for %sstringop-strategy=%s %s",
2882	       ix86_stringop_string, prefix, suffix, sw);
2883    }
2884
2885  if (!ix86_arch_string)
2886    ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
2887  else
2888    ix86_arch_specified = 1;
2889
2890  /* Validate -mabi= value.  */
2891  if (ix86_abi_string)
2892    {
2893      if (strcmp (ix86_abi_string, "sysv") == 0)
2894	ix86_abi = SYSV_ABI;
2895      else if (strcmp (ix86_abi_string, "ms") == 0)
2896	ix86_abi = MS_ABI;
2897      else
2898	error ("unknown ABI (%s) for %sabi=%s %s",
2899	       ix86_abi_string, prefix, suffix, sw);
2900    }
2901  else
2902    ix86_abi = DEFAULT_ABI;
2903
2904  if (ix86_cmodel_string != 0)
2905    {
2906      if (!strcmp (ix86_cmodel_string, "small"))
2907	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2908      else if (!strcmp (ix86_cmodel_string, "medium"))
2909	ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2910      else if (!strcmp (ix86_cmodel_string, "large"))
2911	ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2912      else if (flag_pic)
2913	error ("code model %s does not support PIC mode", ix86_cmodel_string);
2914      else if (!strcmp (ix86_cmodel_string, "32"))
2915	ix86_cmodel = CM_32;
2916      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2917	ix86_cmodel = CM_KERNEL;
2918      else
2919	error ("bad value (%s) for %scmodel=%s %s",
2920	       ix86_cmodel_string, prefix, suffix, sw);
2921    }
2922  else
2923    {
2924      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2925	 use of rip-relative addressing.  This eliminates fixups that
2926	 would otherwise be needed if this object is to be placed in a
2927	 DLL, and is essentially just as efficient as direct addressing.  */
2928      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2929	ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2930      else if (TARGET_64BIT)
2931	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2932      else
2933        ix86_cmodel = CM_32;
2934    }
2935  if (ix86_asm_string != 0)
2936    {
2937      if (! TARGET_MACHO
2938	  && !strcmp (ix86_asm_string, "intel"))
2939	ix86_asm_dialect = ASM_INTEL;
2940      else if (!strcmp (ix86_asm_string, "att"))
2941	ix86_asm_dialect = ASM_ATT;
2942      else
2943	error ("bad value (%s) for %sasm=%s %s",
2944	       ix86_asm_string, prefix, suffix, sw);
2945    }
2946  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2947    error ("code model %qs not supported in the %s bit mode",
2948	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2949  if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2950    sorry ("%i-bit mode not compiled in",
2951	   (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2952
2953  for (i = 0; i < pta_size; i++)
2954    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2955      {
2956	ix86_schedule = processor_alias_table[i].schedule;
2957	ix86_arch = processor_alias_table[i].processor;
2958	/* Default cpu tuning to the architecture.  */
2959	ix86_tune = ix86_arch;
2960
2961	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2962	  error ("CPU you selected does not support x86-64 "
2963		 "instruction set");
2964
2965	if (processor_alias_table[i].flags & PTA_MMX
2966	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2967	  ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2968	if (processor_alias_table[i].flags & PTA_3DNOW
2969	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2970	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2971	if (processor_alias_table[i].flags & PTA_3DNOW_A
2972	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2973	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2974	if (processor_alias_table[i].flags & PTA_SSE
2975	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2976	  ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2977	if (processor_alias_table[i].flags & PTA_SSE2
2978	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2979	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2980	if (processor_alias_table[i].flags & PTA_SSE3
2981	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2982	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2983	if (processor_alias_table[i].flags & PTA_SSSE3
2984	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2985	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2986	if (processor_alias_table[i].flags & PTA_SSE4_1
2987	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2988	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2989	if (processor_alias_table[i].flags & PTA_SSE4_2
2990	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2991	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2992	if (processor_alias_table[i].flags & PTA_AVX
2993	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2994	  ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2995	if (processor_alias_table[i].flags & PTA_FMA
2996	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2997	  ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2998	if (processor_alias_table[i].flags & PTA_SSE4A
2999	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3000	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3001	if (processor_alias_table[i].flags & PTA_FMA4
3002	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3003	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3004	if (processor_alias_table[i].flags & PTA_XOP
3005	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3006	  ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3007	if (processor_alias_table[i].flags & PTA_LWP
3008	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3009	  ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3010	if (processor_alias_table[i].flags & PTA_ABM
3011	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3012	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3013	if (processor_alias_table[i].flags & PTA_CX16
3014	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3015	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3016	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3017	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3018	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3019	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3020	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3021	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3022	if (processor_alias_table[i].flags & PTA_MOVBE
3023	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3024	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3025	if (processor_alias_table[i].flags & PTA_AES
3026	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3027	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
3028	if (processor_alias_table[i].flags & PTA_PCLMUL
3029	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3030	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3031	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3032	  x86_prefetch_sse = true;
3033
3034	break;
3035      }
3036
3037  if (!strcmp (ix86_arch_string, "generic"))
3038    error ("generic CPU can be used only for %stune=%s %s",
3039	   prefix, suffix, sw);
3040  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3041    error ("bad value (%s) for %sarch=%s %s",
3042	   ix86_arch_string, prefix, suffix, sw);
3043
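  /* Each entry of initial_ix86_arch_features is a bitmask of processors for
     which the corresponding X86_ARCH_* feature holds; pick out the bit of
     the selected -march processor.  */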
3044  ix86_arch_mask = 1u << ix86_arch;
3045  for (i = 0; i < X86_ARCH_LAST; ++i)
3046    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3047
3048  for (i = 0; i < pta_size; i++)
3049    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3050      {
3051	ix86_schedule = processor_alias_table[i].schedule;
3052	ix86_tune = processor_alias_table[i].processor;
3053	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3054	  {
3055	    if (ix86_tune_defaulted)
3056	      {
3057		ix86_tune_string = "x86-64";
3058		for (i = 0; i < pta_size; i++)
3059		  if (! strcmp (ix86_tune_string,
3060				processor_alias_table[i].name))
3061		    break;
3062		ix86_schedule = processor_alias_table[i].schedule;
3063		ix86_tune = processor_alias_table[i].processor;
3064	      }
3065	    else
3066	      error ("CPU you selected does not support x86-64 "
3067		     "instruction set");
3068	  }
3069        /* Intel CPUs have always interpreted SSE prefetch instructions as
3070	   NOPs; so, we can enable SSE prefetch instructions even when
3071	   -mtune (rather than -march) points us to a processor that has them.
3072	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3073	   higher processors.  */
3074	if (TARGET_CMOVE
3075	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3076	  x86_prefetch_sse = true;
3077	break;
3078      }
3079
3080  if (ix86_tune_specified && i == pta_size)
3081    error ("bad value (%s) for %stune=%s %s",
3082	   ix86_tune_string, prefix, suffix, sw);
3083
3084  ix86_tune_mask = 1u << ix86_tune;
3085  for (i = 0; i < X86_TUNE_LAST; ++i)
3086    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3087
3088  if (optimize_size)
3089    ix86_cost = &ix86_size_cost;
3090  else
3091    ix86_cost = processor_target_table[ix86_tune].cost;
3092
3093  /* Arrange to set up i386_stack_locals for all functions.  */
3094  init_machine_status = ix86_init_machine_status;
3095
3096  /* Validate -mregparm= value.  */
3097  if (ix86_regparm_string)
3098    {
3099      if (TARGET_64BIT)
3100	warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3101      i = atoi (ix86_regparm_string);
3102      if (i < 0 || i > REGPARM_MAX)
3103	error ("%sregparm=%d%s is not between 0 and %d",
3104	       prefix, i, suffix, REGPARM_MAX);
3105      else
3106	ix86_regparm = i;
3107    }
3108  if (TARGET_64BIT)
3109    ix86_regparm = REGPARM_MAX;
3110
3111  /* If the user has provided any of the -malign-* options,
3112     warn and use that value only if -falign-* is not set.
3113     Remove this code in GCC 3.2 or later.  */
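  /* Note the old -malign-* switches take the base-2 logarithm of the
     alignment, so e.g. -malign-loops=4 corresponds to -falign-loops=16.  */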
3114  if (ix86_align_loops_string)
3115    {
3116      warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3117	       prefix, suffix, suffix);
3118      if (align_loops == 0)
3119	{
3120	  i = atoi (ix86_align_loops_string);
3121	  if (i < 0 || i > MAX_CODE_ALIGN)
3122	    error ("%salign-loops=%d%s is not between 0 and %d",
3123		   prefix, i, suffix, MAX_CODE_ALIGN);
3124	  else
3125	    align_loops = 1 << i;
3126	}
3127    }
3128
3129  if (ix86_align_jumps_string)
3130    {
3131      warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3132	       prefix, suffix, suffix);
3133      if (align_jumps == 0)
3134	{
3135	  i = atoi (ix86_align_jumps_string);
3136	  if (i < 0 || i > MAX_CODE_ALIGN)
3137	    error ("%salign-jumps=%d%s is not between 0 and %d",
3138		   prefix, i, suffix, MAX_CODE_ALIGN);
3139	  else
3140	    align_jumps = 1 << i;
3141	}
3142    }
3143
3144  if (ix86_align_funcs_string)
3145    {
3146      warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3147	       prefix, suffix, suffix);
3148      if (align_functions == 0)
3149	{
3150	  i = atoi (ix86_align_funcs_string);
3151	  if (i < 0 || i > MAX_CODE_ALIGN)
3152	    error ("%salign-functions=%d%s is not between 0 and %d",
3153		   prefix, i, suffix, MAX_CODE_ALIGN);
3154	  else
3155	    align_functions = 1 << i;
3156	}
3157    }
3158
3159  /* Default align_* from the processor table.  */
3160  if (align_loops == 0)
3161    {
3162      align_loops = processor_target_table[ix86_tune].align_loop;
3163      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3164    }
3165  if (align_jumps == 0)
3166    {
3167      align_jumps = processor_target_table[ix86_tune].align_jump;
3168      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3169    }
3170  if (align_functions == 0)
3171    {
3172      align_functions = processor_target_table[ix86_tune].align_func;
3173    }
3174
3175  /* Validate -mbranch-cost= value, or provide default.  */
3176  ix86_branch_cost = ix86_cost->branch_cost;
3177  if (ix86_branch_cost_string)
3178    {
3179      i = atoi (ix86_branch_cost_string);
3180      if (i < 0 || i > 5)
3181	error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3182      else
3183	ix86_branch_cost = i;
3184    }
3185  if (ix86_section_threshold_string)
3186    {
3187      i = atoi (ix86_section_threshold_string);
3188      if (i < 0)
3189	error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3190      else
3191	ix86_section_threshold = i;
3192    }
3193
3194  if (ix86_tls_dialect_string)
3195    {
3196      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3197	ix86_tls_dialect = TLS_DIALECT_GNU;
3198      else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3199	ix86_tls_dialect = TLS_DIALECT_GNU2;
3200      else
3201	error ("bad value (%s) for %stls-dialect=%s %s",
3202	       ix86_tls_dialect_string, prefix, suffix, sw);
3203    }
3204
3205  if (ix87_precision_string)
3206    {
3207      i = atoi (ix87_precision_string);
3208      if (i != 32 && i != 64 && i != 80)
3209	error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
3210    }
3211
3212  if (TARGET_64BIT)
3213    {
3214      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3215
3216      /* Enable by default the SSE and MMX builtins.  Do allow the user to
3217	 explicitly disable any of these.  In particular, disabling SSE and
3218	 MMX for kernel code is extremely useful.  */
3219      if (!ix86_arch_specified)
3220	ix86_isa_flags
3221	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3222	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3223
3224      if (TARGET_RTD)
3225	warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3226    }
3227  else
3228    {
3229      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3230
3231      if (!ix86_arch_specified)
3232	ix86_isa_flags
3233	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3234
3235      /* The i386 ABI does not specify a red zone.  It still makes sense to
3236         use one when the programmer keeps the stack from being clobbered.  */
3237      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3238        target_flags |= MASK_NO_RED_ZONE;
3239    }
3240
3241  /* Keep nonleaf frame pointers.  */
3242  if (flag_omit_frame_pointer)
3243    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3244  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3245    flag_omit_frame_pointer = 1;
3246
3247  /* If we're doing fast math, we don't care about comparison order
3248     wrt NaNs.  This lets us use a shorter comparison sequence.  */
3249  if (flag_finite_math_only)
3250    target_flags &= ~MASK_IEEE_FP;
3251
3252  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3253     since the insns won't need emulation.  */
3254  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3255    target_flags &= ~MASK_NO_FANCY_MATH_387;
3256
3257  /* Likewise, if the target doesn't have a 387, or we've specified
3258     software floating point, don't use 387 inline intrinsics.  */
3259  if (!TARGET_80387)
3260    target_flags |= MASK_NO_FANCY_MATH_387;
3261
3262  /* Turn on MMX builtins for -msse.  */
3263  if (TARGET_SSE)
3264    {
3265      ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3266      x86_prefetch_sse = true;
3267    }
3268
3269  /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
3270  if (TARGET_SSE4_2 || TARGET_ABM)
3271    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3272
3273  /* Validate -mpreferred-stack-boundary= value or default it to
3274     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
3275  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3276  if (ix86_preferred_stack_boundary_string)
3277    {
3278      i = atoi (ix86_preferred_stack_boundary_string);
3279      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3280	error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3281	       prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3282      else
3283	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3284    }
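  /* E.g. -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
     = 128 bits, i.e. 16-byte stack alignment.  */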
3285
3286  /* Set the default value for -mstackrealign.  */
3287  if (ix86_force_align_arg_pointer == -1)
3288    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3289
3290  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3291
3292  /* Validate -mincoming-stack-boundary= value or default it to
3293     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
3294  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3295  if (ix86_incoming_stack_boundary_string)
3296    {
3297      i = atoi (ix86_incoming_stack_boundary_string);
3298      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3299	error ("-mincoming-stack-boundary=%d is not between %d and 12",
3300	       i, TARGET_64BIT ? 4 : 2);
3301      else
3302	{
3303	  ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3304	  ix86_incoming_stack_boundary
3305	    = ix86_user_incoming_stack_boundary;
3306	}
3307    }
3308
3309  /* Accept -msseregparm only if at least SSE support is enabled.  */
3310  if (TARGET_SSEREGPARM
3311      && ! TARGET_SSE)
3312    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3313
3314  ix86_fpmath = TARGET_FPMATH_DEFAULT;
3315  if (ix86_fpmath_string != 0)
3316    {
3317      if (! strcmp (ix86_fpmath_string, "387"))
3318	ix86_fpmath = FPMATH_387;
3319      else if (! strcmp (ix86_fpmath_string, "sse"))
3320	{
3321	  if (!TARGET_SSE)
3322	    {
3323	      warning (0, "SSE instruction set disabled, using 387 arithmetic");
3324	      ix86_fpmath = FPMATH_387;
3325	    }
3326	  else
3327	    ix86_fpmath = FPMATH_SSE;
3328	}
3329      else if (! strcmp (ix86_fpmath_string, "387,sse")
3330	       || ! strcmp (ix86_fpmath_string, "387+sse")
3331	       || ! strcmp (ix86_fpmath_string, "sse,387")
3332	       || ! strcmp (ix86_fpmath_string, "sse+387")
3333	       || ! strcmp (ix86_fpmath_string, "both"))
3334	{
3335	  if (!TARGET_SSE)
3336	    {
3337	      warning (0, "SSE instruction set disabled, using 387 arithmetic");
3338	      ix86_fpmath = FPMATH_387;
3339	    }
3340	  else if (!TARGET_80387)
3341	    {
3342	      warning (0, "387 instruction set disabled, using SSE arithmetic");
3343	      ix86_fpmath = FPMATH_SSE;
3344	    }
3345	  else
3346	    ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3347	}
3348      else
3349	error ("bad value (%s) for %sfpmath=%s %s",
3350	       ix86_fpmath_string, prefix, suffix, sw);
3351    }
3352
3353  /* If the i387 is disabled, then do not return values in it. */
3354  if (!TARGET_80387)
3355    target_flags &= ~MASK_FLOAT_RETURNS;
3356
3357  /* Use external vectorized library in vectorizing intrinsics.  */
3358  if (ix86_veclibabi_string)
3359    {
3360      if (strcmp (ix86_veclibabi_string, "svml") == 0)
3361	ix86_veclib_handler = ix86_veclibabi_svml;
3362      else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3363	ix86_veclib_handler = ix86_veclibabi_acml;
3364      else
3365	error ("unknown vectorization library ABI type (%s) for "
3366	       "%sveclibabi=%s %s", ix86_veclibabi_string,
3367	       prefix, suffix, sw);
3368    }
3369
3370  if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3371      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3372      && !optimize_size)
3373    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3374
3375  /* ??? Unwind info is not correct around the CFG unless either a frame
3376     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
3377     unwind info generation to be aware of the CFG and propagating states
3378     around edges.  */
3379  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3380       || flag_exceptions || flag_non_call_exceptions)
3381      && flag_omit_frame_pointer
3382      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3383    {
3384      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3385	warning (0, "unwind tables currently require either a frame pointer "
3386		 "or %saccumulate-outgoing-args%s for correctness",
3387		 prefix, suffix);
3388      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3389    }
3390
3391  /* If stack probes are required, the space used for large function
3392     arguments on the stack must also be probed, so enable
3393     -maccumulate-outgoing-args so this happens in the prologue.  */
3394  if (TARGET_STACK_PROBE
3395      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3396    {
3397      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3398	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3399		 "for correctness", prefix, suffix);
3400      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3401    }
3402
3403  /* For sane SSE instruction set generation we need the fcomi instruction.
3404     It is safe to enable all CMOVE instructions.  */
3405  if (TARGET_SSE)
3406    TARGET_CMOVE = 1;
3407
3408  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
3409  {
3410    char *p;
3411    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3412    p = strchr (internal_label_prefix, 'X');
3413    internal_label_prefix_len = p - internal_label_prefix;
3414    *p = '\0';
3415  }
3416
3417  /* When the scheduling description is not available, disable the scheduler
3418     pass so it won't slow down compilation and make x87 code slower.  */
3419  if (!TARGET_SCHEDULE)
3420    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3421
3422  if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3423    set_param_value ("simultaneous-prefetches",
3424		     ix86_cost->simultaneous_prefetches);
3425  if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3426    set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3427  if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3428    set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3429  if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3430    set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3431
3432  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3433     can be optimized to ap = __builtin_next_arg (0).  */
3434  if (!TARGET_64BIT)
3435    targetm.expand_builtin_va_start = NULL;
3436
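  /* The ix86_gen_* hooks below let later code emit the DImode or SImode
     variant of these patterns without re-testing TARGET_64BIT at each use.  */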
3437  if (TARGET_64BIT)
3438    {
3439      ix86_gen_leave = gen_leave_rex64;
3440      ix86_gen_pop1 = gen_popdi1;
3441      ix86_gen_add3 = gen_adddi3;
3442      ix86_gen_sub3 = gen_subdi3;
3443      ix86_gen_sub3_carry = gen_subdi3_carry;
3444      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3445      ix86_gen_monitor = gen_sse3_monitor64;
3446      ix86_gen_andsp = gen_anddi3;
3447    }
3448  else
3449    {
3450      ix86_gen_leave = gen_leave;
3451      ix86_gen_pop1 = gen_popsi1;
3452      ix86_gen_add3 = gen_addsi3;
3453      ix86_gen_sub3 = gen_subsi3;
3454      ix86_gen_sub3_carry = gen_subsi3_carry;
3455      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3456      ix86_gen_monitor = gen_sse3_monitor;
3457      ix86_gen_andsp = gen_andsi3;
3458    }
3459
3460#ifdef USE_IX86_CLD
3461  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
3462  if (!TARGET_64BIT)
3463    target_flags |= MASK_CLD & ~target_flags_explicit;
3464#endif
3465
3466  /* Save the initial options in case the user uses function specific options.  */
3467  if (main_args_p)
3468    target_option_default_node = target_option_current_node
3469      = build_target_option_node ();
3470}
3471
3472/* Update register usage after having seen the compiler flags.  */
3473
3474void
3475ix86_conditional_register_usage (void)
3476{
3477  int i;
3478  unsigned int j;
3479
3480  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3481    {
3482      if (fixed_regs[i] > 1)
3483	fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3484      if (call_used_regs[i] > 1)
3485	call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3486    }
3487
3488  /* The PIC register, if it exists, is fixed.  */
3489  j = PIC_OFFSET_TABLE_REGNUM;
3490  if (j != INVALID_REGNUM)
3491    fixed_regs[j] = call_used_regs[j] = 1;
3492
3493  /* The MS_ABI changes the set of call-used registers.  */
3494  if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3495    {
3496      call_used_regs[SI_REG] = 0;
3497      call_used_regs[DI_REG] = 0;
3498      call_used_regs[XMM6_REG] = 0;
3499      call_used_regs[XMM7_REG] = 0;
3500      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3501	call_used_regs[i] = 0;
3502    }
3503
3504  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3505     other call-clobbered regs for 64-bit.  */
3506  if (TARGET_64BIT)
3507    {
3508      CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3509
3510      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3511	if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3512	    && call_used_regs[i])
3513	  SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3514    }
3515
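  /* "Squashing" a register below marks it fixed and call-used and clears its
     name, so it is never allocated and never printed.  */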
3516  /* If MMX is disabled, squash the registers.  */
3517  if (! TARGET_MMX)
3518    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3519      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3520	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3521
3522  /* If SSE is disabled, squash the registers.  */
3523  if (! TARGET_SSE)
3524    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3525      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3526	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3527
3528  /* If the FPU is disabled, squash the registers.  */
3529  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3530    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3531      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3532	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3533
3534  /* If 32-bit, squash the 64-bit registers.  */
3535  if (! TARGET_64BIT)
3536    {
3537      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3538	reg_names[i] = "";
3539      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3540	reg_names[i] = "";
3541    }
3542}
3543
3544
3545/* Save the current options.  */
3546
3547static void
3548ix86_function_specific_save (struct cl_target_option *ptr)
3549{
3550  ptr->arch = ix86_arch;
3551  ptr->schedule = ix86_schedule;
3552  ptr->tune = ix86_tune;
3553  ptr->fpmath = ix86_fpmath;
3554  ptr->branch_cost = ix86_branch_cost;
3555  ptr->tune_defaulted = ix86_tune_defaulted;
3556  ptr->arch_specified = ix86_arch_specified;
3557  ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3558  ptr->target_flags_explicit = target_flags_explicit;
3559
3560  /* The fields are char but the variables are not; make sure the
3561     values fit in the fields.  */
3562  gcc_assert (ptr->arch == ix86_arch);
3563  gcc_assert (ptr->schedule == ix86_schedule);
3564  gcc_assert (ptr->tune == ix86_tune);
3565  gcc_assert (ptr->fpmath == ix86_fpmath);
3566  gcc_assert (ptr->branch_cost == ix86_branch_cost);
3567}
3568
3569/* Restore the current options.  */
3570
3571static void
3572ix86_function_specific_restore (struct cl_target_option *ptr)
3573{
3574  enum processor_type old_tune = ix86_tune;
3575  enum processor_type old_arch = ix86_arch;
3576  unsigned int ix86_arch_mask, ix86_tune_mask;
3577  int i;
3578
3579  ix86_arch = (enum processor_type) ptr->arch;
3580  ix86_schedule = (enum attr_cpu) ptr->schedule;
3581  ix86_tune = (enum processor_type) ptr->tune;
3582  ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3583  ix86_branch_cost = ptr->branch_cost;
3584  ix86_tune_defaulted = ptr->tune_defaulted;
3585  ix86_arch_specified = ptr->arch_specified;
3586  ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3587  target_flags_explicit = ptr->target_flags_explicit;
3588
3589  /* Recreate the arch feature tests if the arch changed.  */
3590  if (old_arch != ix86_arch)
3591    {
3592      ix86_arch_mask = 1u << ix86_arch;
3593      for (i = 0; i < X86_ARCH_LAST; ++i)
3594	ix86_arch_features[i]
3595	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3596    }
3597
3598  /* Recreate the tune optimization tests.  */
3599  if (old_tune != ix86_tune)
3600    {
3601      ix86_tune_mask = 1u << ix86_tune;
3602      for (i = 0; i < X86_TUNE_LAST; ++i)
3603	ix86_tune_features[i]
3604	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3605    }
3606}
3607
3608/* Print the current options.  */
3609
3610static void
3611ix86_function_specific_print (FILE *file, int indent,
3612			      struct cl_target_option *ptr)
3613{
3614  char *target_string
3615    = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3616			  NULL, NULL, NULL, false);
3617
3618  fprintf (file, "%*sarch = %d (%s)\n",
3619	   indent, "",
3620	   ptr->arch,
3621	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
3622	    ? cpu_names[ptr->arch]
3623	    : "<unknown>"));
3624
3625  fprintf (file, "%*stune = %d (%s)\n",
3626	   indent, "",
3627	   ptr->tune,
3628	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
3629	    ? cpu_names[ptr->tune]
3630	    : "<unknown>"));
3631
3632  fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3633	   (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3634	   (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3635  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3636
3637  if (target_string)
3638    {
3639      fprintf (file, "%*s%s\n", indent, "", target_string);
3640      free (target_string);
3641    }
3642}
3643
3644
3645/* Inner function to process the attribute((target(...))), take an argument and
3646   set the current options from the argument. If we have a list, recursively go
3647   over the list.  */
3648
3649static bool
3650ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3651{
3652  char *next_optstr;
3653  bool ret = true;
3654
3655#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3656#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3657#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3658#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }
3659
3660  enum ix86_opt_type
3661  {
3662    ix86_opt_unknown,
3663    ix86_opt_yes,
3664    ix86_opt_no,
3665    ix86_opt_str,
3666    ix86_opt_isa
3667  };
3668
3669  static const struct
3670  {
3671    const char *string;
3672    size_t len;
3673    enum ix86_opt_type type;
3674    int opt;
3675    int mask;
3676  } attrs[] = {
3677    /* isa options */
3678    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
3679    IX86_ATTR_ISA ("abm",	OPT_mabm),
3680    IX86_ATTR_ISA ("aes",	OPT_maes),
3681    IX86_ATTR_ISA ("avx",	OPT_mavx),
3682    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
3683    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
3684    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
3685    IX86_ATTR_ISA ("sse",	OPT_msse),
3686    IX86_ATTR_ISA ("sse2",	OPT_msse2),
3687    IX86_ATTR_ISA ("sse3",	OPT_msse3),
3688    IX86_ATTR_ISA ("sse4",	OPT_msse4),
3689    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
3690    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
3691    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
3692    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
3693    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
3694    IX86_ATTR_ISA ("xop",	OPT_mxop),
3695    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
3696
3697    /* string options */
3698    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
3699    IX86_ATTR_STR ("fpmath=",	IX86_FUNCTION_SPECIFIC_FPMATH),
3700    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),
3701
3702    /* flag options */
3703    IX86_ATTR_YES ("cld",
3704		   OPT_mcld,
3705		   MASK_CLD),
3706
3707    IX86_ATTR_NO ("fancy-math-387",
3708		  OPT_mfancy_math_387,
3709		  MASK_NO_FANCY_MATH_387),
3710
3711    IX86_ATTR_YES ("ieee-fp",
3712		   OPT_mieee_fp,
3713		   MASK_IEEE_FP),
3714
3715    IX86_ATTR_YES ("inline-all-stringops",
3716		   OPT_minline_all_stringops,
3717		   MASK_INLINE_ALL_STRINGOPS),
3718
3719    IX86_ATTR_YES ("inline-stringops-dynamically",
3720		   OPT_minline_stringops_dynamically,
3721		   MASK_INLINE_STRINGOPS_DYNAMICALLY),
3722
3723    IX86_ATTR_NO ("align-stringops",
3724		  OPT_mno_align_stringops,
3725		  MASK_NO_ALIGN_STRINGOPS),
3726
3727    IX86_ATTR_YES ("recip",
3728		   OPT_mrecip,
3729		   MASK_RECIP),
3730
3731  };
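  /* For example, attribute((target("sse4.2,no-fancy-math-387,arch=core2")))
     is split at the commas below; the "no-" prefix negates a flag option and
     "arch=" is treated as a string option.  */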
3732
3733  /* If this is a list, recurse to get the options.  */
3734  if (TREE_CODE (args) == TREE_LIST)
3735    {
3736      bool ret = true;
3737
3738      for (; args; args = TREE_CHAIN (args))
3739	if (TREE_VALUE (args)
3740	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3741	  ret = false;
3742
3743      return ret;
3744    }
3745
3746  else if (TREE_CODE (args) != STRING_CST)
3747    gcc_unreachable ();
3748
3749  /* Handle multiple arguments separated by commas.  */
3750  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3751
3752  while (next_optstr && *next_optstr != '\0')
3753    {
3754      char *p = next_optstr;
3755      char *orig_p = p;
3756      char *comma = strchr (next_optstr, ',');
3757      const char *opt_string;
3758      size_t len, opt_len;
3759      int opt;
3760      bool opt_set_p;
3761      char ch;
3762      unsigned i;
3763      enum ix86_opt_type type = ix86_opt_unknown;
3764      int mask = 0;
3765
3766      if (comma)
3767	{
3768	  *comma = '\0';
3769	  len = comma - next_optstr;
3770	  next_optstr = comma + 1;
3771	}
3772      else
3773	{
3774	  len = strlen (p);
3775	  next_optstr = NULL;
3776	}
3777
3778      /* Recognize no-xxx.  */
3779      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3780	{
3781	  opt_set_p = false;
3782	  p += 3;
3783	  len -= 3;
3784	}
3785      else
3786	opt_set_p = true;
3787
3788      /* Find the option.  */
3789      ch = *p;
3790      opt = N_OPTS;
3791      for (i = 0; i < ARRAY_SIZE (attrs); i++)
3792	{
3793	  type = attrs[i].type;
3794	  opt_len = attrs[i].len;
3795	  if (ch == attrs[i].string[0]
3796	      && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3797	      && memcmp (p, attrs[i].string, opt_len) == 0)
3798	    {
3799	      opt = attrs[i].opt;
3800	      mask = attrs[i].mask;
3801	      opt_string = attrs[i].string;
3802	      break;
3803	    }
3804	}
3805
3806      /* Process the option.  */
3807      if (opt == N_OPTS)
3808	{
3809	  error ("attribute(target(\"%s\")) is unknown", orig_p);
3810	  ret = false;
3811	}
3812
3813      else if (type == ix86_opt_isa)
3814	ix86_handle_option (opt, p, opt_set_p);
3815
3816      else if (type == ix86_opt_yes || type == ix86_opt_no)
3817	{
3818	  if (type == ix86_opt_no)
3819	    opt_set_p = !opt_set_p;
3820
3821	  if (opt_set_p)
3822	    target_flags |= mask;
3823	  else
3824	    target_flags &= ~mask;
3825	}
3826
3827      else if (type == ix86_opt_str)
3828	{
3829	  if (p_strings[opt])
3830	    {
3831	      error ("option(\"%s\") was already specified", opt_string);
3832	      ret = false;
3833	    }
3834	  else
3835	    p_strings[opt] = xstrdup (p + opt_len);
3836	}
3837
3838      else
3839	gcc_unreachable ();
3840    }
3841
3842  return ret;
3843}
3844
3845/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
3846
3847tree
3848ix86_valid_target_attribute_tree (tree args)
3849{
3850  const char *orig_arch_string = ix86_arch_string;
3851  const char *orig_tune_string = ix86_tune_string;
3852  const char *orig_fpmath_string = ix86_fpmath_string;
3853  int orig_tune_defaulted = ix86_tune_defaulted;
3854  int orig_arch_specified = ix86_arch_specified;
3855  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3856  tree t = NULL_TREE;
3857  int i;
3858  struct cl_target_option *def
3859    = TREE_TARGET_OPTION (target_option_default_node);
3860
3861  /* Process each of the options on the chain.  */
3862  if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3863    return NULL_TREE;
3864
3865  /* If the changed options are different from the default, rerun
3866     override_options, and then save the options away.  The string options are
3867     attribute options, and will be undone when we copy the save structure.  */
3868  if (ix86_isa_flags != def->ix86_isa_flags
3869      || target_flags != def->target_flags
3870      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3871      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3872      || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3873    {
3874      /* If we are using the default tune= or arch=, undo the string assigned,
3875	 and use the default.  */
3876      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3877	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3878      else if (!orig_arch_specified)
3879	ix86_arch_string = NULL;
3880
3881      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3882	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3883      else if (orig_tune_defaulted)
3884	ix86_tune_string = NULL;
3885
3886      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
3887      if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3888	ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3889      else if (!TARGET_64BIT && TARGET_SSE)
3890	ix86_fpmath_string = "sse,387";
3891
3892      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
3893      override_options (false);
3894
3895      /* Add any builtin functions with the new isa if any.  */
3896      ix86_add_new_builtins (ix86_isa_flags);
3897
3898      /* Save the current options unless we are validating options for
3899	 #pragma.  */
3900      t = build_target_option_node ();
3901
3902      ix86_arch_string = orig_arch_string;
3903      ix86_tune_string = orig_tune_string;
3904      ix86_fpmath_string = orig_fpmath_string;
3905
3906      /* Free up memory allocated to hold the strings.  */
3907      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3908	if (option_strings[i])
3909	  free (option_strings[i]);
3910    }
3911
3912  return t;
3913}
3914
3915/* Hook to validate attribute((target("string"))).  */
3916
3917static bool
3918ix86_valid_target_attribute_p (tree fndecl,
3919			       tree ARG_UNUSED (name),
3920			       tree args,
3921			       int ARG_UNUSED (flags))
3922{
3923  struct cl_target_option cur_target;
3924  bool ret = true;
3925  tree old_optimize = build_optimization_node ();
3926  tree new_target, new_optimize;
3927  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3928
3929  /* If the function changed the optimization levels as well as setting target
3930     options, start with the optimizations specified.  */
3931  if (func_optimize && func_optimize != old_optimize)
3932    cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3933
3934  /* The target attributes may also change some optimization flags, so update
3935     the optimization options if necessary.  */
3936  cl_target_option_save (&cur_target);
3937  new_target = ix86_valid_target_attribute_tree (args);
3938  new_optimize = build_optimization_node ();
3939
3940  if (!new_target)
3941    ret = false;
3942
3943  else if (fndecl)
3944    {
3945      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3946
3947      if (old_optimize != new_optimize)
3948	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
3949    }
3950
3951  cl_target_option_restore (&cur_target);
3952
3953  if (old_optimize != new_optimize)
3954    cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3955
3956  return ret;
3957}
3958
3959
3960/* Hook to determine if one function can safely inline another.  */
3961
3962static bool
3963ix86_can_inline_p (tree caller, tree callee)
3964{
3965  bool ret = false;
3966  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3967  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3968
3969  /* If callee has no option attributes, then it is ok to inline.  */
3970  if (!callee_tree)
3971    ret = true;
3972
3973  /* If caller has no option attributes, but callee does then it is not ok to
3974     inline.  */
3975  else if (!caller_tree)
3976    ret = false;
3977
3978  else
3979    {
3980      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3981      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3982
3983      /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
3984	 function can inline an SSE2 function but an SSE2 function can't inline
3985	 an SSE4 function.  */
3986      if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3987	  != callee_opts->ix86_isa_flags)
3988	ret = false;
3989
3990      /* See if we have the same non-isa options.  */
3991      else if (caller_opts->target_flags != callee_opts->target_flags)
3992	ret = false;
3993
3994      /* See if arch, tune, etc. are the same.  */
3995      else if (caller_opts->arch != callee_opts->arch)
3996	ret = false;
3997
3998      else if (caller_opts->tune != callee_opts->tune)
3999	ret = false;
4000
4001      else if (caller_opts->fpmath != callee_opts->fpmath)
4002	ret = false;
4003
4004      else if (caller_opts->branch_cost != callee_opts->branch_cost)
4005	ret = false;
4006
4007      else
4008	ret = true;
4009    }
4010
4011  return ret;
4012}
4013
4014
4015/* Remember the last target of ix86_set_current_function.  */
4016static GTY(()) tree ix86_previous_fndecl;
4017
4018/* Establish appropriate back-end context for processing the function
4019   FNDECL.  The argument might be NULL to indicate processing at top
4020   level, outside of any function scope.  */
4021static void
4022ix86_set_current_function (tree fndecl)
4023{
4024  /* Only change the context if the function changes.  This hook is called
4025     several times in the course of compiling a function, and we don't want to
4026     slow things down too much or call target_reinit when it isn't safe.  */
4027  if (fndecl && fndecl != ix86_previous_fndecl)
4028    {
4029      tree old_tree = (ix86_previous_fndecl
4030		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4031		       : NULL_TREE);
4032
4033      tree new_tree = (fndecl
4034		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4035		       : NULL_TREE);
4036
4037      ix86_previous_fndecl = fndecl;
4038      if (old_tree == new_tree)
4039	;
4040
4041      else if (new_tree)
4042	{
4043	  cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4044	  target_reinit ();
4045	}
4046
4047      else if (old_tree)
4048	{
4049	  struct cl_target_option *def
4050	    = TREE_TARGET_OPTION (target_option_current_node);
4051
4052	  cl_target_option_restore (def);
4053	  target_reinit ();
4054	}
4055    }
4056}
4057
4058
4059/* Return true if this goes in large data/bss.  */
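/* A decl is "large" here when the medium code model is in use and it either
   lives in an explicit .ldata/.lbss section or its size is zero (incomplete)
   or above ix86_section_threshold (-mlarge-data-threshold).  */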
4060
4061static bool
4062ix86_in_large_data_p (tree exp)
4063{
4064  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4065    return false;
4066
4067  /* Functions are never large data.  */
4068  if (TREE_CODE (exp) == FUNCTION_DECL)
4069    return false;
4070
4071  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4072    {
4073      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4074      if (strcmp (section, ".ldata") == 0
4075	  || strcmp (section, ".lbss") == 0)
4076	return true;
4077      return false;
4078    }
4079  else
4080    {
4081      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4082
4083      /* If this is an incomplete type with size 0, then we can't put it
4084	 in data because it might be too big when completed.  */
4085      if (!size || size > ix86_section_threshold)
4086	return true;
4087    }
4088
4089  return false;
4090}
4091
4092/* Switch to the appropriate section for output of DECL.
4093   DECL is either a `VAR_DECL' node or a constant of some sort.
4094   RELOC indicates whether forming the initial value of DECL requires
4095   link-time relocations.  */
4096
4097static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4098	ATTRIBUTE_UNUSED;
4099
4100static section *
4101x86_64_elf_select_section (tree decl, int reloc,
4102			   unsigned HOST_WIDE_INT align)
4103{
4104  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4105      && ix86_in_large_data_p (decl))
4106    {
4107      const char *sname = NULL;
4108      unsigned int flags = SECTION_WRITE;
4109      switch (categorize_decl_for_section (decl, reloc))
4110	{
4111	case SECCAT_DATA:
4112	  sname = ".ldata";
4113	  break;
4114	case SECCAT_DATA_REL:
4115	  sname = ".ldata.rel";
4116	  break;
4117	case SECCAT_DATA_REL_LOCAL:
4118	  sname = ".ldata.rel.local";
4119	  break;
4120	case SECCAT_DATA_REL_RO:
4121	  sname = ".ldata.rel.ro";
4122	  break;
4123	case SECCAT_DATA_REL_RO_LOCAL:
4124	  sname = ".ldata.rel.ro.local";
4125	  break;
4126	case SECCAT_BSS:
4127	  sname = ".lbss";
4128	  flags |= SECTION_BSS;
4129	  break;
4130	case SECCAT_RODATA:
4131	case SECCAT_RODATA_MERGE_STR:
4132	case SECCAT_RODATA_MERGE_STR_INIT:
4133	case SECCAT_RODATA_MERGE_CONST:
4134	  sname = ".lrodata";
4135	  flags = 0;
4136	  break;
4137	case SECCAT_SRODATA:
4138	case SECCAT_SDATA:
4139	case SECCAT_SBSS:
4140	  gcc_unreachable ();
4141	case SECCAT_TEXT:
4142	case SECCAT_TDATA:
4143	case SECCAT_TBSS:
4144	  /* We don't split these for the medium model.  Place them into
4145	     default sections and hope for the best.  */
4146	  break;
4147	case SECCAT_EMUTLS_VAR:
4148	case SECCAT_EMUTLS_TMPL:
4149	  gcc_unreachable ();
4150	}
4151      if (sname)
4152	{
4153	  /* We might get called with string constants, but get_named_section
4154	     doesn't like them as they are not DECLs.  Also, we need to set
4155	     flags in that case.  */
4156	  if (!DECL_P (decl))
4157	    return get_section (sname, flags, NULL);
4158	  return get_named_section (decl, sname, reloc);
4159	}
4160    }
4161  return default_elf_select_section (decl, reloc, align);
4162}
4163
4164/* Build up a unique section name, expressed as a
4165   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4166   RELOC indicates whether the initial value of EXP requires
4167   link-time relocations.  */
4168
4169static void ATTRIBUTE_UNUSED
4170x86_64_elf_unique_section (tree decl, int reloc)
4171{
4172  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4173      && ix86_in_large_data_p (decl))
4174    {
4175      const char *prefix = NULL;
4176      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
4177      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4178
4179      switch (categorize_decl_for_section (decl, reloc))
4180	{
4181	case SECCAT_DATA:
4182	case SECCAT_DATA_REL:
4183	case SECCAT_DATA_REL_LOCAL:
4184	case SECCAT_DATA_REL_RO:
4185	case SECCAT_DATA_REL_RO_LOCAL:
4186          prefix = one_only ? ".ld" : ".ldata";
4187	  break;
4188	case SECCAT_BSS:
4189          prefix = one_only ? ".lb" : ".lbss";
4190	  break;
4191	case SECCAT_RODATA:
4192	case SECCAT_RODATA_MERGE_STR:
4193	case SECCAT_RODATA_MERGE_STR_INIT:
4194	case SECCAT_RODATA_MERGE_CONST:
4195          prefix = one_only ? ".lr" : ".lrodata";
4196	  break;
4197	case SECCAT_SRODATA:
4198	case SECCAT_SDATA:
4199	case SECCAT_SBSS:
4200	  gcc_unreachable ();
4201	case SECCAT_TEXT:
4202	case SECCAT_TDATA:
4203	case SECCAT_TBSS:
4204	  /* We don't split these for the medium model.  Place them into
4205	     default sections and hope for the best.  */
4206	  break;
4207	case SECCAT_EMUTLS_VAR:
4208	  prefix = targetm.emutls.var_section;
4209	  break;
4210	case SECCAT_EMUTLS_TMPL:
4211	  prefix = targetm.emutls.tmpl_section;
4212	  break;
4213	}
4214      if (prefix)
4215	{
4216	  const char *name, *linkonce;
4217	  char *string;
4218
4219	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4220	  name = targetm.strip_name_encoding (name);
4221
4222	  /* If we're using one_only, then there needs to be a .gnu.linkonce
4223     	     prefix to the section name.  */
4224	  linkonce = one_only ? ".gnu.linkonce" : "";
4225
4226	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4227
4228	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4229	  return;
4230	}
4231    }
4232  default_unique_section (decl, reloc);
4233}
4234
4235#ifdef COMMON_ASM_OP
4236/* This says how to output assembler code to declare an
4237   uninitialized external linkage data object.
4238
4239   For medium model x86-64 we need to use the .largecomm directive for
4240   large objects.  */
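/* E.g. for a hypothetical object "foo" larger than -mlarge-data-threshold,
   compiled with -mcmodel=medium, this emits something like
   ".largecomm foo,70000,32" (size in bytes, alignment in bytes).  */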
4241void
4242x86_elf_aligned_common (FILE *file,
4243			const char *name, unsigned HOST_WIDE_INT size,
4244			int align)
4245{
4246  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4247      && size > (unsigned int)ix86_section_threshold)
4248    fputs (".largecomm\t", file);
4249  else
4250    fputs (COMMON_ASM_OP, file);
4251  assemble_name (file, name);
4252  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4253	   size, align / BITS_PER_UNIT);
4254}
4255#endif
4256
4257/* Utility function for targets to use in implementing
4258   ASM_OUTPUT_ALIGNED_BSS.  */
4259
4260void
4261x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4262			const char *name, unsigned HOST_WIDE_INT size,
4263			int align)
4264{
4265  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4266      && size > (unsigned int)ix86_section_threshold)
4267    switch_to_section (get_named_section (decl, ".lbss", 0));
4268  else
4269    switch_to_section (bss_section);
4270  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4271#ifdef ASM_DECLARE_OBJECT_NAME
4272  last_assemble_variable_decl = decl;
4273  ASM_DECLARE_OBJECT_NAME (file, name, decl);
4274#else
4275  /* Standard thing is just output label for the object.  */
4276  ASM_OUTPUT_LABEL (file, name);
4277#endif /* ASM_DECLARE_OBJECT_NAME */
4278  ASM_OUTPUT_SKIP (file, size ? size : 1);
4279}
4280
4281void
4282optimization_options (int level, int size ATTRIBUTE_UNUSED)
4283{
4284  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
4285     make the problem with not enough registers even worse.  */
4286#ifdef INSN_SCHEDULING
4287  if (level > 1)
4288    flag_schedule_insns = 0;
4289#endif
4290
4291  if (TARGET_MACHO)
4292    /* The Darwin libraries never set errno, so we might as well
4293       avoid calling them when that's the only reason we would.  */
4294    flag_errno_math = 0;
4295
4296  /* The default values of these switches depend on TARGET_64BIT,
4297     which is not known at this moment.  Mark these values with 2 and
4298     let the user override them.  If there is no command line option
4299     specifying them, we will set the defaults in override_options.  */
4300  if (optimize >= 1)
4301    flag_omit_frame_pointer = 2;
4302  flag_pcc_struct_return = 2;
4303  flag_asynchronous_unwind_tables = 2;
4304  flag_vect_cost_model = 1;
4305#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4306  SUBTARGET_OPTIMIZATION_OPTIONS;
4307#endif
4308}
4309
4310/* Decide whether we can make a sibling call to a function.  DECL is the
4311   declaration of the function being targeted by the call and EXP is the
4312   CALL_EXPR representing the call.  */
4313
4314static bool
4315ix86_function_ok_for_sibcall (tree decl, tree exp)
4316{
4317  tree type, decl_or_type;
4318  rtx a, b;
4319
4320  /* If we are generating position-independent code, we cannot sibcall
4321     optimize any indirect call, or a direct call to a global function,
4322     as the PLT requires %ebx be live.  */
4323  if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4324    return false;
4325
4326  /* If we need to align the outgoing stack, then sibcalling would
4327     unalign the stack, which may break the called function.  */
4328  if (ix86_minimum_incoming_stack_boundary (true)
4329      < PREFERRED_STACK_BOUNDARY)
4330    return false;
4331
4332  if (decl)
4333    {
4334      decl_or_type = decl;
4335      type = TREE_TYPE (decl);
4336    }
4337  else
4338    {
4339      /* We're looking at the CALL_EXPR, we need the type of the function.  */
4340      type = CALL_EXPR_FN (exp);		/* pointer expression */
4341      type = TREE_TYPE (type);			/* pointer type */
4342      type = TREE_TYPE (type);			/* function type */
4343      decl_or_type = type;
4344    }
4345
4346  /* Check that the return value locations are the same.  For example,
4347     if we are returning floats on the 80387 register stack, we cannot
4348     make a sibcall from a function that doesn't return a float to a
4349     function that does or, conversely, from a function that does return
4350     a float to a function that doesn't; the necessary stack adjustment
4351     would not be executed.  This is also the place we notice
4352     differences in the return value ABI.  Note that it is ok for one
4353     of the functions to have void return type as long as the return
4354     value of the other is passed in a register.  */
4355  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4356  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4357			   cfun->decl, false);
4358  if (STACK_REG_P (a) || STACK_REG_P (b))
4359    {
4360      if (!rtx_equal_p (a, b))
4361	return false;
4362    }
4363  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4364    ;
4365  else if (!rtx_equal_p (a, b))
4366    return false;
4367
4368  if (TARGET_64BIT)
4369    {
4370      /* The SYSV ABI has more call-clobbered registers;
4371	 disallow sibcalls from MS to SYSV.  */
4372      if (cfun->machine->call_abi == MS_ABI
4373	  && ix86_function_type_abi (type) == SYSV_ABI)
4374	return false;
4375    }
4376  else
4377    {
4378      /* If this call is indirect, we'll need to be able to use a
4379	 call-clobbered register for the address of the target function.
4380	 Make sure that all such registers are not used for passing
4381	 parameters.  Note that DLLIMPORT functions are indirect.  */
4382      if (!decl
4383	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4384	{
4385	  if (ix86_function_regparm (type, NULL) >= 3)
4386	    {
4387	      /* ??? Need to count the actual number of registers to be used,
4388		 not the possible number of registers.  Fix later.  */
4389	      return false;
4390	    }
4391	}
4392    }
4393
4394  /* Otherwise okay.  That also includes certain types of indirect calls.  */
4395  return true;
4396}
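
/* For example, on ia32 a function returning float cannot make a
   sibcall to a function returning int: the caller's return value lives
   on the x87 register stack, so the checks above see mismatched
   STACK_REG return locations and refuse the optimization, keeping the
   stack adjustment that a plain jump would skip.  */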
4397
4398/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4399   calling convention attributes;
4400   arguments as in struct attribute_spec.handler.  */
4401
4402static tree
4403ix86_handle_cconv_attribute (tree *node, tree name,
4404				   tree args,
4405				   int flags ATTRIBUTE_UNUSED,
4406				   bool *no_add_attrs)
4407{
4408  if (TREE_CODE (*node) != FUNCTION_TYPE
4409      && TREE_CODE (*node) != METHOD_TYPE
4410      && TREE_CODE (*node) != FIELD_DECL
4411      && TREE_CODE (*node) != TYPE_DECL)
4412    {
4413      warning (OPT_Wattributes, "%qE attribute only applies to functions",
4414	       name);
4415      *no_add_attrs = true;
4416      return NULL_TREE;
4417    }
4418
4419  /* Can combine regparm with all attributes but fastcall.  */
4420  if (is_attribute_p ("regparm", name))
4421    {
4422      tree cst;
4423
4424      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4425        {
4426	  error ("fastcall and regparm attributes are not compatible");
4427	}
4428
4429      cst = TREE_VALUE (args);
4430      if (TREE_CODE (cst) != INTEGER_CST)
4431	{
4432	  warning (OPT_Wattributes,
4433		   "%qE attribute requires an integer constant argument",
4434		   name);
4435	  *no_add_attrs = true;
4436	}
4437      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4438	{
4439	  warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4440		   name, REGPARM_MAX);
4441	  *no_add_attrs = true;
4442	}
4443
4444      return NULL_TREE;
4445    }
4446
4447  if (TARGET_64BIT)
4448    {
4449      /* Do not warn when emulating the MS ABI.  */
4450      if (TREE_CODE (*node) != FUNCTION_TYPE
4451	  || ix86_function_type_abi (*node) != MS_ABI)
4452	warning (OPT_Wattributes, "%qE attribute ignored",
4453	         name);
4454      *no_add_attrs = true;
4455      return NULL_TREE;
4456    }
4457
4458  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
4459  if (is_attribute_p ("fastcall", name))
4460    {
4461      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4462        {
4463	  error ("fastcall and cdecl attributes are not compatible");
4464	}
4465      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4466        {
4467	  error ("fastcall and stdcall attributes are not compatible");
4468	}
4469      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4470        {
4471	  error ("fastcall and regparm attributes are not compatible");
4472	}
4473    }
4474
4475  /* Can combine stdcall with fastcall (redundant), regparm and
4476     sseregparm.  */
4477  else if (is_attribute_p ("stdcall", name))
4478    {
4479      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4480        {
4481	  error ("stdcall and cdecl attributes are not compatible");
4482	}
4483      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4484        {
4485	  error ("stdcall and fastcall attributes are not compatible");
4486	}
4487    }
4488
4489  /* Can combine cdecl with regparm and sseregparm.  */
4490  else if (is_attribute_p ("cdecl", name))
4491    {
4492      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4493        {
4494	  error ("stdcall and cdecl attributes are not compatible");
4495	}
4496      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4497        {
4498	  error ("fastcall and cdecl attributes are not compatible");
4499	}
4500    }
4501
4502  /* Can combine sseregparm with all attributes.  */
4503
4504  return NULL_TREE;
4505}
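
/* As a (hypothetical) user-level illustration of the conventions
   checked above:

     int __attribute__ ((regparm (3))) f (int a, int b, int c);
     int __attribute__ ((fastcall)) g (int a, int b);
     int __attribute__ ((stdcall)) h (int a);

   f takes its three arguments in %eax, %edx and %ecx; g takes its
   first two in %ecx and %edx; h is stack-based but pops its own
   argument on return.  Combining fastcall with regparm on a single
   declaration is rejected by the code above.  */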
4506
4507/* Return 0 if the attributes for two types are incompatible, 1 if they
4508   are compatible, and 2 if they are nearly compatible (which causes a
4509   warning to be generated).  */
4510
4511static int
4512ix86_comp_type_attributes (const_tree type1, const_tree type2)
4513{
4514  /* Check for mismatch of non-default calling convention.  */
4515  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4516
4517  if (TREE_CODE (type1) != FUNCTION_TYPE
4518      && TREE_CODE (type1) != METHOD_TYPE)
4519    return 1;
4520
4521  /* Check for mismatched fastcall/regparm types.  */
4522  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4523       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4524      || (ix86_function_regparm (type1, NULL)
4525	  != ix86_function_regparm (type2, NULL)))
4526    return 0;
4527
4528  /* Check for mismatched sseregparm types.  */
4529  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4530      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4531    return 0;
4532
4533  /* Check for mismatched return types (cdecl vs stdcall).  */
4534  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4535      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4536    return 0;
4537
4538  return 1;
4539}
4540
4541/* Return the regparm value for a function with the indicated TYPE and DECL.
4542   DECL may be NULL when calling function indirectly
4543   or considering a libcall.  */
4544
4545static int
4546ix86_function_regparm (const_tree type, const_tree decl)
4547{
4548  tree attr;
4549  int regparm;
4550
4551  if (TARGET_64BIT)
4552    return (ix86_function_type_abi (type) == SYSV_ABI
4553	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4554
4555  regparm = ix86_regparm;
4556  attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4557  if (attr)
4558    {
4559      regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4560      return regparm;
4561    }
4562
4563  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4564    return 2;
4565
4566  /* Use register calling convention for local functions when possible.  */
4567  if (decl
4568      && TREE_CODE (decl) == FUNCTION_DECL
4569      && optimize
4570      && !profile_flag)
4571    {
4572      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
4573      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4574      if (i && i->local)
4575	{
4576	  int local_regparm, globals = 0, regno;
4577
4578	  /* Make sure no regparm register is taken by a
4579	     fixed register variable.  */
4580	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4581	    if (fixed_regs[local_regparm])
4582	      break;
4583
4584	  /* We don't want to use regparm(3) for nested functions as
4585	     these use a static chain pointer in the third argument.  */
4586	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4587	    local_regparm = 2;
4588
4589	  /* Each fixed register usage increases register pressure,
4590	     so fewer registers should be used for argument passing.
4591	     This functionality can be overridden by an explicit
4592	     regparm value.  */
4593	  for (regno = 0; regno <= DI_REG; regno++)
4594	    if (fixed_regs[regno])
4595	      globals++;
4596
4597	  local_regparm
4598	    = globals < local_regparm ? local_regparm - globals : 0;
4599
4600	  if (local_regparm > regparm)
4601	    regparm = local_regparm;
4602	}
4603    }
4604
4605  return regparm;
4606}
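
/* For example, a static function that is only called locally and is
   compiled with optimization may be given regparm(3) automatically by
   the code above; if the user also passes -ffixed-ecx, the scan stops
   at the fixed %ecx and the fixed-register count is subtracted as
   well, so the automatic choice drops to regparm(1).  */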
4607
4608/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4609   DFmode (2) arguments in SSE registers for a function with the
4610   indicated TYPE and DECL.  DECL may be NULL when calling function
4611   indirectly or considering a libcall.  Otherwise return 0.  */
4612
4613static int
4614ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4615{
4616  gcc_assert (!TARGET_64BIT);
4617
4618  /* Use SSE registers to pass SFmode and DFmode arguments if requested
4619     by the sseregparm attribute.  */
4620  if (TARGET_SSEREGPARM
4621      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4622    {
4623      if (!TARGET_SSE)
4624	{
4625	  if (warn)
4626	    {
4627	      if (decl)
4628		error ("calling %qD with attribute sseregparm without "
4629		       "SSE/SSE2 enabled", decl);
4630	      else
4631		error ("calling %qT with attribute sseregparm without "
4632		       "SSE/SSE2 enabled", type);
4633	    }
4634	  return 0;
4635	}
4636
4637      return 2;
4638    }
4639
4640  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4641     (and DFmode for SSE2) arguments in SSE registers.  */
4642  if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4643    {
4644      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
4645      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4646      if (i && i->local)
4647	return TARGET_SSE2 ? 2 : 1;
4648    }
4649
4650  return 0;
4651}
4652
4653/* Return true if EAX is live at the start of the function.  Used by
4654   ix86_expand_prologue to determine if we need special help before
4655   calling allocate_stack_worker.  */
4656
4657static bool
4658ix86_eax_live_at_start_p (void)
4659{
4660  /* Cheat.  Don't bother working forward from ix86_function_regparm
4661     to the function type to whether an actual argument is located in
4662     eax.  Instead just look at cfg info, which is still close enough
4663     to correct at this point.  This gives false positives for broken
4664     functions that might use uninitialized data that happens to be
4665     allocated in eax, but who cares?  */
4666  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4667}
4668
4669/* Value is the number of bytes of arguments automatically
4670   popped when returning from a subroutine call.
4671   FUNDECL is the declaration node of the function (as a tree),
4672   FUNTYPE is the data type of the function (as a tree),
4673   or for a library call it is an identifier node for the subroutine name.
4674   SIZE is the number of bytes of arguments passed on the stack.
4675
4676   On the 80386, the RTD insn may be used to pop them if the number
4677     of args is fixed, but if the number is variable then the caller
4678     must pop them all.  RTD can't be used for library calls now
4679     because the library is compiled with the Unix compiler.
4680   Use of RTD is a selectable option, since it is incompatible with
4681   standard Unix calling sequences.  If the option is not selected,
4682   the caller must always pop the args.
4683
4684   The attribute stdcall is equivalent to RTD on a per module basis.  */
4685
4686int
4687ix86_return_pops_args (tree fundecl, tree funtype, int size)
4688{
4689  int rtd;
4690
4691  /* None of the 64-bit ABIs pop arguments.  */
4692  if (TARGET_64BIT)
4693    return 0;
4694
4695  rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4696
4697  /* Cdecl functions override -mrtd, and never pop the stack.  */
4698  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4699    {
4700      /* Stdcall and fastcall functions will pop the stack if not
4701         variable args.  */
4702      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4703          || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4704	rtd = 1;
4705
4706      if (rtd && ! stdarg_p (funtype))
4707	return size;
4708    }
4709
4710  /* Lose any fake structure return argument if it is passed on the stack.  */
4711  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4712      && !KEEP_AGGREGATE_RETURN_POINTER)
4713    {
4714      int nregs = ix86_function_regparm (funtype, fundecl);
4715      if (nregs == 0)
4716	return GET_MODE_SIZE (Pmode);
4717    }
4718
4719  return 0;
4720}
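
/* For example, on ia32 the (hypothetical) declaration

     void __attribute__ ((stdcall)) f (int a, int b);

   makes this function return 8, so f pops its two stack arguments with
   "ret $8", whereas a plain cdecl function returns 0 here and leaves
   the stack adjustment to the caller.  */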
4721
4722/* Argument support functions.  */
4723
4724/* Return true when register may be used to pass function parameters.  */
4725bool
4726ix86_function_arg_regno_p (int regno)
4727{
4728  int i;
4729  const int *parm_regs;
4730
4731  if (!TARGET_64BIT)
4732    {
4733      if (TARGET_MACHO)
4734        return (regno < REGPARM_MAX
4735                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4736      else
4737        return (regno < REGPARM_MAX
4738	        || (TARGET_MMX && MMX_REGNO_P (regno)
4739	  	    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4740	        || (TARGET_SSE && SSE_REGNO_P (regno)
4741		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4742    }
4743
4744  if (TARGET_MACHO)
4745    {
4746      if (SSE_REGNO_P (regno) && TARGET_SSE)
4747        return true;
4748    }
4749  else
4750    {
4751      if (TARGET_SSE && SSE_REGNO_P (regno)
4752          && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4753        return true;
4754    }
4755
4756  /* TODO: The function should depend on the current function's ABI,
4757     but builtins.c would then need updating.  Therefore we use the
4758     default ABI.  */
4759
4760  /* RAX is used as hidden argument to va_arg functions.  */
4761  if (ix86_abi == SYSV_ABI && regno == AX_REG)
4762    return true;
4763
4764  if (ix86_abi == MS_ABI)
4765    parm_regs = x86_64_ms_abi_int_parameter_registers;
4766  else
4767    parm_regs = x86_64_int_parameter_registers;
4768  for (i = 0; i < (ix86_abi == MS_ABI
4769		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
4770    if (regno == parm_regs[i])
4771      return true;
4772  return false;
4773}
4774
4775/* Return true if we do not know how to pass TYPE solely in registers.  */
4776
4777static bool
4778ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4779{
4780  if (must_pass_in_stack_var_size_or_pad (mode, type))
4781    return true;
4782
4783  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
4784     The layout_type routine is crafty and tries to trick us into passing
4785     currently unsupported vector types on the stack by using TImode.  */
4786  return (!TARGET_64BIT && mode == TImode
4787	  && type && TREE_CODE (type) != VECTOR_TYPE);
4788}
4789
4790/* Return the size, in bytes, of the area reserved for arguments passed
4791   in registers for the function represented by FNDECL, depending on the
4792   ABI used.  */
4793int
4794ix86_reg_parm_stack_space (const_tree fndecl)
4795{
4796  enum calling_abi call_abi = SYSV_ABI;
4797  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4798    call_abi = ix86_function_abi (fndecl);
4799  else
4800    call_abi = ix86_function_type_abi (fndecl);
4801  if (call_abi == MS_ABI)
4802    return 32;
4803  return 0;
4804}
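
/* For example, for an MS ABI function such as the (hypothetical)

     void __attribute__ ((ms_abi)) f (int a);

   the 32 bytes returned above correspond to the four 8-byte "home"
   slots that the caller reserves for the RCX, RDX, R8 and R9 argument
   registers, even when fewer arguments are actually passed.  */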
4805
4806/* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
4807   call ABI used.  */
4808enum calling_abi
4809ix86_function_type_abi (const_tree fntype)
4810{
4811  if (TARGET_64BIT && fntype != NULL)
4812    {
4813      enum calling_abi abi = ix86_abi;
4814      if (abi == SYSV_ABI)
4815	{
4816	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4817	    abi = MS_ABI;
4818	}
4819      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
4820	abi = SYSV_ABI;
4821      return abi;
4822    }
4823  return ix86_abi;
4824}
4825
4826static bool
4827ix86_function_ms_hook_prologue (const_tree fntype)
4828{
4829  if (!TARGET_64BIT)
4830    {
4831      if (lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fntype)))
4832        {
4833          if (decl_function_context (fntype) != NULL_TREE)
4834            {
4835              error_at (DECL_SOURCE_LOCATION (fntype),
4836                        "ms_hook_prologue is not compatible with nested function");
4837            }
4838
4839          return true;
4840        }
4841    }
4842  return false;
4843}
4844
4845static enum calling_abi
4846ix86_function_abi (const_tree fndecl)
4847{
4848  if (! fndecl)
4849    return ix86_abi;
4850  return ix86_function_type_abi (TREE_TYPE (fndecl));
4851}
4852
4853/* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
4854   call ABI used.  */
4855enum calling_abi
4856ix86_cfun_abi (void)
4857{
4858  if (! cfun || ! TARGET_64BIT)
4859    return ix86_abi;
4860  return cfun->machine->call_abi;
4861}
4862
4863/* regclass.c  */
4864extern void init_regs (void);
4865
4866/* Implementation of the call ABI switching target hook.  The call
4867   register sets specific to FNDECL are selected.  See also
4868   CONDITIONAL_REGISTER_USAGE for more details.  */
4869void
4870ix86_call_abi_override (const_tree fndecl)
4871{
4872  if (fndecl == NULL_TREE)
4873    cfun->machine->call_abi = ix86_abi;
4874  else
4875    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4876}
4877
4878/* The MS and SYSV ABIs have different sets of call-used registers.  Avoid
4879   the expensive re-initialization done by init_regs each time we switch
4880   function context, since it is needed only during RTL expansion.  */
4881static void
4882ix86_maybe_switch_abi (void)
4883{
4884  if (TARGET_64BIT
4885      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4886    reinit_regs ();
4887}
4888
4889/* Initialize a variable CUM of type CUMULATIVE_ARGS
4890   for a call to a function whose data type is FNTYPE.
4891   For a library call, FNTYPE is 0.  */
4892
4893void
4894init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
4895		      tree fntype,	/* tree ptr for function decl */
4896		      rtx libname,	/* SYMBOL_REF of library name or 0 */
4897		      tree fndecl)
4898{
4899  struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4900  memset (cum, 0, sizeof (*cum));
4901
4902  if (fndecl)
4903   cum->call_abi = ix86_function_abi (fndecl);
4904  else
4905   cum->call_abi = ix86_function_type_abi (fntype);
4906  /* Set up the number of registers to use for passing arguments.  */
4907
4908  if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4909    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
4910	   "or subtarget optimization implying it");
4911  cum->nregs = ix86_regparm;
4912  if (TARGET_64BIT)
4913    {
4914      cum->nregs = (cum->call_abi == SYSV_ABI
4915                   ? X86_64_REGPARM_MAX
4916                   : X86_64_MS_REGPARM_MAX);
4917    }
4918  if (TARGET_SSE)
4919    {
4920      cum->sse_nregs = SSE_REGPARM_MAX;
4921      if (TARGET_64BIT)
4922        {
4923          cum->sse_nregs = (cum->call_abi == SYSV_ABI
4924                           ? X86_64_SSE_REGPARM_MAX
4925                           : X86_64_MS_SSE_REGPARM_MAX);
4926        }
4927    }
4928  if (TARGET_MMX)
4929    cum->mmx_nregs = MMX_REGPARM_MAX;
4930  cum->warn_avx = true;
4931  cum->warn_sse = true;
4932  cum->warn_mmx = true;
4933
4934  /* Because the type might mismatch between caller and callee, we need
4935     to use the actual type of the function for local calls.
4936     FIXME: cgraph_analyze can be told to actually record whether a function
4937     uses va_start, so for local functions maybe_vaarg can be made more
4938     aggressive, helping K&R code.
4939     FIXME: once the type system is fixed, we won't need this code anymore.  */
4940  if (i && i->local)
4941    fntype = TREE_TYPE (fndecl);
4942  cum->maybe_vaarg = (fntype
4943		      ? (!prototype_p (fntype) || stdarg_p (fntype))
4944		      : !libname);
4945
4946  if (!TARGET_64BIT)
4947    {
4948      /* If there are variable arguments, then we won't pass anything
4949         in registers in 32-bit mode. */
4950      if (stdarg_p (fntype))
4951	{
4952	  cum->nregs = 0;
4953	  cum->sse_nregs = 0;
4954	  cum->mmx_nregs = 0;
4955	  cum->warn_avx = 0;
4956	  cum->warn_sse = 0;
4957	  cum->warn_mmx = 0;
4958	  return;
4959	}
4960
4961      /* Use ecx and edx registers if function has fastcall attribute,
4962	 else look for regparm information.  */
4963      if (fntype)
4964	{
4965	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4966	    {
4967	      cum->nregs = 2;
4968	      cum->fastcall = 1;
4969	    }
4970	  else
4971	    cum->nregs = ix86_function_regparm (fntype, fndecl);
4972	}
4973
4974      /* Set up the number of SSE registers used for passing SFmode
4975	 and DFmode arguments.  Warn for mismatching ABI.  */
4976      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4977    }
4978}
4979
4980/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
4981   But in the case of vector types, it is some vector mode.
4982
4983   When we have only some of our vector isa extensions enabled, then there
4984   are some modes for which vector_mode_supported_p is false.  For these
4985   modes, the generic vector support in gcc will choose some non-vector mode
4986   in order to implement the type.  By computing the natural mode, we'll
4987   select the proper ABI location for the operand and not depend on whatever
4988   the middle-end decides to do with these vector types.
4989
4990   The middle-end can't deal with vector types larger than 16 bytes.  In
4991   this case, we return the original mode and warn about the ABI change
4992   if CUM isn't NULL.  */
4993
4994static enum machine_mode
4995type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4996{
4997  enum machine_mode mode = TYPE_MODE (type);
4998
4999  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5000    {
5001      HOST_WIDE_INT size = int_size_in_bytes (type);
5002      if ((size == 8 || size == 16 || size == 32)
5003	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
5004	  && TYPE_VECTOR_SUBPARTS (type) > 1)
5005	{
5006	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5007
5008	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5009	    mode = MIN_MODE_VECTOR_FLOAT;
5010	  else
5011	    mode = MIN_MODE_VECTOR_INT;
5012
5013	  /* Get the mode which has this inner mode and number of units.  */
5014	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5015	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5016		&& GET_MODE_INNER (mode) == innermode)
5017	      {
5018		if (size == 32 && !TARGET_AVX)
5019		  {
5020		    static bool warnedavx;
5021
5022		    if (cum
5023			&& !warnedavx
5024			&& cum->warn_avx)
5025		      {
5026			warnedavx = true;
5027			warning (0, "AVX vector argument without AVX "
5028				 "enabled changes the ABI");
5029		      }
5030		    return TYPE_MODE (type);
5031		  }
5032		else
5033		  return mode;
5034	      }
5035
5036	  gcc_unreachable ();
5037	}
5038    }
5039
5040  return mode;
5041}
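
/* For example, given the (hypothetical) user type

     typedef float v4sf __attribute__ ((vector_size (16)));

   the natural mode computed above is V4SFmode even when SSE is
   disabled and the middle-end would have fallen back to a non-vector
   mode, so the argument keeps its SSE-style ABI slot (and the usual
   "changes the ABI" warning is given elsewhere).  */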
5042
5043/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
5044   this may not agree with the mode that the type system has chosen for the
5045   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
5046   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
5047
5048static rtx
5049gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5050		     unsigned int regno)
5051{
5052  rtx tmp;
5053
5054  if (orig_mode != BLKmode)
5055    tmp = gen_rtx_REG (orig_mode, regno);
5056  else
5057    {
5058      tmp = gen_rtx_REG (mode, regno);
5059      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5060      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5061    }
5062
5063  return tmp;
5064}
5065
5066/* x86-64 register passing implementation.  See the x86-64 ABI for details.
5067   The goal of this code is to classify each eightbyte (8-byte chunk) of an
5068   incoming argument by register class and assign registers accordingly.  */
5069
5070/* Return the union class of CLASS1 and CLASS2.
5071   See the x86-64 PS ABI for details.  */
5072
5073static enum x86_64_reg_class
5074merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5075{
5076  /* Rule #1: If both classes are equal, this is the resulting class.  */
5077  if (class1 == class2)
5078    return class1;
5079
5080  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5081     the other class.  */
5082  if (class1 == X86_64_NO_CLASS)
5083    return class2;
5084  if (class2 == X86_64_NO_CLASS)
5085    return class1;
5086
5087  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
5088  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5089    return X86_64_MEMORY_CLASS;
5090
5091  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
5092  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5093      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5094    return X86_64_INTEGERSI_CLASS;
5095  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5096      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5097    return X86_64_INTEGER_CLASS;
5098
5099  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5100     MEMORY is used.  */
5101  if (class1 == X86_64_X87_CLASS
5102      || class1 == X86_64_X87UP_CLASS
5103      || class1 == X86_64_COMPLEX_X87_CLASS
5104      || class2 == X86_64_X87_CLASS
5105      || class2 == X86_64_X87UP_CLASS
5106      || class2 == X86_64_COMPLEX_X87_CLASS)
5107    return X86_64_MEMORY_CLASS;
5108
5109  /* Rule #6: Otherwise class SSE is used.  */
5110  return X86_64_SSE_CLASS;
5111}
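
/* For example, merging X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS
   yields X86_64_INTEGERSI_CLASS (rule #4), while merging
   X86_64_SSE_CLASS with X86_64_X87_CLASS yields X86_64_MEMORY_CLASS
   (rule #5).  */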
5112
5113/* Classify the argument of type TYPE and mode MODE.
5114   CLASSES will be filled by the register class used to pass each word
5115   of the operand.  The number of words is returned.  In case the
5116   parameter should be passed in memory, 0 is returned.  As a special
5117   case for zero-sized containers, classes[0] will be NO_CLASS and 1
5118   is returned.
5119
5120   BIT_OFFSET is used internally for handling records and specifies the
5121   offset in bits modulo 256, to avoid overflow cases.
5122
5123   See the x86-64 PS ABI for details.  */
5124
5125static int
5126classify_argument (enum machine_mode mode, const_tree type,
5127		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5128{
5129  HOST_WIDE_INT bytes =
5130    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5131  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5132
5133  /* Variable sized entities are always passed/returned in memory.  */
5134  if (bytes < 0)
5135    return 0;
5136
5137  if (mode != VOIDmode
5138      && targetm.calls.must_pass_in_stack (mode, type))
5139    return 0;
5140
5141  if (type && AGGREGATE_TYPE_P (type))
5142    {
5143      int i;
5144      tree field;
5145      enum x86_64_reg_class subclasses[MAX_CLASSES];
5146
5147      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
5148      if (bytes > 32)
5149	return 0;
5150
5151      for (i = 0; i < words; i++)
5152	classes[i] = X86_64_NO_CLASS;
5153
5154      /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
5155	 signal the memory class, so handle this as a special case.  */
5156      if (!words)
5157	{
5158	  classes[0] = X86_64_NO_CLASS;
5159	  return 1;
5160	}
5161
5162      /* Classify each field of record and merge classes.  */
5163      switch (TREE_CODE (type))
5164	{
5165	case RECORD_TYPE:
5166	  /* And now merge the fields of structure.  */
5167	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5168	    {
5169	      if (TREE_CODE (field) == FIELD_DECL)
5170		{
5171		  int num;
5172
5173		  if (TREE_TYPE (field) == error_mark_node)
5174		    continue;
5175
5176		  /* Bitfields are always classified as integer.  Handle them
5177		     early, since later code would consider them to be
5178		     misaligned integers.  */
5179		  if (DECL_BIT_FIELD (field))
5180		    {
5181		      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5182			   i < ((int_bit_position (field) + (bit_offset % 64))
5183			        + tree_low_cst (DECL_SIZE (field), 0)
5184				+ 63) / 8 / 8; i++)
5185			classes[i] =
5186			  merge_classes (X86_64_INTEGER_CLASS,
5187					 classes[i]);
5188		    }
5189		  else
5190		    {
5191		      int pos;
5192
5193		      type = TREE_TYPE (field);
5194
5195		      /* Flexible array member is ignored.  */
5196		      if (TYPE_MODE (type) == BLKmode
5197			  && TREE_CODE (type) == ARRAY_TYPE
5198			  && TYPE_SIZE (type) == NULL_TREE
5199			  && TYPE_DOMAIN (type) != NULL_TREE
5200			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5201			      == NULL_TREE))
5202			{
5203			  static bool warned;
5204
5205			  if (!warned && warn_psabi)
5206			    {
5207			      warned = true;
5208			      inform (input_location,
5209				      "The ABI of passing struct with"
5210				      " a flexible array member has"
5211				      " changed in GCC 4.4");
5212			    }
5213			  continue;
5214			}
5215		      num = classify_argument (TYPE_MODE (type), type,
5216					       subclasses,
5217					       (int_bit_position (field)
5218						+ bit_offset) % 256);
5219		      if (!num)
5220			return 0;
5221		      pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5222		      for (i = 0; i < num && (i + pos) < words; i++)
5223			classes[i + pos] =
5224			  merge_classes (subclasses[i], classes[i + pos]);
5225		    }
5226		}
5227	    }
5228	  break;
5229
5230	case ARRAY_TYPE:
5231	  /* Arrays are handled as small records.  */
5232	  {
5233	    int num;
5234	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5235				     TREE_TYPE (type), subclasses, bit_offset);
5236	    if (!num)
5237	      return 0;
5238
5239	    /* The partial classes are now full classes.  */
5240	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5241	      subclasses[0] = X86_64_SSE_CLASS;
5242	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
5243		&& !((bit_offset % 64) == 0 && bytes == 4))
5244	      subclasses[0] = X86_64_INTEGER_CLASS;
5245
5246	    for (i = 0; i < words; i++)
5247	      classes[i] = subclasses[i % num];
5248
5249	    break;
5250	  }
5251	case UNION_TYPE:
5252	case QUAL_UNION_TYPE:
5253	  /* Unions are similar to RECORD_TYPE but the offset
5254	     is always 0.  */
5255	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5256	    {
5257	      if (TREE_CODE (field) == FIELD_DECL)
5258		{
5259		  int num;
5260
5261		  if (TREE_TYPE (field) == error_mark_node)
5262		    continue;
5263
5264		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5265					   TREE_TYPE (field), subclasses,
5266					   bit_offset);
5267		  if (!num)
5268		    return 0;
5269		  for (i = 0; i < num; i++)
5270		    classes[i] = merge_classes (subclasses[i], classes[i]);
5271		}
5272	    }
5273	  break;
5274
5275	default:
5276	  gcc_unreachable ();
5277	}
5278
5279      if (words > 2)
5280	{
5281	  /* When the size is larger than 16 bytes, if the first eightbyte
5282	     isn't X86_64_SSE_CLASS or any of the others aren't
5283	     X86_64_SSEUP_CLASS, everything should be passed in
5284	     memory.  */
5285	  if (classes[0] != X86_64_SSE_CLASS)
5286	    return 0;
5287
5288	  for (i = 1; i < words; i++)
5289	    if (classes[i] != X86_64_SSEUP_CLASS)
5290	      return 0;
5291	}
5292
5293      /* Final merger cleanup.  */
5294      for (i = 0; i < words; i++)
5295	{
5296	  /* If one class is MEMORY, everything should be passed in
5297	     memory.  */
5298	  if (classes[i] == X86_64_MEMORY_CLASS)
5299	    return 0;
5300
5301	  /* X86_64_SSEUP_CLASS should always be preceded by
5302	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
5303	  if (classes[i] == X86_64_SSEUP_CLASS
5304	      && classes[i - 1] != X86_64_SSE_CLASS
5305	      && classes[i - 1] != X86_64_SSEUP_CLASS)
5306	    {
5307	      /* The first one should never be X86_64_SSEUP_CLASS.  */
5308	      gcc_assert (i != 0);
5309	      classes[i] = X86_64_SSE_CLASS;
5310	    }
5311
5312	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5313	     everything should be passed in memory.  */
5314	  if (classes[i] == X86_64_X87UP_CLASS
5315	      && (classes[i - 1] != X86_64_X87_CLASS))
5316	    {
5317	      static bool warned;
5318
5319	      /* The first one should never be X86_64_X87UP_CLASS.  */
5320	      gcc_assert (i != 0);
5321	      if (!warned && warn_psabi)
5322		{
5323		  warned = true;
5324		  inform (input_location,
5325			  "The ABI of passing union with long double"
5326			  " has changed in GCC 4.4");
5327		}
5328	      return 0;
5329	    }
5330	}
5331      return words;
5332    }
5333
5334  /* Compute the alignment needed.  We align all types to their natural
5335     boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
5336  if (mode != VOIDmode && mode != BLKmode)
5337    {
5338      int mode_alignment = GET_MODE_BITSIZE (mode);
5339
5340      if (mode == XFmode)
5341	mode_alignment = 128;
5342      else if (mode == XCmode)
5343	mode_alignment = 256;
5344      if (COMPLEX_MODE_P (mode))
5345	mode_alignment /= 2;
5346      /* Misaligned fields are always returned in memory.  */
5347      if (bit_offset % mode_alignment)
5348	return 0;
5349    }
5350
5351  /* For V1xx modes, just use the base mode.  */
5352  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5353      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5354    mode = GET_MODE_INNER (mode);
5355
5356  /* Classification of atomic types.  */
5357  switch (mode)
5358    {
5359    case SDmode:
5360    case DDmode:
5361      classes[0] = X86_64_SSE_CLASS;
5362      return 1;
5363    case TDmode:
5364      classes[0] = X86_64_SSE_CLASS;
5365      classes[1] = X86_64_SSEUP_CLASS;
5366      return 2;
5367    case DImode:
5368    case SImode:
5369    case HImode:
5370    case QImode:
5371    case CSImode:
5372    case CHImode:
5373    case CQImode:
5374      {
5375	int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5376
5377	if (size <= 32)
5378	  {
5379	    classes[0] = X86_64_INTEGERSI_CLASS;
5380	    return 1;
5381	  }
5382	else if (size <= 64)
5383	  {
5384	    classes[0] = X86_64_INTEGER_CLASS;
5385	    return 1;
5386	  }
5387	else if (size <= 64+32)
5388	  {
5389	    classes[0] = X86_64_INTEGER_CLASS;
5390	    classes[1] = X86_64_INTEGERSI_CLASS;
5391	    return 2;
5392	  }
5393	else if (size <= 64+64)
5394	  {
5395	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5396	    return 2;
5397	  }
5398	else
5399	  gcc_unreachable ();
5400      }
5401    case CDImode:
5402    case TImode:
5403      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5404      return 2;
5405    case COImode:
5406    case OImode:
5407      /* OImode shouldn't be used directly.  */
5408      gcc_unreachable ();
5409    case CTImode:
5410      return 0;
5411    case SFmode:
5412      if (!(bit_offset % 64))
5413	classes[0] = X86_64_SSESF_CLASS;
5414      else
5415	classes[0] = X86_64_SSE_CLASS;
5416      return 1;
5417    case DFmode:
5418      classes[0] = X86_64_SSEDF_CLASS;
5419      return 1;
5420    case XFmode:
5421      classes[0] = X86_64_X87_CLASS;
5422      classes[1] = X86_64_X87UP_CLASS;
5423      return 2;
5424    case TFmode:
5425      classes[0] = X86_64_SSE_CLASS;
5426      classes[1] = X86_64_SSEUP_CLASS;
5427      return 2;
5428    case SCmode:
5429      classes[0] = X86_64_SSE_CLASS;
5430      if (!(bit_offset % 64))
5431	return 1;
5432      else
5433	{
5434	  static bool warned;
5435
5436	  if (!warned && warn_psabi)
5437	    {
5438	      warned = true;
5439	      inform (input_location,
5440		      "The ABI of passing structure with complex float"
5441		      " member has changed in GCC 4.4");
5442	    }
5443	  classes[1] = X86_64_SSESF_CLASS;
5444	  return 2;
5445	}
5446    case DCmode:
5447      classes[0] = X86_64_SSEDF_CLASS;
5448      classes[1] = X86_64_SSEDF_CLASS;
5449      return 2;
5450    case XCmode:
5451      classes[0] = X86_64_COMPLEX_X87_CLASS;
5452      return 1;
5453    case TCmode:
5454      /* This mode is larger than 16 bytes.  */
5455      return 0;
5456    case V8SFmode:
5457    case V8SImode:
5458    case V32QImode:
5459    case V16HImode:
5460    case V4DFmode:
5461    case V4DImode:
5462      classes[0] = X86_64_SSE_CLASS;
5463      classes[1] = X86_64_SSEUP_CLASS;
5464      classes[2] = X86_64_SSEUP_CLASS;
5465      classes[3] = X86_64_SSEUP_CLASS;
5466      return 4;
5467    case V4SFmode:
5468    case V4SImode:
5469    case V16QImode:
5470    case V8HImode:
5471    case V2DFmode:
5472    case V2DImode:
5473      classes[0] = X86_64_SSE_CLASS;
5474      classes[1] = X86_64_SSEUP_CLASS;
5475      return 2;
5476    case V1TImode:
5477    case V1DImode:
5478    case V2SFmode:
5479    case V2SImode:
5480    case V4HImode:
5481    case V8QImode:
5482      classes[0] = X86_64_SSE_CLASS;
5483      return 1;
5484    case BLKmode:
5485    case VOIDmode:
5486      return 0;
5487    default:
5488      gcc_assert (VECTOR_MODE_P (mode));
5489
5490      if (bytes > 16)
5491	return 0;
5492
5493      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5494
5495      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5496	classes[0] = X86_64_INTEGERSI_CLASS;
5497      else
5498	classes[0] = X86_64_INTEGER_CLASS;
5499      classes[1] = X86_64_INTEGER_CLASS;
5500      return 1 + (bytes > 8);
5501    }
5502}
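
/* A worked example for the classification above: the (hypothetical)
   structure

     struct s { double d; int i; };

   occupies two eightbytes.  The first is classified X86_64_SSEDF_CLASS
   (from the double) and the second X86_64_INTEGERSI_CLASS (from the
   int), so the structure is passed with D in an SSE register and I in
   a general-purpose register.  */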
5503
5504/* Examine the argument and set the number of registers required in each
5505   class.  Return 0 iff the parameter should be passed in memory.  */
5506static int
5507examine_argument (enum machine_mode mode, const_tree type, int in_return,
5508		  int *int_nregs, int *sse_nregs)
5509{
5510  enum x86_64_reg_class regclass[MAX_CLASSES];
5511  int n = classify_argument (mode, type, regclass, 0);
5512
5513  *int_nregs = 0;
5514  *sse_nregs = 0;
5515  if (!n)
5516    return 0;
5517  for (n--; n >= 0; n--)
5518    switch (regclass[n])
5519      {
5520      case X86_64_INTEGER_CLASS:
5521      case X86_64_INTEGERSI_CLASS:
5522	(*int_nregs)++;
5523	break;
5524      case X86_64_SSE_CLASS:
5525      case X86_64_SSESF_CLASS:
5526      case X86_64_SSEDF_CLASS:
5527	(*sse_nregs)++;
5528	break;
5529      case X86_64_NO_CLASS:
5530      case X86_64_SSEUP_CLASS:
5531	break;
5532      case X86_64_X87_CLASS:
5533      case X86_64_X87UP_CLASS:
5534	if (!in_return)
5535	  return 0;
5536	break;
5537      case X86_64_COMPLEX_X87_CLASS:
5538	return in_return ? 2 : 0;
5539      case X86_64_MEMORY_CLASS:
5540	gcc_unreachable ();
5541      }
5542  return 1;
5543}
5544
5545/* Construct container for the argument used by GCC interface.  See
5546   FUNCTION_ARG for the detailed description.  */
5547
5548static rtx
5549construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5550		     const_tree type, int in_return, int nintregs, int nsseregs,
5551		     const int *intreg, int sse_regno)
5552{
5553  /* The following variables hold the static issued_error state.  */
5554  static bool issued_sse_arg_error;
5555  static bool issued_sse_ret_error;
5556  static bool issued_x87_ret_error;
5557
5558  enum machine_mode tmpmode;
5559  int bytes =
5560    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5561  enum x86_64_reg_class regclass[MAX_CLASSES];
5562  int n;
5563  int i;
5564  int nexps = 0;
5565  int needed_sseregs, needed_intregs;
5566  rtx exp[MAX_CLASSES];
5567  rtx ret;
5568
5569  n = classify_argument (mode, type, regclass, 0);
5570  if (!n)
5571    return NULL;
5572  if (!examine_argument (mode, type, in_return, &needed_intregs,
5573			 &needed_sseregs))
5574    return NULL;
5575  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5576    return NULL;
5577
5578  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
5579     some less clueful developer tries to use floating-point anyway.  */
5580  if (needed_sseregs && !TARGET_SSE)
5581    {
5582      if (in_return)
5583	{
5584	  if (!issued_sse_ret_error)
5585	    {
5586	      error ("SSE register return with SSE disabled");
5587	      issued_sse_ret_error = true;
5588	    }
5589	}
5590      else if (!issued_sse_arg_error)
5591	{
5592	  error ("SSE register argument with SSE disabled");
5593	  issued_sse_arg_error = true;
5594	}
5595      return NULL;
5596    }
5597
5598  /* Likewise, error if the ABI requires us to return values in the
5599     x87 registers and the user specified -mno-80387.  */
5600  if (!TARGET_80387 && in_return)
5601    for (i = 0; i < n; i++)
5602      if (regclass[i] == X86_64_X87_CLASS
5603	  || regclass[i] == X86_64_X87UP_CLASS
5604	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5605	{
5606	  if (!issued_x87_ret_error)
5607	    {
5608	      error ("x87 register return with x87 disabled");
5609	      issued_x87_ret_error = true;
5610	    }
5611	  return NULL;
5612	}
5613
5614  /* First construct simple cases.  Avoid SCmode, since we want to use
5615     a single register to pass this type.  */
5616  if (n == 1 && mode != SCmode)
5617    switch (regclass[0])
5618      {
5619      case X86_64_INTEGER_CLASS:
5620      case X86_64_INTEGERSI_CLASS:
5621	return gen_rtx_REG (mode, intreg[0]);
5622      case X86_64_SSE_CLASS:
5623      case X86_64_SSESF_CLASS:
5624      case X86_64_SSEDF_CLASS:
5625	if (mode != BLKmode)
5626	  return gen_reg_or_parallel (mode, orig_mode,
5627				      SSE_REGNO (sse_regno));
5628	break;
5629      case X86_64_X87_CLASS:
5630      case X86_64_COMPLEX_X87_CLASS:
5631	return gen_rtx_REG (mode, FIRST_STACK_REG);
5632      case X86_64_NO_CLASS:
5633	/* Zero sized array, struct or class.  */
5634	return NULL;
5635      default:
5636	gcc_unreachable ();
5637      }
5638  if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5639      && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5640    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5641  if (n == 4
5642      && regclass[0] == X86_64_SSE_CLASS
5643      && regclass[1] == X86_64_SSEUP_CLASS
5644      && regclass[2] == X86_64_SSEUP_CLASS
5645      && regclass[3] == X86_64_SSEUP_CLASS
5646      && mode != BLKmode)
5647    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5648
5649  if (n == 2
5650      && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5651    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5652  if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5653      && regclass[1] == X86_64_INTEGER_CLASS
5654      && (mode == CDImode || mode == TImode || mode == TFmode)
5655      && intreg[0] + 1 == intreg[1])
5656    return gen_rtx_REG (mode, intreg[0]);
5657
5658  /* Otherwise figure out the entries of the PARALLEL.  */
5659  for (i = 0; i < n; i++)
5660    {
5661      int pos;
5662
5663      switch (regclass[i])
5664        {
5665	  case X86_64_NO_CLASS:
5666	    break;
5667	  case X86_64_INTEGER_CLASS:
5668	  case X86_64_INTEGERSI_CLASS:
5669	    /* Merge TImodes on aligned occasions here too.  */
5670	    if (i * 8 + 8 > bytes)
5671	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5672	    else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5673	      tmpmode = SImode;
5674	    else
5675	      tmpmode = DImode;
5676	    /* We've requested 24 bytes we don't have a mode for.  Use DImode.  */
5677	    if (tmpmode == BLKmode)
5678	      tmpmode = DImode;
5679	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5680					       gen_rtx_REG (tmpmode, *intreg),
5681					       GEN_INT (i*8));
5682	    intreg++;
5683	    break;
5684	  case X86_64_SSESF_CLASS:
5685	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5686					       gen_rtx_REG (SFmode,
5687							    SSE_REGNO (sse_regno)),
5688					       GEN_INT (i*8));
5689	    sse_regno++;
5690	    break;
5691	  case X86_64_SSEDF_CLASS:
5692	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5693					       gen_rtx_REG (DFmode,
5694							    SSE_REGNO (sse_regno)),
5695					       GEN_INT (i*8));
5696	    sse_regno++;
5697	    break;
5698	  case X86_64_SSE_CLASS:
5699	    pos = i;
5700	    switch (n)
5701	      {
5702	      case 1:
5703		tmpmode = DImode;
5704		break;
5705	      case 2:
5706		if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5707		  {
5708		    tmpmode = TImode;
5709		    i++;
5710		  }
5711		else
5712		  tmpmode = DImode;
5713		break;
5714	      case 4:
5715		gcc_assert (i == 0
5716			    && regclass[1] == X86_64_SSEUP_CLASS
5717			    && regclass[2] == X86_64_SSEUP_CLASS
5718			    && regclass[3] == X86_64_SSEUP_CLASS);
5719		tmpmode = OImode;
5720		i += 3;
5721		break;
5722	      default:
5723		gcc_unreachable ();
5724	      }
5725	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5726					       gen_rtx_REG (tmpmode,
5727							    SSE_REGNO (sse_regno)),
5728					       GEN_INT (pos*8));
5729	    sse_regno++;
5730	    break;
5731	  default:
5732	    gcc_unreachable ();
5733	}
5734    }
5735
5736  /* Empty aligned struct, union or class.  */
5737  if (nexps == 0)
5738    return NULL;
5739
5740  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5741  for (i = 0; i < nexps; i++)
5742    XVECEXP (ret, 0, i) = exp [i];
5743  return ret;
5744}
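
/* Continuing the struct s { double d; int i; } example, the container
   built above for a first argument is a PARALLEL of two EXPR_LISTs,
   roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
		(expr_list (reg:SI di) (const_int 8))])

   while a __float128 (TFmode) argument, classified SSE + SSEUP, comes
   back as a single (reg:TF xmm0).  */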
5745
5746/* Update the data in CUM to advance over an argument of mode MODE
5747   and data type TYPE.  (TYPE is null for libcalls where that information
5748   may not be available.)  */
5749
5750static void
5751function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5752			 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5753{
5754  switch (mode)
5755    {
5756    default:
5757      break;
5758
5759    case BLKmode:
5760      if (bytes < 0)
5761	break;
5762      /* FALLTHRU */
5763
5764    case DImode:
5765    case SImode:
5766    case HImode:
5767    case QImode:
5768      cum->words += words;
5769      cum->nregs -= words;
5770      cum->regno += words;
5771
5772      if (cum->nregs <= 0)
5773	{
5774	  cum->nregs = 0;
5775	  cum->regno = 0;
5776	}
5777      break;
5778
5779    case OImode:
5780      /* OImode shouldn't be used directly.  */
5781      gcc_unreachable ();
5782
5783    case DFmode:
5784      if (cum->float_in_sse < 2)
5785	break;
5786    case SFmode:
5787      if (cum->float_in_sse < 1)
5788	break;
5789      /* FALLTHRU */
5790
5791    case V8SFmode:
5792    case V8SImode:
5793    case V32QImode:
5794    case V16HImode:
5795    case V4DFmode:
5796    case V4DImode:
5797    case TImode:
5798    case V16QImode:
5799    case V8HImode:
5800    case V4SImode:
5801    case V2DImode:
5802    case V4SFmode:
5803    case V2DFmode:
5804      if (!type || !AGGREGATE_TYPE_P (type))
5805	{
5806	  cum->sse_words += words;
5807	  cum->sse_nregs -= 1;
5808	  cum->sse_regno += 1;
5809	  if (cum->sse_nregs <= 0)
5810	    {
5811	      cum->sse_nregs = 0;
5812	      cum->sse_regno = 0;
5813	    }
5814	}
5815      break;
5816
5817    case V8QImode:
5818    case V4HImode:
5819    case V2SImode:
5820    case V2SFmode:
5821    case V1TImode:
5822    case V1DImode:
5823      if (!type || !AGGREGATE_TYPE_P (type))
5824	{
5825	  cum->mmx_words += words;
5826	  cum->mmx_nregs -= 1;
5827	  cum->mmx_regno += 1;
5828	  if (cum->mmx_nregs <= 0)
5829	    {
5830	      cum->mmx_nregs = 0;
5831	      cum->mmx_regno = 0;
5832	    }
5833	}
5834      break;
5835    }
5836}
5837
5838static void
5839function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5840			 tree type, HOST_WIDE_INT words, int named)
5841{
5842  int int_nregs, sse_nregs;
5843
5844  /* Unnamed 256bit vector mode parameters are passed on stack.  */
5845  if (!named && VALID_AVX256_REG_MODE (mode))
5846    return;
5847
5848  if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
5849      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5850    {
5851      cum->nregs -= int_nregs;
5852      cum->sse_nregs -= sse_nregs;
5853      cum->regno += int_nregs;
5854      cum->sse_regno += sse_nregs;
5855    }
5856  else
5857    {
5858      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
5859      cum->words = (cum->words + align - 1) & ~(align - 1);
5860      cum->words += words;
5861    }
5862}
5863
5864static void
5865function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5866			    HOST_WIDE_INT words)
5867{
5868  /* Otherwise, this should be passed indirectly.  */
5869  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5870
5871  cum->words += words;
5872  if (cum->nregs > 0)
5873    {
5874      cum->nregs -= 1;
5875      cum->regno += 1;
5876    }
5877}
5878
5879void
5880function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5881		      tree type, int named)
5882{
5883  HOST_WIDE_INT bytes, words;
5884
5885  if (mode == BLKmode)
5886    bytes = int_size_in_bytes (type);
5887  else
5888    bytes = GET_MODE_SIZE (mode);
5889  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5890
5891  if (type)
5892    mode = type_natural_mode (type, NULL);
5893
5894  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5895    function_arg_advance_ms_64 (cum, bytes, words);
5896  else if (TARGET_64BIT)
5897    function_arg_advance_64 (cum, mode, type, words, named);
5898  else
5899    function_arg_advance_32 (cum, mode, type, bytes, words);
5900}
5901
5902/* Define where to put the arguments to a function.
5903   Value is zero to push the argument on the stack,
5904   or a hard register in which to store the argument.
5905
5906   MODE is the argument's machine mode.
5907   TYPE is the data type of the argument (as a tree).
5908    This is null for libcalls where that information may
5909    not be available.
5910   CUM is a variable of type CUMULATIVE_ARGS which gives info about
5911    the preceding args and about the function being called.
5912   NAMED is nonzero if this argument is a named parameter
5913    (otherwise it is an extra parameter matching an ellipsis).  */
5914
5915static rtx
5916function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5917		 enum machine_mode orig_mode, tree type,
5918		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5919{
5920  static bool warnedsse, warnedmmx;
5921
5922  /* Avoid the AL settings for the Unix64 ABI.  */
5923  if (mode == VOIDmode)
5924    return constm1_rtx;
5925
5926  switch (mode)
5927    {
5928    default:
5929      break;
5930
5931    case BLKmode:
5932      if (bytes < 0)
5933	break;
5934      /* FALLTHRU */
5935    case DImode:
5936    case SImode:
5937    case HImode:
5938    case QImode:
5939      if (words <= cum->nregs)
5940	{
5941	  int regno = cum->regno;
5942
5943	  /* Fastcall allocates the first two DWORD (SImode) or
5944	     smaller arguments to ECX and EDX if it isn't an
5945	     aggregate type.  */
5946	  if (cum->fastcall)
5947	    {
5948	      if (mode == BLKmode
5949		  || mode == DImode
5950		  || (type && AGGREGATE_TYPE_P (type)))
5951	        break;
5952
5953	      /* ECX not EAX is the first allocated register.  */
5954	      if (regno == AX_REG)
5955		regno = CX_REG;
5956	    }
5957	  return gen_rtx_REG (mode, regno);
5958	}
5959      break;
5960
5961    case DFmode:
5962      if (cum->float_in_sse < 2)
5963	break;
5964    case SFmode:
5965      if (cum->float_in_sse < 1)
5966	break;
5967      /* FALLTHRU */
5968    case TImode:
5969      /* In 32bit, we pass TImode in xmm registers.  */
5970    case V16QImode:
5971    case V8HImode:
5972    case V4SImode:
5973    case V2DImode:
5974    case V4SFmode:
5975    case V2DFmode:
5976      if (!type || !AGGREGATE_TYPE_P (type))
5977	{
5978	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5979	    {
5980	      warnedsse = true;
5981	      warning (0, "SSE vector argument without SSE enabled "
5982		       "changes the ABI");
5983	    }
5984	  if (cum->sse_nregs)
5985	    return gen_reg_or_parallel (mode, orig_mode,
5986				        cum->sse_regno + FIRST_SSE_REG);
5987	}
5988      break;
5989
5990    case OImode:
5991      /* OImode shouldn't be used directly.  */
5992      gcc_unreachable ();
5993
5994    case V8SFmode:
5995    case V8SImode:
5996    case V32QImode:
5997    case V16HImode:
5998    case V4DFmode:
5999    case V4DImode:
6000      if (!type || !AGGREGATE_TYPE_P (type))
6001	{
6002	  if (cum->sse_nregs)
6003	    return gen_reg_or_parallel (mode, orig_mode,
6004				        cum->sse_regno + FIRST_SSE_REG);
6005	}
6006      break;
6007
6008    case V8QImode:
6009    case V4HImode:
6010    case V2SImode:
6011    case V2SFmode:
6012    case V1TImode:
6013    case V1DImode:
6014      if (!type || !AGGREGATE_TYPE_P (type))
6015	{
6016	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6017	    {
6018	      warnedmmx = true;
6019	      warning (0, "MMX vector argument without MMX enabled "
6020		       "changes the ABI");
6021	    }
6022	  if (cum->mmx_nregs)
6023	    return gen_reg_or_parallel (mode, orig_mode,
6024				        cum->mmx_regno + FIRST_MMX_REG);
6025	}
6026      break;
6027    }
6028
6029  return NULL_RTX;
6030}
6031
6032static rtx
6033function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6034		 enum machine_mode orig_mode, tree type, int named)
6035{
  /* Handle the hidden AL argument containing the number of SSE
     registers used by varargs x86-64 functions.  */
6038  if (mode == VOIDmode)
6039    return GEN_INT (cum->maybe_vaarg
6040		    ? (cum->sse_nregs < 0
6041		       ? X86_64_SSE_REGPARM_MAX
6042		       : cum->sse_regno)
6043		    : -1);
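  /* For reference (SysV x86-64 ABI): at a call to a varargs or
     unprototyped function, %al carries an upper bound on the number of
     vector registers used for argument passing, so the callee's prologue
     can skip the SSE register save when %al is zero.  */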
6044
6045  switch (mode)
6046    {
6047    default:
6048      break;
6049
6050    case V8SFmode:
6051    case V8SImode:
6052    case V32QImode:
6053    case V16HImode:
6054    case V4DFmode:
6055    case V4DImode:
      /* Unnamed 256-bit vector mode parameters are passed on the stack.  */
6057      if (!named)
6058	return NULL;
6059      break;
6060    }
6061
6062  return construct_container (mode, orig_mode, type, 0, cum->nregs,
6063			      cum->sse_nregs,
6064			      &x86_64_int_parameter_registers [cum->regno],
6065			      cum->sse_regno);
6066}
6067
6068static rtx
6069function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6070		    enum machine_mode orig_mode, int named,
6071		    HOST_WIDE_INT bytes)
6072{
6073  unsigned int regno;
6074
  /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
     The value -2 specifies that the current function call uses the MS ABI.  */
6077  if (mode == VOIDmode)
6078    return GEN_INT (-2);
6079
6080  /* If we've run out of registers, it goes on the stack.  */
6081  if (cum->nregs == 0)
6082    return NULL_RTX;
6083
6084  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
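  /* For reference (Microsoft x64 calling convention): the first four
     arguments are passed in RCX, RDX, R8 and R9, with floating-point
     arguments going in XMM0-XMM3 instead; everything else goes on the
     stack.  x86_64_ms_abi_int_parameter_registers lists the integer
     registers in that order.  */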
6085
6086  /* Only floating point modes are passed in anything but integer regs.  */
6087  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6088    {
6089      if (named)
6090	regno = cum->regno + FIRST_SSE_REG;
6091      else
6092	{
6093	  rtx t1, t2;
6094
6095	  /* Unnamed floating parameters are passed in both the
6096	     SSE and integer registers.  */
6097	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6098	  t2 = gen_rtx_REG (mode, regno);
6099	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6100	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6101	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6102	}
6103    }
  /* Handle aggregate types passed in registers.  */
6105  if (orig_mode == BLKmode)
6106    {
6107      if (bytes > 0 && bytes <= 8)
6108        mode = (bytes > 4 ? DImode : SImode);
6109      if (mode == BLKmode)
6110        mode = DImode;
6111    }
6112
6113  return gen_reg_or_parallel (mode, orig_mode, regno);
6114}
6115
6116rtx
6117function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6118	      tree type, int named)
6119{
6120  enum machine_mode mode = omode;
6121  HOST_WIDE_INT bytes, words;
6122
6123  if (mode == BLKmode)
6124    bytes = int_size_in_bytes (type);
6125  else
6126    bytes = GET_MODE_SIZE (mode);
6127  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6128
6129  /* To simplify the code below, represent vector types with a vector mode
6130     even if MMX/SSE are not active.  */
6131  if (type && TREE_CODE (type) == VECTOR_TYPE)
6132    mode = type_natural_mode (type, cum);
6133
6134  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6135    return function_arg_ms_64 (cum, mode, omode, named, bytes);
6136  else if (TARGET_64BIT)
6137    return function_arg_64 (cum, mode, omode, type, named);
6138  else
6139    return function_arg_32 (cum, mode, omode, type, bytes, words);
6140}
6141
6142/* A C expression that indicates when an argument must be passed by
6143   reference.  If nonzero for an argument, a copy of that argument is
6144   made in memory and a pointer to the argument is passed instead of
6145   the argument itself.  The pointer is passed in whatever way is
6146   appropriate for passing a pointer to that type.  */
6147
6148static bool
6149ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6150			enum machine_mode mode ATTRIBUTE_UNUSED,
6151			const_tree type, bool named ATTRIBUTE_UNUSED)
6152{
6153  /* See Windows x64 Software Convention.  */
6154  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6155    {
6156      int msize = (int) GET_MODE_SIZE (mode);
6157      if (type)
6158	{
6159	  /* Arrays are passed by reference.  */
6160	  if (TREE_CODE (type) == ARRAY_TYPE)
6161	    return true;
6162
6163	  if (AGGREGATE_TYPE_P (type))
6164	    {
6165	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6166	         are passed by reference.  */
6167	      msize = int_size_in_bytes (type);
6168	    }
6169	}
6170
6171      /* __m128 is passed by reference.  */
6172      switch (msize) {
6173      case 1: case 2: case 4: case 8:
6174        break;
6175      default:
6176        return true;
6177      }
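      /* Roughly: a 16-byte __m128 argument or a 24-byte struct is passed
	 as a pointer to a caller-allocated temporary, while an 8-byte
	 struct is passed by value in a register.  */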
6178    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
6183}
6184
6185/* Return true when TYPE should be 128bit aligned for 32bit argument passing
6186   ABI.  */
6187static bool
6188contains_aligned_value_p (const_tree type)
6189{
6190  enum machine_mode mode = TYPE_MODE (type);
6191  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6192       || mode == TDmode
6193       || mode == TFmode
6194       || mode == TCmode)
6195      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6196    return true;
6197  if (TYPE_ALIGN (type) < 128)
6198    return false;
6199
6200  if (AGGREGATE_TYPE_P (type))
6201    {
6202      /* Walk the aggregates recursively.  */
6203      switch (TREE_CODE (type))
6204	{
6205	case RECORD_TYPE:
6206	case UNION_TYPE:
6207	case QUAL_UNION_TYPE:
6208	  {
6209	    tree field;
6210
6211	    /* Walk all the structure fields.  */
6212	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6213	      {
6214		if (TREE_CODE (field) == FIELD_DECL
6215		    && contains_aligned_value_p (TREE_TYPE (field)))
6216		  return true;
6217	      }
6218	    break;
6219	  }
6220
6221	case ARRAY_TYPE:
	  /* Just in case some languages pass arrays by value.  */
6223	  if (contains_aligned_value_p (TREE_TYPE (type)))
6224	    return true;
6225	  break;
6226
6227	default:
6228	  gcc_unreachable ();
6229	}
6230    }
6231  return false;
6232}
6233
6234/* Gives the alignment boundary, in bits, of an argument with the
6235   specified mode and type.  */
6236
6237int
6238ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6239{
6240  int align;
6241  if (type)
6242    {
      /* Since the canonical type is used for the call, convert TYPE to
	 its canonical form if needed.  */
6245      if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6246	type = TYPE_CANONICAL (type);
6247      align = TYPE_ALIGN (type);
6248    }
6249  else
6250    align = GET_MODE_ALIGNMENT (mode);
6251  if (align < PARM_BOUNDARY)
6252    align = PARM_BOUNDARY;
  /* In 32-bit mode, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
6255  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6256    {
      /* The i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128-bit
	 alignment.
6260
6261	 The handling here differs from field_alignment.  ICC aligns MMX
6262	 arguments to 4 byte boundaries, while structure fields are aligned
6263	 to 8 byte boundaries.  */
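      /* For example (with SSE enabled): a plain double argument is
	 therefore passed with only 4-byte alignment, while an __m128 or
	 _Decimal128 argument keeps its full 16-byte alignment.  */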
6264      if (!type)
6265	{
6266	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6267	    align = PARM_BOUNDARY;
6268	}
6269      else
6270	{
6271	  if (!contains_aligned_value_p (type))
6272	    align = PARM_BOUNDARY;
6273	}
6274    }
6275  if (align > BIGGEST_ALIGNMENT)
6276    align = BIGGEST_ALIGNMENT;
6277  return align;
6278}
6279
6280/* Return true if N is a possible register number of function value.  */
6281
6282bool
6283ix86_function_value_regno_p (int regno)
6284{
6285  switch (regno)
6286    {
6287    case 0:
6288      return true;
6289
6290    case FIRST_FLOAT_REG:
      /* TODO: The function should depend on the current function's ABI,
	 but builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
6294      if (TARGET_64BIT && ix86_abi == MS_ABI)
6295	return false;
6296      return TARGET_FLOAT_RETURNS_IN_80387;
6297
6298    case FIRST_SSE_REG:
6299      return TARGET_SSE;
6300
6301    case FIRST_MMX_REG:
6302      if (TARGET_MACHO || TARGET_64BIT)
6303	return false;
6304      return TARGET_MMX;
6305    }
6306
6307  return false;
6308}
6309
6310/* Define how to find the value returned by a function.
6311   VALTYPE is the data type of the value (as a tree).
6312   If the precise function being called is known, FUNC is its FUNCTION_DECL;
6313   otherwise, FUNC is 0.  */
6314
6315static rtx
6316function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6317		   const_tree fntype, const_tree fn)
6318{
6319  unsigned int regno;
6320
6321  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6322     we normally prevent this case when mmx is not available.  However
6323     some ABIs may require the result to be returned like DImode.  */
6324  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6325    regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6326
6327  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
6328     we prevent this case when sse is not available.  However some ABIs
6329     may require the result to be returned like integer TImode.  */
6330  else if (mode == TImode
6331	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6332    regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6333
6334  /* 32-byte vector modes in %ymm0.   */
6335  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6336    regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6337
6338  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
6339  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6340    regno = FIRST_FLOAT_REG;
6341  else
6342    /* Most things go in %eax.  */
6343    regno = AX_REG;
6344
6345  /* Override FP return register with %xmm0 for local functions when
6346     SSE math is enabled or for functions with sseregparm attribute.  */
6347  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6348    {
6349      int sse_level = ix86_function_sseregparm (fntype, fn, false);
6350      if ((sse_level >= 1 && mode == SFmode)
6351	  || (sse_level == 2 && mode == DFmode))
6352	regno = FIRST_SSE_REG;
6353    }
6354
6355  /* OImode shouldn't be used directly.  */
6356  gcc_assert (mode != OImode);
6357
6358  return gen_rtx_REG (orig_mode, regno);
6359}
6360
6361static rtx
6362function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6363		   const_tree valtype)
6364{
6365  rtx ret;
6366
6367  /* Handle libcalls, which don't provide a type node.  */
6368  if (valtype == NULL)
6369    {
6370      switch (mode)
6371	{
6372	case SFmode:
6373	case SCmode:
6374	case DFmode:
6375	case DCmode:
6376	case TFmode:
6377	case SDmode:
6378	case DDmode:
6379	case TDmode:
6380	  return gen_rtx_REG (mode, FIRST_SSE_REG);
6381	case XFmode:
6382	case XCmode:
6383	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6384	case TCmode:
6385	  return NULL;
6386	default:
6387	  return gen_rtx_REG (mode, AX_REG);
6388	}
6389    }
6390
6391  ret = construct_container (mode, orig_mode, valtype, 1,
6392			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6393			     x86_64_int_return_registers, 0);
6394
  /* For zero-sized structures, construct_container returns NULL, but we
     need to keep the rest of the compiler happy by returning a meaningful
     value.  */
6397  if (!ret)
6398    ret = gen_rtx_REG (orig_mode, AX_REG);
6399
6400  return ret;
6401}
6402
6403static rtx
6404function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6405{
6406  unsigned int regno = AX_REG;
6407
6408  if (TARGET_SSE)
6409    {
6410      switch (GET_MODE_SIZE (mode))
6411        {
6412        case 16:
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6414	     && !COMPLEX_MODE_P (mode))
6415	    regno = FIRST_SSE_REG;
6416	  break;
6417	case 8:
6418	case 4:
6419	  if (mode == SFmode || mode == DFmode)
6420	    regno = FIRST_SSE_REG;
6421	  break;
6422	default:
6423	  break;
6424        }
6425    }
6426  return gen_rtx_REG (orig_mode, regno);
6427}
6428
6429static rtx
6430ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6431		       enum machine_mode orig_mode, enum machine_mode mode)
6432{
6433  const_tree fn, fntype;
6434
6435  fn = NULL_TREE;
6436  if (fntype_or_decl && DECL_P (fntype_or_decl))
6437    fn = fntype_or_decl;
6438  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6439
6440  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6441    return function_value_ms_64 (orig_mode, mode);
6442  else if (TARGET_64BIT)
6443    return function_value_64 (orig_mode, mode, valtype);
6444  else
6445    return function_value_32 (orig_mode, mode, fntype, fn);
6446}
6447
6448static rtx
6449ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6450		     bool outgoing ATTRIBUTE_UNUSED)
6451{
6452  enum machine_mode mode, orig_mode;
6453
6454  orig_mode = TYPE_MODE (valtype);
6455  mode = type_natural_mode (valtype, NULL);
6456  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6457}
6458
6459rtx
6460ix86_libcall_value (enum machine_mode mode)
6461{
6462  return ix86_function_value_1 (NULL, NULL, mode, mode);
6463}
6464
6465/* Return true iff type is returned in memory.  */
6466
6467static int ATTRIBUTE_UNUSED
6468return_in_memory_32 (const_tree type, enum machine_mode mode)
6469{
6470  HOST_WIDE_INT size;
6471
6472  if (mode == BLKmode)
6473    return 1;
6474
6475  size = int_size_in_bytes (type);
6476
6477  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6478    return 0;
6479
6480  if (VECTOR_MODE_P (mode) || mode == TImode)
6481    {
6482      /* User-created vectors small enough to fit in EAX.  */
6483      if (size < 8)
6484	return 0;
6485
      /* MMX/3dNow! values are returned in MM0,
	 except when it doesn't exist.  */
6488      if (size == 8)
6489	return (TARGET_MMX ? 0 : 1);
6490
6491      /* SSE values are returned in XMM0, except when it doesn't exist.  */
6492      if (size == 16)
6493	return (TARGET_SSE ? 0 : 1);
6494
6495      /* AVX values are returned in YMM0, except when it doesn't exist.  */
6496      if (size == 32)
6497	return TARGET_AVX ? 0 : 1;
6498    }
6499
6500  if (mode == XFmode)
6501    return 0;
6502
6503  if (size > 12)
6504    return 1;
6505
6506  /* OImode shouldn't be used directly.  */
6507  gcc_assert (mode != OImode);
6508
6509  return 0;
6510}
6511
6512static int ATTRIBUTE_UNUSED
6513return_in_memory_64 (const_tree type, enum machine_mode mode)
6514{
6515  int needed_intregs, needed_sseregs;
6516  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6517}
6518
6519static int ATTRIBUTE_UNUSED
6520return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6521{
6522  HOST_WIDE_INT size = int_size_in_bytes (type);
6523
6524  /* __m128 is returned in xmm0.  */
6525  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6526      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6527    return 0;
6528
  /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
6530  return (size != 1 && size != 2 && size != 4 && size != 8);
6531}
6532
6533static bool
6534ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6535{
6536#ifdef SUBTARGET_RETURN_IN_MEMORY
6537  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6538#else
6539  const enum machine_mode mode = type_natural_mode (type, NULL);
6540
6541  if (TARGET_64BIT)
6542    {
6543      if (ix86_function_type_abi (fntype) == MS_ABI)
6544	return return_in_memory_ms_64 (type, mode);
6545      else
6546	return return_in_memory_64 (type, mode);
6547    }
6548  else
6549    return return_in_memory_32 (type, mode);
6550#endif
6551}
6552
/* Return true iff TYPE is returned in memory.  This version is used
6554   on Solaris 2.  It is similar to the generic ix86_return_in_memory,
6555   but differs notably in that when MMX is available, 8-byte vectors
6556   are returned in memory, rather than in MMX registers.  */
6557
6558bool
6559ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6560{
6561  int size;
6562  enum machine_mode mode = type_natural_mode (type, NULL);
6563
6564  if (TARGET_64BIT)
6565    return return_in_memory_64 (type, mode);
6566
6567  if (mode == BLKmode)
6568    return 1;
6569
6570  size = int_size_in_bytes (type);
6571
6572  if (VECTOR_MODE_P (mode))
6573    {
6574      /* Return in memory only if MMX registers *are* available.  This
6575	 seems backwards, but it is consistent with the existing
6576	 Solaris x86 ABI.  */
6577      if (size == 8)
6578	return TARGET_MMX;
6579      if (size == 16)
6580	return !TARGET_SSE;
6581    }
6582  else if (mode == TImode)
6583    return !TARGET_SSE;
6584  else if (mode == XFmode)
6585    return 0;
6586
6587  return size > 12;
6588}
6589
6590/* When returning SSE vector types, we have a choice of either
     (1) being ABI incompatible with a -march switch, or
6592     (2) generating an error.
6593   Given no good solution, I think the safest thing is one warning.
6594   The user won't be able to use -Werror, but....
6595
6596   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6597   called in response to actually generating a caller or callee that
6598   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
6599   via aggregate_value_p for general type probing from tree-ssa.  */
6600
6601static rtx
6602ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6603{
6604  static bool warnedsse, warnedmmx;
6605
6606  if (!TARGET_64BIT && type)
6607    {
6608      /* Look at the return type of the function, not the function type.  */
6609      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6610
6611      if (!TARGET_SSE && !warnedsse)
6612	{
6613	  if (mode == TImode
6614	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6615	    {
6616	      warnedsse = true;
6617	      warning (0, "SSE vector return without SSE enabled "
6618		       "changes the ABI");
6619	    }
6620	}
6621
6622      if (!TARGET_MMX && !warnedmmx)
6623	{
6624	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6625	    {
6626	      warnedmmx = true;
6627	      warning (0, "MMX vector return without MMX enabled "
6628		       "changes the ABI");
6629	    }
6630	}
6631    }
6632
6633  return NULL;
6634}
6635
6636
6637/* Create the va_list data type.  */
6638
/* Returns the calling convention specific va_list data type.
6640   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
6641
6642static tree
6643ix86_build_builtin_va_list_abi (enum calling_abi abi)
6644{
6645  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6646
6647  /* For i386 we use plain pointer to argument area.  */
6648  if (!TARGET_64BIT || abi == MS_ABI)
6649    return build_pointer_type (char_type_node);
6650
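  /* For reference, the record built below corresponds to the user-visible
     SysV x86-64 va_list:

	 typedef struct {
	   unsigned int gp_offset;
	   unsigned int fp_offset;
	   void *overflow_arg_area;
	   void *reg_save_area;
	 } __builtin_va_list[1];  */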
6651  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6652  type_decl = build_decl (BUILTINS_LOCATION,
6653			  TYPE_DECL, get_identifier ("__va_list_tag"), record);
6654
6655  f_gpr = build_decl (BUILTINS_LOCATION,
6656		      FIELD_DECL, get_identifier ("gp_offset"),
6657		      unsigned_type_node);
6658  f_fpr = build_decl (BUILTINS_LOCATION,
6659		      FIELD_DECL, get_identifier ("fp_offset"),
6660		      unsigned_type_node);
6661  f_ovf = build_decl (BUILTINS_LOCATION,
6662		      FIELD_DECL, get_identifier ("overflow_arg_area"),
6663		      ptr_type_node);
6664  f_sav = build_decl (BUILTINS_LOCATION,
6665		      FIELD_DECL, get_identifier ("reg_save_area"),
6666		      ptr_type_node);
6667
6668  va_list_gpr_counter_field = f_gpr;
6669  va_list_fpr_counter_field = f_fpr;
6670
6671  DECL_FIELD_CONTEXT (f_gpr) = record;
6672  DECL_FIELD_CONTEXT (f_fpr) = record;
6673  DECL_FIELD_CONTEXT (f_ovf) = record;
6674  DECL_FIELD_CONTEXT (f_sav) = record;
6675
6676  TREE_CHAIN (record) = type_decl;
6677  TYPE_NAME (record) = type_decl;
6678  TYPE_FIELDS (record) = f_gpr;
6679  TREE_CHAIN (f_gpr) = f_fpr;
6680  TREE_CHAIN (f_fpr) = f_ovf;
6681  TREE_CHAIN (f_ovf) = f_sav;
6682
6683  layout_type (record);
6684
6685  /* The correct type is an array type of one element.  */
6686  return build_array_type (record, build_index_type (size_zero_node));
6687}
6688
6689/* Setup the builtin va_list data type and for 64-bit the additional
6690   calling convention specific va_list data types.  */
6691
6692static tree
6693ix86_build_builtin_va_list (void)
6694{
6695  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6696
6697  /* Initialize abi specific va_list builtin types.  */
6698  if (TARGET_64BIT)
6699    {
6700      tree t;
6701      if (ix86_abi == MS_ABI)
6702        {
6703          t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6704          if (TREE_CODE (t) != RECORD_TYPE)
6705            t = build_variant_type_copy (t);
6706          sysv_va_list_type_node = t;
6707        }
6708      else
6709        {
6710          t = ret;
6711          if (TREE_CODE (t) != RECORD_TYPE)
6712            t = build_variant_type_copy (t);
6713          sysv_va_list_type_node = t;
6714        }
6715      if (ix86_abi != MS_ABI)
6716        {
6717          t = ix86_build_builtin_va_list_abi (MS_ABI);
6718          if (TREE_CODE (t) != RECORD_TYPE)
6719            t = build_variant_type_copy (t);
6720          ms_va_list_type_node = t;
6721        }
6722      else
6723        {
6724          t = ret;
6725          if (TREE_CODE (t) != RECORD_TYPE)
6726            t = build_variant_type_copy (t);
6727          ms_va_list_type_node = t;
6728        }
6729    }
6730
6731  return ret;
6732}
6733
6734/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
6735
6736static void
6737setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6738{
6739  rtx save_area, mem;
6740  rtx label;
6741  rtx label_ref;
6742  rtx tmp_reg;
6743  rtx nsse_reg;
6744  alias_set_type set;
6745  int i;
6746
6747  /* GPR size of varargs save area.  */
6748  if (cfun->va_list_gpr_size)
6749    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6750  else
6751    ix86_varargs_gpr_size = 0;
6752
6753  /* FPR size of varargs save area.  We don't need it if we don't pass
6754     anything in SSE registers.  */
6755  if (cum->sse_nregs && cfun->va_list_fpr_size)
6756    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6757  else
6758    ix86_varargs_fpr_size = 0;
6759
6760  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6761    return;
6762
6763  save_area = frame_pointer_rtx;
6764  set = get_varargs_alias_set ();
6765
6766  for (i = cum->regno;
6767       i < X86_64_REGPARM_MAX
6768       && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6769       i++)
6770    {
6771      mem = gen_rtx_MEM (Pmode,
6772			 plus_constant (save_area, i * UNITS_PER_WORD));
6773      MEM_NOTRAP_P (mem) = 1;
6774      set_mem_alias_set (mem, set);
6775      emit_move_insn (mem, gen_rtx_REG (Pmode,
6776					x86_64_int_parameter_registers[i]));
6777    }
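  /* For reference, the varargs register save area built here is laid out
     roughly as

	 reg_save_area +  0: %rdi %rsi %rdx %rcx %r8 %r9   (6 x 8 bytes)
	 reg_save_area + 48: %xmm0 ... %xmm7               (8 x 16 bytes)

     which matches the gp_offset/fp_offset accounting in ix86_va_start
     and ix86_gimplify_va_arg.  */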
6778
6779  if (ix86_varargs_fpr_size)
6780    {
      /* The stack must be 16-byte aligned for the FP register save area.  */
6782      if (crtl->stack_alignment_needed < 128)
6783	crtl->stack_alignment_needed = 128;
6784
      /* Now emit code to save SSE registers.  The AX parameter contains the
	 number of SSE parameter registers used to call this function.  We use
	 the sse_prologue_save insn template, which produces a computed jump
	 across the SSE saves.  We need some preparation work to get this
	 working.  */
6789
6790      label = gen_label_rtx ();
6791      label_ref = gen_rtx_LABEL_REF (Pmode, label);
6792
      /* Compute the address to jump to:
	 label - eax*4 + nnamed_sse_arguments*4, or
	 label - eax*5 + nnamed_sse_arguments*5 for AVX.  */
6796      tmp_reg = gen_reg_rtx (Pmode);
6797      nsse_reg = gen_reg_rtx (Pmode);
6798      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6799      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6800			      gen_rtx_MULT (Pmode, nsse_reg,
6801					    GEN_INT (4))));
6802
6803      /* vmovaps is one byte longer than movaps.  */
6804      if (TARGET_AVX)
6805	emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6806				gen_rtx_PLUS (Pmode, tmp_reg,
6807					      nsse_reg)));
6808
6809      if (cum->sse_regno)
6810	emit_move_insn
6811	  (nsse_reg,
6812	   gen_rtx_CONST (DImode,
6813			  gen_rtx_PLUS (DImode,
6814					label_ref,
6815					GEN_INT (cum->sse_regno
6816						 * (TARGET_AVX ? 5 : 4)))));
6817      else
6818	emit_move_insn (nsse_reg, label_ref);
6819      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6820
      /* Compute the address of the memory block we save into.  We always use
	 a pointer pointing 127 bytes after the first byte to store - this is
	 needed to keep the instruction size limited to 4 bytes (5 bytes for
	 AVX) with a one byte displacement.  */
6825      tmp_reg = gen_reg_rtx (Pmode);
6826      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6827			      plus_constant (save_area,
6828					     ix86_varargs_gpr_size + 127)));
6829      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6830      MEM_NOTRAP_P (mem) = 1;
6831      set_mem_alias_set (mem, set);
6832      set_mem_align (mem, BITS_PER_WORD);
6833
6834      /* And finally do the dirty job!  */
6835      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6836					GEN_INT (cum->sse_regno), label));
6837    }
6838}
6839
6840static void
6841setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6842{
6843  alias_set_type set = get_varargs_alias_set ();
6844  int i;
6845
6846  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
6847    {
6848      rtx reg, mem;
6849
6850      mem = gen_rtx_MEM (Pmode,
6851			 plus_constant (virtual_incoming_args_rtx,
6852					i * UNITS_PER_WORD));
6853      MEM_NOTRAP_P (mem) = 1;
6854      set_mem_alias_set (mem, set);
6855
6856      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6857      emit_move_insn (mem, reg);
6858    }
6859}
6860
6861static void
6862ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6863			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
6864			     int no_rtl)
6865{
6866  CUMULATIVE_ARGS next_cum;
6867  tree fntype;
6868
6869  /* This argument doesn't appear to be used anymore.  Which is good,
6870     because the old code here didn't suppress rtl generation.  */
6871  gcc_assert (!no_rtl);
6872
6873  if (!TARGET_64BIT)
6874    return;
6875
6876  fntype = TREE_TYPE (current_function_decl);
6877
6878  /* For varargs, we do not want to skip the dummy va_dcl argument.
6879     For stdargs, we do want to skip the last named argument.  */
6880  next_cum = *cum;
6881  if (stdarg_p (fntype))
6882    function_arg_advance (&next_cum, mode, type, 1);
6883
6884  if (cum->call_abi == MS_ABI)
6885    setup_incoming_varargs_ms_64 (&next_cum);
6886  else
6887    setup_incoming_varargs_64 (&next_cum);
6888}
6889
/* Checks whether TYPE is a va_list that is just a plain char pointer.  */
6891
6892static bool
6893is_va_list_char_pointer (tree type)
6894{
6895  tree canonic;
6896
6897  /* For 32-bit it is always true.  */
6898  if (!TARGET_64BIT)
6899    return true;
6900  canonic = ix86_canonical_va_list_type (type);
6901  return (canonic == ms_va_list_type_node
6902          || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6903}
6904
6905/* Implement va_start.  */
6906
6907static void
6908ix86_va_start (tree valist, rtx nextarg)
6909{
6910  HOST_WIDE_INT words, n_gpr, n_fpr;
6911  tree f_gpr, f_fpr, f_ovf, f_sav;
6912  tree gpr, fpr, ovf, sav, t;
6913  tree type;
6914
  /* Only the 64-bit target needs something special.  */
6916  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6917    {
6918      std_expand_builtin_va_start (valist, nextarg);
6919      return;
6920    }
6921
6922  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6923  f_fpr = TREE_CHAIN (f_gpr);
6924  f_ovf = TREE_CHAIN (f_fpr);
6925  f_sav = TREE_CHAIN (f_ovf);
6926
6927  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6928  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6929  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6930  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6931  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6932
6933  /* Count number of gp and fp argument registers used.  */
6934  words = crtl->args.info.words;
6935  n_gpr = crtl->args.info.regno;
6936  n_fpr = crtl->args.info.sse_regno;
6937
6938  if (cfun->va_list_gpr_size)
6939    {
6940      type = TREE_TYPE (gpr);
6941      t = build2 (MODIFY_EXPR, type,
6942		  gpr, build_int_cst (type, n_gpr * 8));
6943      TREE_SIDE_EFFECTS (t) = 1;
6944      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6945    }
6946
6947  if (TARGET_SSE && cfun->va_list_fpr_size)
6948    {
6949      type = TREE_TYPE (fpr);
6950      t = build2 (MODIFY_EXPR, type, fpr,
6951		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6952      TREE_SIDE_EFFECTS (t) = 1;
6953      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6954    }
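  /* For example, in a function declared as

	 void f (int a, double b, ...);

     one GP and one SSE register are consumed by the named arguments, so
     the code above initializes gp_offset to 8 and fp_offset to
     8 * X86_64_REGPARM_MAX + 16 (i.e. 64).  */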
6955
6956  /* Find the overflow area.  */
6957  type = TREE_TYPE (ovf);
6958  t = make_tree (type, crtl->args.internal_arg_pointer);
6959  if (words != 0)
6960    t = build2 (POINTER_PLUS_EXPR, type, t,
6961	        size_int (words * UNITS_PER_WORD));
6962  t = build2 (MODIFY_EXPR, type, ovf, t);
6963  TREE_SIDE_EFFECTS (t) = 1;
6964  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6965
6966  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6967    {
      /* Find the register save area.
	 The function prologue saves it right above the stack frame.  */
6970      type = TREE_TYPE (sav);
6971      t = make_tree (type, frame_pointer_rtx);
6972      if (!ix86_varargs_gpr_size)
6973	t = build2 (POINTER_PLUS_EXPR, type, t,
6974		    size_int (-8 * X86_64_REGPARM_MAX));
6975      t = build2 (MODIFY_EXPR, type, sav, t);
6976      TREE_SIDE_EFFECTS (t) = 1;
6977      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6978    }
6979}
6980
6981/* Implement va_arg.  */
6982
6983static tree
6984ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6985		      gimple_seq *post_p)
6986{
6987  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6988  tree f_gpr, f_fpr, f_ovf, f_sav;
6989  tree gpr, fpr, ovf, sav, t;
6990  int size, rsize;
6991  tree lab_false, lab_over = NULL_TREE;
6992  tree addr, t2;
6993  rtx container;
6994  int indirect_p = 0;
6995  tree ptrtype;
6996  enum machine_mode nat_mode;
6997  int arg_boundary;
6998
  /* Only the 64-bit target needs something special.  */
7000  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7001    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7002
7003  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7004  f_fpr = TREE_CHAIN (f_gpr);
7005  f_ovf = TREE_CHAIN (f_fpr);
7006  f_sav = TREE_CHAIN (f_ovf);
7007
7008  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7009		build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7010  valist = build_va_arg_indirect_ref (valist);
7011  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7012  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7013  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7014
7015  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7016  if (indirect_p)
7017    type = build_pointer_type (type);
7018  size = int_size_in_bytes (type);
7019  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7020
7021  nat_mode = type_natural_mode (type, NULL);
7022  switch (nat_mode)
7023    {
7024    case V8SFmode:
7025    case V8SImode:
7026    case V32QImode:
7027    case V16HImode:
7028    case V4DFmode:
7029    case V4DImode:
      /* Unnamed 256-bit vector mode parameters are passed on the stack.  */
7031      if (ix86_cfun_abi () == SYSV_ABI)
7032	{
7033	  container = NULL;
7034	  break;
7035	}
7036
7037    default:
7038      container = construct_container (nat_mode, TYPE_MODE (type),
7039				       type, 0, X86_64_REGPARM_MAX,
7040				       X86_64_SSE_REGPARM_MAX, intreg,
7041				       0);
7042      break;
7043    }
7044
7045  /* Pull the value out of the saved registers.  */
7046
7047  addr = create_tmp_var (ptr_type_node, "addr");
7048
7049  if (container)
7050    {
7051      int needed_intregs, needed_sseregs;
7052      bool need_temp;
7053      tree int_addr, sse_addr;
7054
7055      lab_false = create_artificial_label (UNKNOWN_LOCATION);
7056      lab_over = create_artificial_label (UNKNOWN_LOCATION);
7057
7058      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7059
7060      need_temp = (!REG_P (container)
7061		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
7062		       || TYPE_ALIGN (type) > 128));
7063
      /* In case we are passing a structure, verify that it is a consecutive
	 block on the register save area.  If not, we need to do moves.  */
7066      if (!need_temp && !REG_P (container))
7067	{
	  /* Verify that all registers are strictly consecutive.  */
7069	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7070	    {
7071	      int i;
7072
7073	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7074		{
7075		  rtx slot = XVECEXP (container, 0, i);
7076		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7077		      || INTVAL (XEXP (slot, 1)) != i * 16)
7078		    need_temp = 1;
7079		}
7080	    }
7081	  else
7082	    {
7083	      int i;
7084
7085	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7086		{
7087		  rtx slot = XVECEXP (container, 0, i);
7088		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7089		      || INTVAL (XEXP (slot, 1)) != i * 8)
7090		    need_temp = 1;
7091		}
7092	    }
7093	}
7094      if (!need_temp)
7095	{
7096	  int_addr = addr;
7097	  sse_addr = addr;
7098	}
7099      else
7100	{
7101	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
7102	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7103	}
7104
7105      /* First ensure that we fit completely in registers.  */
7106      if (needed_intregs)
7107	{
7108	  t = build_int_cst (TREE_TYPE (gpr),
7109			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7110	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7111	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7112	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7113	  gimplify_and_add (t, pre_p);
7114	}
7115      if (needed_sseregs)
7116	{
7117	  t = build_int_cst (TREE_TYPE (fpr),
7118			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7119			     + X86_64_REGPARM_MAX * 8);
7120	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7121	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7122	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7123	  gimplify_and_add (t, pre_p);
7124	}
7125
7126      /* Compute index to start of area used for integer regs.  */
7127      if (needed_intregs)
7128	{
7129	  /* int_addr = gpr + sav; */
7130	  t = fold_convert (sizetype, gpr);
7131	  t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7132	  gimplify_assign (int_addr, t, pre_p);
7133	}
7134      if (needed_sseregs)
7135	{
7136	  /* sse_addr = fpr + sav; */
7137	  t = fold_convert (sizetype, fpr);
7138	  t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7139	  gimplify_assign (sse_addr, t, pre_p);
7140	}
7141      if (need_temp)
7142	{
7143	  int i, prev_size = 0;
7144	  tree temp = create_tmp_var (type, "va_arg_tmp");
7145
7146	  /* addr = &temp; */
7147	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7148	  gimplify_assign (addr, t, pre_p);
7149
7150	  for (i = 0; i < XVECLEN (container, 0); i++)
7151	    {
7152	      rtx slot = XVECEXP (container, 0, i);
7153	      rtx reg = XEXP (slot, 0);
7154	      enum machine_mode mode = GET_MODE (reg);
7155	      tree piece_type;
7156	      tree addr_type;
7157	      tree daddr_type;
7158	      tree src_addr, src;
7159	      int src_offset;
7160	      tree dest_addr, dest;
7161	      int cur_size = GET_MODE_SIZE (mode);
7162
7163	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7164	      prev_size = INTVAL (XEXP (slot, 1));
7165	      if (prev_size + cur_size > size)
7166		{
7167		  cur_size = size - prev_size;
7168		  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7169		  if (mode == BLKmode)
7170		    mode = QImode;
7171		}
7172	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
7173	      if (mode == GET_MODE (reg))
7174		addr_type = build_pointer_type (piece_type);
7175	      else
7176		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7177							 true);
7178	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7179							true);
7180
7181	      if (SSE_REGNO_P (REGNO (reg)))
7182		{
7183		  src_addr = sse_addr;
7184		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7185		}
7186	      else
7187		{
7188		  src_addr = int_addr;
7189		  src_offset = REGNO (reg) * 8;
7190		}
7191	      src_addr = fold_convert (addr_type, src_addr);
7192	      src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7193				      size_int (src_offset));
7194
7195	      dest_addr = fold_convert (daddr_type, addr);
7196	      dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7197				       size_int (prev_size));
7198	      if (cur_size == GET_MODE_SIZE (mode))
7199		{
7200		  src = build_va_arg_indirect_ref (src_addr);
7201		  dest = build_va_arg_indirect_ref (dest_addr);
7202
7203		  gimplify_assign (dest, src, pre_p);
7204		}
7205	      else
7206		{
7207		  tree copy
7208		    = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7209				       3, dest_addr, src_addr,
7210				       size_int (cur_size));
7211		  gimplify_and_add (copy, pre_p);
7212		}
7213	      prev_size += cur_size;
7214	    }
7215	}
7216
7217      if (needed_intregs)
7218	{
7219	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7220		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7221	  gimplify_assign (gpr, t, pre_p);
7222	}
7223
7224      if (needed_sseregs)
7225	{
7226	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7227		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7228	  gimplify_assign (fpr, t, pre_p);
7229	}
7230
7231      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7232
7233      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7234    }
7235
7236  /* ... otherwise out of the overflow area.  */
7237
  /* When we align a parameter on the stack for the caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be aligned at
     MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee here with the
     caller.  */
7242  arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7243  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7244    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7245
7246  /* Care for on-stack alignment if needed.  */
7247  if (arg_boundary <= 64
7248      || integer_zerop (TYPE_SIZE (type)))
7249    t = ovf;
  else
7251    {
7252      HOST_WIDE_INT align = arg_boundary / 8;
7253      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7254		  size_int (align - 1));
7255      t = fold_convert (sizetype, t);
7256      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7257		  size_int (-align));
7258      t = fold_convert (TREE_TYPE (ovf), t);
7259    }
7260  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7261  gimplify_assign (addr, t, pre_p);
7262
7263  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7264	      size_int (rsize * UNITS_PER_WORD));
7265  gimplify_assign (unshare_expr (ovf), t, pre_p);
7266
7267  if (container)
7268    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7269
7270  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7271  addr = fold_convert (ptrtype, addr);
7272
7273  if (indirect_p)
7274    addr = build_va_arg_indirect_ref (addr);
7275  return build_va_arg_indirect_ref (addr);
7276}
7277
7278/* Return nonzero if OPNUM's MEM should be matched
7279   in movabs* patterns.  */
7280
7281int
7282ix86_check_movabs (rtx insn, int opnum)
7283{
7284  rtx set, mem;
7285
7286  set = PATTERN (insn);
7287  if (GET_CODE (set) == PARALLEL)
7288    set = XVECEXP (set, 0, 0);
7289  gcc_assert (GET_CODE (set) == SET);
7290  mem = XEXP (set, opnum);
7291  while (GET_CODE (mem) == SUBREG)
7292    mem = SUBREG_REG (mem);
7293  gcc_assert (MEM_P (mem));
7294  return (volatile_ok || !MEM_VOLATILE_P (mem));
7295}
7296
7297/* Initialize the table of extra 80387 mathematical constants.  */
7298
7299static void
7300init_ext_80387_constants (void)
7301{
7302  static const char * cst[5] =
7303  {
7304    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
7305    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
7306    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
7307    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
7308    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
7309  };
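  /* These are, respectively, log10(2), ln(2), log2(e), log2(10) and pi,
     i.e. the values loaded by the x87 fldlg2, fldln2, fldl2e, fldl2t and
     fldpi instructions.  */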
7310  int i;
7311
7312  for (i = 0; i < 5; i++)
7313    {
7314      real_from_string (&ext_80387_constants_table[i], cst[i]);
7315      /* Ensure each constant is rounded to XFmode precision.  */
7316      real_convert (&ext_80387_constants_table[i],
7317		    XFmode, &ext_80387_constants_table[i]);
7318    }
7319
7320  ext_80387_constants_init = 1;
7321}
7322
/* Return nonzero if the constant X is something that can be loaded with
   a special 80387 instruction; the return value encodes which one (see
   standard_80387_constant_opcode).  Return -1 if X is not an 80387
   floating-point constant at all.  */
7325
7326int
7327standard_80387_constant_p (rtx x)
7328{
7329  enum machine_mode mode = GET_MODE (x);
7330
7331  REAL_VALUE_TYPE r;
7332
7333  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7334    return -1;
7335
7336  if (x == CONST0_RTX (mode))
7337    return 1;
7338  if (x == CONST1_RTX (mode))
7339    return 2;
7340
7341  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7342
7343  /* For XFmode constants, try to find a special 80387 instruction when
7344     optimizing for size or on those CPUs that benefit from them.  */
7345  if (mode == XFmode
7346      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7347    {
7348      int i;
7349
7350      if (! ext_80387_constants_init)
7351	init_ext_80387_constants ();
7352
7353      for (i = 0; i < 5; i++)
7354        if (real_identical (&r, &ext_80387_constants_table[i]))
7355	  return i + 3;
7356    }
7357
7358  /* Load of the constant -0.0 or -1.0 will be split as
7359     fldz;fchs or fld1;fchs sequence.  */
7360  if (real_isnegzero (&r))
7361    return 8;
7362  if (real_identical (&r, &dconstm1))
7363    return 9;
7364
7365  return 0;
7366}
7367
7368/* Return the opcode of the special instruction to be used to load
7369   the constant X.  */
7370
7371const char *
7372standard_80387_constant_opcode (rtx x)
7373{
7374  switch (standard_80387_constant_p (x))
7375    {
7376    case 1:
7377      return "fldz";
7378    case 2:
7379      return "fld1";
7380    case 3:
7381      return "fldlg2";
7382    case 4:
7383      return "fldln2";
7384    case 5:
7385      return "fldl2e";
7386    case 6:
7387      return "fldl2t";
7388    case 7:
7389      return "fldpi";
7390    case 8:
7391    case 9:
7392      return "#";
7393    default:
7394      gcc_unreachable ();
7395    }
7396}
7397
7398/* Return the CONST_DOUBLE representing the 80387 constant that is
7399   loaded by the specified special instruction.  The argument IDX
7400   matches the return value from standard_80387_constant_p.  */
7401
7402rtx
7403standard_80387_constant_rtx (int idx)
7404{
7405  int i;
7406
7407  if (! ext_80387_constants_init)
7408    init_ext_80387_constants ();
7409
7410  switch (idx)
7411    {
7412    case 3:
7413    case 4:
7414    case 5:
7415    case 6:
7416    case 7:
7417      i = idx - 3;
7418      break;
7419
7420    default:
7421      gcc_unreachable ();
7422    }
7423
7424  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7425				       XFmode);
7426}
7427
/* Return 1 if X is all zeros and 2 if X is all ones
   in a supported SSE vector mode.  */
7430
7431int
7432standard_sse_constant_p (rtx x)
7433{
7434  enum machine_mode mode = GET_MODE (x);
7435
7436  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7437    return 1;
7438  if (vector_all_ones_operand (x, mode))
7439    switch (mode)
7440      {
7441      case V16QImode:
7442      case V8HImode:
7443      case V4SImode:
7444      case V2DImode:
7445	if (TARGET_SSE2)
7446	  return 2;
7447      default:
7448	break;
7449      }
7450
7451  return 0;
7452}
7453
7454/* Return the opcode of the special instruction to be used to load
7455   the constant X.  */
7456
7457const char *
7458standard_sse_constant_opcode (rtx insn, rtx x)
7459{
7460  switch (standard_sse_constant_p (x))
7461    {
7462    case 1:
7463      switch (get_attr_mode (insn))
7464	{
7465	case MODE_V4SF:
7466	  return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7467	case MODE_V2DF:
7468	  return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7469	case MODE_TI:
7470	  return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7471	case MODE_V8SF:
7472	  return "vxorps\t%x0, %x0, %x0";
7473	case MODE_V4DF:
7474	  return "vxorpd\t%x0, %x0, %x0";
7475	case MODE_OI:
7476	  return "vpxor\t%x0, %x0, %x0";
7477	default:
7478	  break;
7479	}
7480    case 2:
7481      return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
7482    default:
7483      break;
7484    }
7485  gcc_unreachable ();
7486}
7487
/* Returns 1 if OP contains a symbol reference.  */
7489
7490int
7491symbolic_reference_mentioned_p (rtx op)
7492{
7493  const char *fmt;
7494  int i;
7495
7496  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7497    return 1;
7498
7499  fmt = GET_RTX_FORMAT (GET_CODE (op));
7500  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7501    {
7502      if (fmt[i] == 'E')
7503	{
7504	  int j;
7505
7506	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7507	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7508	      return 1;
7509	}
7510
7511      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7512	return 1;
7513    }
7514
7515  return 0;
7516}
7517
7518/* Return 1 if it is appropriate to emit `ret' instructions in the
7519   body of a function.  Do this only if the epilogue is simple, needing a
7520   couple of insns.  Prior to reloading, we can't tell how many registers
7521   must be saved, so return 0 then.  Return 0 if there is no frame
7522   marker to de-allocate.  */
7523
7524int
7525ix86_can_use_return_insn_p (void)
7526{
7527  struct ix86_frame frame;
7528
7529  if (! reload_completed || frame_pointer_needed)
7530    return 0;
7531
  /* Don't allow more than 32768 bytes of arguments to be popped, since
     that's all we can do with one instruction.  */
7534  if (crtl->args.pops_args
7535      && crtl->args.size >= 32768)
7536    return 0;
7537
7538  ix86_compute_frame_layout (&frame);
7539  return frame.to_allocate == 0 && frame.padding0 == 0
7540         && (frame.nregs + frame.nsseregs) == 0;
7541}
7542
7543/* Value should be nonzero if functions must have frame pointers.
7544   Zero means the frame pointer need not be set up (and parms may
7545   be accessed via the stack pointer) in functions that seem suitable.  */
7546
7547static bool
7548ix86_frame_pointer_required (void)
7549{
7550  /* If we accessed previous frames, then the generated code expects
7551     to be able to access the saved ebp value in our frame.  */
7552  if (cfun->machine->accesses_prev_frame)
7553    return true;
7554
  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
7557  if (SUBTARGET_FRAME_POINTER_REQUIRED)
7558    return true;
7559
7560  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7561     the frame pointer by default.  Turn it back on now if we've not
7562     got a leaf function.  */
7563  if (TARGET_OMIT_LEAF_FRAME_POINTER
7564      && (!current_function_is_leaf
7565	  || ix86_current_function_calls_tls_descriptor))
7566    return true;
7567
7568  if (crtl->profile)
7569    return true;
7570
7571  return false;
7572}
7573
7574/* Record that the current function accesses previous call frames.  */
7575
7576void
7577ix86_setup_frame_addresses (void)
7578{
7579  cfun->machine->accesses_prev_frame = 1;
7580}
7581
7582#ifndef USE_HIDDEN_LINKONCE
7583# if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7584#  define USE_HIDDEN_LINKONCE 1
7585# else
7586#  define USE_HIDDEN_LINKONCE 0
7587# endif
7588#endif
7589
7590static int pic_labels_used;
7591
7592/* Fills in the label name that should be used for a pc thunk for
7593   the given register.  */
7594
7595static void
7596get_pc_thunk_name (char name[32], unsigned int regno)
7597{
7598  gcc_assert (!TARGET_64BIT);
7599
7600  if (USE_HIDDEN_LINKONCE)
7601    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7602  else
7603    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7604}
7605
7606
/* Output the "get PC" thunks used by -fpic code.  Each thunk loads the
   return address of the caller into a particular register and then
   returns; one thunk is emitted per register that was actually used.  */
7609
7610static void
7611ix86_code_end (void)
7612{
7613  rtx xops[2];
7614  int regno;
7615
7616  for (regno = 0; regno < 8; ++regno)
7617    {
7618      char name[32];
7619      tree decl;
7620
7621      if (! ((pic_labels_used >> regno) & 1))
7622	continue;
7623
7624      get_pc_thunk_name (name, regno);
7625
7626      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
7627			 get_identifier (name),
7628			 build_function_type (void_type_node, void_list_node));
7629      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
7630				       NULL_TREE, void_type_node);
7631      TREE_PUBLIC (decl) = 1;
7632      TREE_STATIC (decl) = 1;
7633
7634#if TARGET_MACHO
7635      if (TARGET_MACHO)
7636	{
7637	  switch_to_section (darwin_sections[text_coal_section]);
7638	  fputs ("\t.weak_definition\t", asm_out_file);
7639	  assemble_name (asm_out_file, name);
7640	  fputs ("\n\t.private_extern\t", asm_out_file);
7641	  assemble_name (asm_out_file, name);
7642	  fputs ("\n", asm_out_file);
7643	  ASM_OUTPUT_LABEL (asm_out_file, name);
7644	  DECL_WEAK (decl) = 1;
7645	}
7646      else
7647#endif
7648      if (USE_HIDDEN_LINKONCE)
7649	{
7650	  DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7651
7652	  (*targetm.asm_out.unique_section) (decl, 0);
7653	  switch_to_section (get_named_section (decl, NULL, 0));
7654
7655	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
7656	  fputs ("\t.hidden\t", asm_out_file);
7657	  assemble_name (asm_out_file, name);
7658	  putc ('\n', asm_out_file);
7659	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7660	}
7661      else
7662	{
7663	  switch_to_section (text_section);
7664	  ASM_OUTPUT_LABEL (asm_out_file, name);
7665	}
7666
7667      DECL_INITIAL (decl) = make_node (BLOCK);
7668      current_function_decl = decl;
7669      init_function_start (decl);
7670      first_function_block_is_cold = false;
7671      /* Make sure unwind info is emitted for the thunk if needed.  */
7672      final_start_function (emit_barrier (), asm_out_file, 1);
7673
7674      xops[0] = gen_rtx_REG (Pmode, regno);
7675      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7676      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7677      output_asm_insn ("ret", xops);
7678      final_end_function ();
7679      init_insn_lengths ();
7680      free_after_compilation (cfun);
7681      set_cfun (NULL);
7682      current_function_decl = NULL;
7683    }
7684}
7685
7686/* Emit code for the SET_GOT patterns.  */
7687
7688const char *
7689output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7690{
7691  rtx xops[3];
7692
7693  xops[0] = dest;
7694
7695  if (TARGET_VXWORKS_RTP && flag_pic)
7696    {
7697      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
7698      xops[2] = gen_rtx_MEM (Pmode,
7699			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7700      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7701
7702      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7703	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7704	 an unadorned address.  */
7705      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7706      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7707      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7708      return "";
7709    }
7710
7711  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7712
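  /* Roughly, for PIC code without deep branch prediction this emits the
     classic sequence

	   call  .L2
     .L2:  popl  %reg
	   addl  $_GLOBAL_OFFSET_TABLE_+[.-.L2], %reg

     while with deep branch prediction a small pc thunk is called instead,
     which keeps the processor's return-address predictor balanced.  */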
7713  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7714    {
7715      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7716
7717      if (!flag_pic)
7718	output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7719      else
7720	{
7721	  output_asm_insn ("call\t%a2", xops);
7722#ifdef DWARF2_UNWIND_INFO
	  /* The call to the next label acts as a push.  */
7724	  if (dwarf2out_do_frame ())
7725	    {
7726	      rtx insn;
7727	      start_sequence ();
7728	      insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
7729					     gen_rtx_PLUS (Pmode,
7730							   stack_pointer_rtx,
7731							   GEN_INT (-4))));
7732	      RTX_FRAME_RELATED_P (insn) = 1;
7733	      dwarf2out_frame_debug (insn, true);
7734	      end_sequence ();
7735	    }
7736#endif
7737	}
7738
7739#if TARGET_MACHO
7740      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7741         is what will be referenced by the Mach-O PIC subsystem.  */
7742      if (!label)
7743	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7744#endif
7745
7746      (*targetm.asm_out.internal_label) (asm_out_file, "L",
7747				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7748
7749      if (flag_pic)
7750	{
7751	  output_asm_insn ("pop%z0\t%0", xops);
7752#ifdef DWARF2_UNWIND_INFO
	  /* The pop deallocates the stack slot and clobbers DEST, but it
	     does not count as restoring DEST for unwind info purposes.  */
7755	  if (dwarf2out_do_frame ())
7756	    {
7757	      rtx insn;
7758	      start_sequence ();
7759	      insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
7760	      dwarf2out_frame_debug (insn, true);
7761	      insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
7762					     gen_rtx_PLUS (Pmode,
7763							   stack_pointer_rtx,
7764							   GEN_INT (4))));
7765	      RTX_FRAME_RELATED_P (insn) = 1;
7766	      dwarf2out_frame_debug (insn, true);
7767	      end_sequence ();
7768	    }
7769#endif
7770	}
7771    }
7772  else
7773    {
7774      char name[32];
7775      get_pc_thunk_name (name, REGNO (dest));
7776      pic_labels_used |= 1 << REGNO (dest);
7777
7778#ifdef DWARF2_UNWIND_INFO
7779      /* Ensure all queued register saves are flushed before the
7780	 call.  */
7781      if (dwarf2out_do_frame ())
7782	dwarf2out_flush_queued_reg_saves ();
7783#endif
7784      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7785      xops[2] = gen_rtx_MEM (QImode, xops[2]);
7786      output_asm_insn ("call\t%X2", xops);
7787      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7788         is what will be referenced by the Mach-O PIC subsystem.  */
7789#if TARGET_MACHO
7790      if (!label)
7791	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7792      else
7793        targetm.asm_out.internal_label (asm_out_file, "L",
7794					   CODE_LABEL_NUMBER (label));
7795#endif
7796    }
7797
7798  if (TARGET_MACHO)
7799    return "";
7800
7801  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7802    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7803  else
7804    output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7805
7806  return "";
7807}
7808
/* Generate a "push" pattern for input ARG.  */
7810
7811static rtx
7812gen_push (rtx arg)
7813{
7814  if (ix86_cfa_state->reg == stack_pointer_rtx)
7815    ix86_cfa_state->offset += UNITS_PER_WORD;
7816
7817  return gen_rtx_SET (VOIDmode,
7818		      gen_rtx_MEM (Pmode,
7819				   gen_rtx_PRE_DEC (Pmode,
7820						    stack_pointer_rtx)),
7821		      arg);
7822}
7823
/* Return the number of an unused call-clobbered register that is available
   for the entire function, or INVALID_REGNUM if there is none.  */
7826
7827static unsigned int
7828ix86_select_alt_pic_regnum (void)
7829{
7830  if (current_function_is_leaf && !crtl->profile
7831      && !ix86_current_function_calls_tls_descriptor)
7832    {
7833      int i, drap;
7834      /* Can't use the same register for both PIC and DRAP.  */
7835      if (crtl->drap_reg)
7836	drap = REGNO (crtl->drap_reg);
7837      else
7838	drap = -1;
7839      for (i = 2; i >= 0; --i)
7840        if (i != drap && !df_regs_ever_live_p (i))
7841	  return i;
7842    }
7843
7844  return INVALID_REGNUM;
7845}
7846
7847/* Return 1 if we need to save REGNO.  */
7848static int
7849ix86_save_reg (unsigned int regno, int maybe_eh_return)
7850{
7851  if (pic_offset_table_rtx
7852      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7853      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7854	  || crtl->profile
7855	  || crtl->calls_eh_return
7856	  || crtl->uses_const_pool))
7857    {
7858      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7859	return 0;
7860      return 1;
7861    }
7862
7863  if (crtl->calls_eh_return && maybe_eh_return)
7864    {
7865      unsigned i;
7866      for (i = 0; ; i++)
7867	{
7868	  unsigned test = EH_RETURN_DATA_REGNO (i);
7869	  if (test == INVALID_REGNUM)
7870	    break;
7871	  if (test == regno)
7872	    return 1;
7873	}
7874    }
7875
7876  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
7877    return 1;
7878
7879  return (df_regs_ever_live_p (regno)
7880	  && !call_used_regs[regno]
7881	  && !fixed_regs[regno]
7882	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7883}
7884
/* Return the number of saved general purpose registers.  */
7886
7887static int
7888ix86_nsaved_regs (void)
7889{
7890  int nregs = 0;
7891  int regno;
7892
7893  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7894    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7895      nregs ++;
7896  return nregs;
7897}
7898
/* Return the number of saved SSE registers.  */
7900
7901static int
7902ix86_nsaved_sseregs (void)
7903{
7904  int nregs = 0;
7905  int regno;
7906
7907  if (ix86_cfun_abi () != MS_ABI)
7908    return 0;
7909  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7910    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7911      nregs ++;
7912  return nregs;
7913}
7914
7915/* Given FROM and TO register numbers, say whether this elimination is
7916   allowed.  If stack alignment is needed, we can only replace argument
7917   pointer with hard frame pointer, or replace frame pointer with stack
7918   pointer.  Otherwise, frame pointer elimination is automatically
7919   handled and all other eliminations are valid.  */
7920
7921static bool
7922ix86_can_eliminate (const int from, const int to)
7923{
7924  if (stack_realign_fp)
7925    return ((from == ARG_POINTER_REGNUM
7926	     && to == HARD_FRAME_POINTER_REGNUM)
7927	    || (from == FRAME_POINTER_REGNUM
7928		&& to == STACK_POINTER_REGNUM));
7929  else
7930    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
7931}
7932
7933/* Return the offset between two registers, one to be eliminated, and the other
7934   its replacement, at the start of a routine.  */
7935
7936HOST_WIDE_INT
7937ix86_initial_elimination_offset (int from, int to)
7938{
7939  struct ix86_frame frame;
7940  ix86_compute_frame_layout (&frame);
7941
7942  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7943    return frame.hard_frame_pointer_offset;
7944  else if (from == FRAME_POINTER_REGNUM
7945	   && to == HARD_FRAME_POINTER_REGNUM)
7946    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7947  else
7948    {
7949      gcc_assert (to == STACK_POINTER_REGNUM);
7950
7951      if (from == ARG_POINTER_REGNUM)
7952	return frame.stack_pointer_offset;
7953
7954      gcc_assert (from == FRAME_POINTER_REGNUM);
7955      return frame.stack_pointer_offset - frame.frame_pointer_offset;
7956    }
7957}
7958
7959/* In a dynamically-aligned function, we can't know the offset from
7960   stack pointer to frame pointer, so we must ensure that setjmp
7961   eliminates fp against the hard fp (%ebp) rather than trying to
7962   index from %esp up to the top of the frame across a gap that is
7963   of unknown (at compile-time) size.  */
7964static rtx
7965ix86_builtin_setjmp_frame_value (void)
7966{
7967  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7968}
7969
/* Fill the ix86_frame structure describing the frame of the current
   function.  */
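
/* A rough sketch of the layout computed below, from the incoming
   stack pointer downwards (bracketed items are present only when the
   corresponding feature is used; the code below is authoritative):

	<- incoming stack pointer
	return address
	[pushed static chain]
	[saved frame pointer]		<- hard_frame_pointer_offset
	[realignment padding if stack_realign_fp]
	general register save area	(nregs words)
	padding0			(aligns SSE save area to 16)
	SSE register save area		(nsseregs * 16)
	va_arg register save area
	padding1			(aligns the local frame)
					<- frame_pointer_offset
	local variables			(get_frame_size ())
	outgoing arguments
	padding2			(aligns to preferred boundary)
					<- stack_pointer_offset

   When the red zone is usable, part of the allocation is carved out
   of it at the very end of this function.  */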
7971
7972static void
7973ix86_compute_frame_layout (struct ix86_frame *frame)
7974{
7975  unsigned int stack_alignment_needed;
7976  HOST_WIDE_INT offset;
7977  unsigned int preferred_alignment;
7978  HOST_WIDE_INT size = get_frame_size ();
7979
7980  frame->nregs = ix86_nsaved_regs ();
7981  frame->nsseregs = ix86_nsaved_sseregs ();
7982
7983  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7984  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7985
  /* The MS ABI seems to require stack alignment to always be 16,
     except inside function prologues.  */
7988  if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7989    {
7990      preferred_alignment = 16;
7991      stack_alignment_needed = 16;
7992      crtl->preferred_stack_boundary = 128;
7993      crtl->stack_alignment_needed = 128;
7994    }
7995
7996  gcc_assert (!size || stack_alignment_needed);
7997  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7998  gcc_assert (preferred_alignment <= stack_alignment_needed);
7999
  /* During reload iterations the number of registers saved can change.
     Recompute the value as needed.  Do not recompute when the number of
     registers did not change, as reload calls this function multiple
     times and does not expect the decision to change within a single
     iteration.  */
8004  if (!optimize_function_for_size_p (cfun)
8005      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8006    {
8007      int count = frame->nregs;
8008
8009      cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses moves instead of pushes to save registers.
	 This is significantly longer, but it also executes faster, as
	 modern hardware can execute the moves in parallel but cannot do
	 so for push/pop.

	 Be careful about choosing which prologue to emit: when the
	 function takes many instructions to execute, we may as well use
	 the slow version, and likewise when the function is known to be
	 outside a hot spot (known only with profile feedback).  Weight
	 the size of the function by the number of registers to save,
	 since it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
8020      if (count)
8021	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8022      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
8023	  || (flag_branch_probabilities
8024	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
8025        cfun->machine->use_fast_prologue_epilogue = false;
8026      else
8027        cfun->machine->use_fast_prologue_epilogue
8028	   = !expensive_function_p (count);
8029    }
8030  if (TARGET_PROLOGUE_USING_MOVE
8031      && cfun->machine->use_fast_prologue_epilogue)
8032    frame->save_regs_using_mov = true;
8033  else
8034    frame->save_regs_using_mov = false;
8035
8036  /* Skip return address.  */
8037  offset = UNITS_PER_WORD;
8038
8039  /* Skip pushed static chain.  */
8040  if (ix86_static_chain_on_stack)
8041    offset += UNITS_PER_WORD;
8042
8043  /* Skip saved base pointer.  */
8044  if (frame_pointer_needed)
8045    offset += UNITS_PER_WORD;
8046
8047  frame->hard_frame_pointer_offset = offset;
8048
  /* Round the offset up to the required alignment, since the realigned
     frame starts here.  */
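  /* For instance, with offset == 12 and stack_alignment_needed == 16,
     (12 + 16 - 1) & -16 == 16, i.e. the offset is rounded up to the
     next multiple of the required alignment.  */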
8051  if (stack_realign_fp)
8052    offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
8053
8054  /* Register save area */
8055  offset += frame->nregs * UNITS_PER_WORD;
8056
8057  /* Align SSE reg save area.  */
8058  if (frame->nsseregs)
8059    frame->padding0 = ((offset + 16 - 1) & -16) - offset;
8060  else
8061    frame->padding0 = 0;
8062
8063  /* SSE register save area.  */
8064  offset += frame->padding0 + frame->nsseregs * 16;
8065
8066  /* Va-arg area */
8067  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8068  offset += frame->va_arg_size;
8069
8070  /* Align start of frame for local function.  */
8071  frame->padding1 = ((offset + stack_alignment_needed - 1)
8072		     & -stack_alignment_needed) - offset;
8073
8074  offset += frame->padding1;
8075
8076  /* Frame pointer points here.  */
8077  frame->frame_pointer_offset = offset;
8078
8079  offset += size;
8080
  /* Add the outgoing arguments area.  It can be skipped if we
     eliminated all the function calls as dead code.
     Skipping is however impossible when the function calls alloca, as
     the alloca expander assumes that the last crtl->outgoing_args_size
     bytes of the stack frame are unused.  */
8086  if (ACCUMULATE_OUTGOING_ARGS
8087      && (!current_function_is_leaf || cfun->calls_alloca
8088	  || ix86_current_function_calls_tls_descriptor))
8089    {
8090      offset += crtl->outgoing_args_size;
8091      frame->outgoing_arguments_size = crtl->outgoing_args_size;
8092    }
8093  else
8094    frame->outgoing_arguments_size = 0;
8095
8096  /* Align stack boundary.  Only needed if we're calling another function
8097     or using alloca.  */
8098  if (!current_function_is_leaf || cfun->calls_alloca
8099      || ix86_current_function_calls_tls_descriptor)
8100    frame->padding2 = ((offset + preferred_alignment - 1)
8101		       & -preferred_alignment) - offset;
8102  else
8103    frame->padding2 = 0;
8104
8105  offset += frame->padding2;
8106
8107  /* We've reached end of stack frame.  */
8108  frame->stack_pointer_offset = offset;
8109
  /* Size the prologue needs to allocate.  */
8111  frame->to_allocate =
8112    (size + frame->padding1 + frame->padding2
8113     + frame->outgoing_arguments_size + frame->va_arg_size);
8114
8115  if ((!frame->to_allocate && frame->nregs <= 1)
8116      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
8117    frame->save_regs_using_mov = false;
8118
8119  if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8120      && current_function_sp_is_unchanging
8121      && current_function_is_leaf
8122      && !ix86_current_function_calls_tls_descriptor)
8123    {
8124      frame->red_zone_size = frame->to_allocate;
8125      if (frame->save_regs_using_mov)
8126	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8127      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8128	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8129    }
8130  else
8131    frame->red_zone_size = 0;
8132  frame->to_allocate -= frame->red_zone_size;
8133  frame->stack_pointer_offset -= frame->red_zone_size;
8134}
8135
8136/* Emit code to save registers in the prologue.  */
8137
8138static void
8139ix86_emit_save_regs (void)
8140{
8141  unsigned int regno;
8142  rtx insn;
8143
8144  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8145    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8146      {
8147	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8148	RTX_FRAME_RELATED_P (insn) = 1;
8149      }
8150}
8151
/* Emit code to save registers using MOV insns.  The first register
   is saved at POINTER + OFFSET.  */
8154static void
8155ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8156{
8157  unsigned int regno;
8158  rtx insn;
8159
8160  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8161    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8162      {
8163	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8164					       Pmode, offset),
8165			       gen_rtx_REG (Pmode, regno));
8166	RTX_FRAME_RELATED_P (insn) = 1;
8167	offset += UNITS_PER_WORD;
8168      }
8169}
8170
/* Emit code to save SSE registers using MOV insns.  The first register
   is saved at POINTER + OFFSET.  */
8173static void
8174ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8175{
8176  unsigned int regno;
8177  rtx insn;
8178  rtx mem;
8179
8180  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8181    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8182      {
8183	mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8184	set_mem_align (mem, 128);
8185	insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8186	RTX_FRAME_RELATED_P (insn) = 1;
8187	offset += 16;
8188      }
8189}
8190
8191static GTY(()) rtx queued_cfa_restores;
8192
/* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the
   next stack manipulation insn.  Don't add the note if the previously
   saved value will be left untouched within the stack red zone until
   return, as unwinders can find the same value in the register and
   on the stack.  */
8198
8199static void
8200ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
8201{
8202  if (TARGET_RED_ZONE
8203      && !TARGET_64BIT_MS_ABI
8204      && red_offset + RED_ZONE_SIZE >= 0
8205      && crtl->args.pops_args < 65536)
8206    return;
8207
8208  if (insn)
8209    {
8210      add_reg_note (insn, REG_CFA_RESTORE, reg);
8211      RTX_FRAME_RELATED_P (insn) = 1;
8212    }
8213  else
8214    queued_cfa_restores
8215      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8216}
8217
8218/* Add queued REG_CFA_RESTORE notes if any to INSN.  */
8219
8220static void
8221ix86_add_queued_cfa_restore_notes (rtx insn)
8222{
8223  rtx last;
8224  if (!queued_cfa_restores)
8225    return;
8226  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8227    ;
8228  XEXP (last, 1) = REG_NOTES (insn);
8229  REG_NOTES (insn) = queued_cfa_restores;
8230  queued_cfa_restores = NULL_RTX;
8231  RTX_FRAME_RELATED_P (insn) = 1;
8232}
8233
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory
   accesses.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely
   used, and positive otherwise.  */
8239
8240static void
8241pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8242			   int style, bool set_cfa)
8243{
8244  rtx insn;
8245
8246  if (! TARGET_64BIT)
8247    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8248  else if (x86_64_immediate_operand (offset, DImode))
8249    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8250  else
8251    {
8252      rtx r11;
8253      /* r11 is used by indirect sibcall return as well, set before the
8254	 epilogue and used after the epilogue.  ATM indirect sibcall
8255	 shouldn't be used together with huge frame sizes in one
8256	 function because of the frame_size check in sibcall.c.  */
8257      gcc_assert (style);
8258      r11 = gen_rtx_REG (DImode, R11_REG);
8259      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8260      if (style < 0)
8261	RTX_FRAME_RELATED_P (insn) = 1;
8262      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8263							       offset));
8264    }
8265
8266  if (style >= 0)
8267    ix86_add_queued_cfa_restore_notes (insn);
8268
8269  if (set_cfa)
8270    {
8271      rtx r;
8272
8273      gcc_assert (ix86_cfa_state->reg == src);
8274      ix86_cfa_state->offset += INTVAL (offset);
8275      ix86_cfa_state->reg = dest;
8276
8277      r = gen_rtx_PLUS (Pmode, src, offset);
8278      r = gen_rtx_SET (VOIDmode, dest, r);
8279      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8280      RTX_FRAME_RELATED_P (insn) = 1;
8281    }
8282  else if (style < 0)
8283    RTX_FRAME_RELATED_P (insn) = 1;
8284}
8285
/* Find an available register to be used as the dynamic realign
   argument pointer register.  Such a register will be written in the
   prologue and used at the beginning of the body, so it must not be
	1. a parameter passing register.
	2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise,
   we use DI for i386 and R13 for x86-64.  We chose R13 since it has
   shorter encoding.

   Return: the regno of the chosen register.  */
8296
8297static unsigned int
8298find_drap_reg (void)
8299{
8300  tree decl = cfun->decl;
8301
8302  if (TARGET_64BIT)
8303    {
      /* Use R13 for a nested function or a function that needs a
	 static chain.  Since a function with a tail call may use any
	 caller-saved register in the epilogue, DRAP must not use a
	 caller-saved register in that case.  */
8308      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8309	return R13_REG;
8310
8311      return R10_REG;
8312    }
8313  else
8314    {
      /* Use DI for a nested function or a function that needs a
	 static chain.  Since a function with a tail call may use any
	 caller-saved register in the epilogue, DRAP must not use a
	 caller-saved register in that case.  */
8319      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8320	return DI_REG;
8321
8322      /* Reuse static chain register if it isn't used for parameter
8323         passing.  */
8324      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8325	  && !lookup_attribute ("fastcall",
8326    				TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8327	return CX_REG;
8328      else
8329	return DI_REG;
8330    }
8331}
8332
8333/* Return minimum incoming stack alignment.  */
8334
8335static unsigned int
8336ix86_minimum_incoming_stack_boundary (bool sibcall)
8337{
8338  unsigned int incoming_stack_boundary;
8339
  /* Prefer the boundary specified on the command line.  */
8341  if (ix86_user_incoming_stack_boundary)
8342    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack
     boundary if -mstackrealign is used, this isn't a sibcall check,
     and the estimated stack alignment is 128 bits.  */
8346  else if (!sibcall
8347	   && !TARGET_64BIT
8348	   && ix86_force_align_arg_pointer
8349	   && crtl->stack_alignment_estimated == 128)
8350    incoming_stack_boundary = MIN_STACK_BOUNDARY;
8351  else
8352    incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8353
8354  /* Incoming stack alignment can be changed on individual functions
8355     via force_align_arg_pointer attribute.  We use the smallest
8356     incoming stack boundary.  */
8357  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8358      && lookup_attribute (ix86_force_align_arg_pointer_string,
8359			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8360    incoming_stack_boundary = MIN_STACK_BOUNDARY;
8361
8362  /* The incoming stack frame has to be aligned at least at
8363     parm_stack_boundary.  */
8364  if (incoming_stack_boundary < crtl->parm_stack_boundary)
8365    incoming_stack_boundary = crtl->parm_stack_boundary;
8366
  /* The stack at the entrance of main is aligned by the runtime.  We
     use the smallest incoming stack boundary.  */
8369  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8370      && DECL_NAME (current_function_decl)
8371      && MAIN_NAME_P (DECL_NAME (current_function_decl))
8372      && DECL_FILE_SCOPE_P (current_function_decl))
8373    incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8374
8375  return incoming_stack_boundary;
8376}
8377
8378/* Update incoming stack boundary and estimated stack alignment.  */
8379
8380static void
8381ix86_update_stack_boundary (void)
8382{
8383  ix86_incoming_stack_boundary
8384    = ix86_minimum_incoming_stack_boundary (false);
8385
  /* x86_64 varargs functions need 16-byte stack alignment for the
     register save area.  */
8388  if (TARGET_64BIT
8389      && cfun->stdarg
8390      && crtl->stack_alignment_estimated < 128)
8391    crtl->stack_alignment_estimated = 128;
8392}
8393
8394/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
8395   needed or an rtx for DRAP otherwise.  */
8396
8397static rtx
8398ix86_get_drap_rtx (void)
8399{
8400  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8401    crtl->need_drap = true;
8402
8403  if (stack_realign_drap)
8404    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
8406      unsigned int regno = find_drap_reg ();
8407      rtx drap_vreg;
8408      rtx arg_ptr;
8409      rtx seq, insn;
8410
8411      arg_ptr = gen_rtx_REG (Pmode, regno);
8412      crtl->drap_reg = arg_ptr;
8413
8414      start_sequence ();
8415      drap_vreg = copy_to_reg (arg_ptr);
8416      seq = get_insns ();
8417      end_sequence ();
8418
8419      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8420      if (!optimize)
8421	{
8422	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8423	  RTX_FRAME_RELATED_P (insn) = 1;
8424	}
8425      return drap_vreg;
8426    }
8427  else
8428    return NULL;
8429}
8430
8431/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
8432
8433static rtx
8434ix86_internal_arg_pointer (void)
8435{
8436  return virtual_incoming_args_rtx;
8437}
8438
/* Finalize the stack_realign_needed flag, which guides the
   prologue/epilogue so that it is generated in the correct form.  */
8441static void
8442ix86_finalize_stack_realign_flags (void)
8443{
  /* Check whether stack realignment is really needed after reload, and
     store the result in cfun.  */
8446  unsigned int incoming_stack_boundary
8447    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8448       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8449  unsigned int stack_realign = (incoming_stack_boundary
8450				< (current_function_is_leaf
8451				   ? crtl->max_used_stack_slot_alignment
8452				   : crtl->stack_alignment_needed));
8453
8454  if (crtl->stack_realign_finalized)
8455    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
8458      gcc_assert (crtl->stack_realign_needed == stack_realign);
8459    }
8460  else
8461    {
8462      crtl->stack_realign_needed = stack_realign;
8463      crtl->stack_realign_finalized = true;
8464    }
8465}
8466
8467/* Expand the prologue into a bunch of separate insns.  */
8468
8469void
8470ix86_expand_prologue (void)
8471{
8472  rtx insn;
8473  bool pic_reg_used;
8474  struct ix86_frame frame;
8475  HOST_WIDE_INT allocate;
8476  int gen_frame_pointer = frame_pointer_needed;
8477
8478  ix86_finalize_stack_realign_flags ();
8479
8480  /* DRAP should not coexist with stack_realign_fp */
8481  gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8482
8483  /* Initialize CFA state for before the prologue.  */
8484  ix86_cfa_state->reg = stack_pointer_rtx;
8485  ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
8486
8487  ix86_compute_frame_layout (&frame);
8488
8489  if (ix86_function_ms_hook_prologue (current_function_decl))
8490    {
8491      rtx push, mov;
8492
8493      /* Make sure the function starts with
8494	 8b ff     movl.s %edi,%edi
8495	 55        push   %ebp
8496	 8b ec     movl.s %esp,%ebp
8497
8498	 This matches the hookable function prologue in Win32 API
8499	 functions in Microsoft Windows XP Service Pack 2 and newer.
8500	 Wine uses this to enable Windows apps to hook the Win32 API
8501	 functions provided by Wine.  */
8502      insn = emit_insn (gen_vswapmov (gen_rtx_REG (SImode, DI_REG),
8503				      gen_rtx_REG (SImode, DI_REG)));
8504      push = emit_insn (gen_push (hard_frame_pointer_rtx));
8505      mov = emit_insn (gen_vswapmov (hard_frame_pointer_rtx,
8506				     stack_pointer_rtx));
8507
8508      if (frame_pointer_needed && !(crtl->drap_reg
8509				    && crtl->stack_realign_needed))
8510	{
8511	  /* The push %ebp and movl.s %esp, %ebp already set up
8512	     the frame pointer.  No need to do this again. */
8513	  gen_frame_pointer = 0;
8514	  RTX_FRAME_RELATED_P (push) = 1;
8515	  RTX_FRAME_RELATED_P (mov) = 1;
8516	  if (ix86_cfa_state->reg == stack_pointer_rtx)
8517	    ix86_cfa_state->reg = hard_frame_pointer_rtx;
8518	}
8519      else
8520	/* If the frame pointer is not needed, pop %ebp again. This
8521	   could be optimized for cases where ebp needs to be backed up
8522	   for some other reason.  If stack realignment is needed, pop
8523	   the base pointer again, align the stack, and later regenerate
8524	   the frame pointer setup.  The frame pointer generated by the
8525	   hook prologue is not aligned, so it can't be used.  */
8526	insn = emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8527    }
8528
8529  /* The first insn of a function that accepts its static chain on the
8530     stack is to push the register that would be filled in by a direct
8531     call.  This insn will be skipped by the trampoline.  */
8532  if (ix86_static_chain_on_stack)
8533    {
8534      rtx t;
8535
8536      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
8537      emit_insn (gen_blockage ());
8538
8539      /* We don't want to interpret this push insn as a register save,
8540	 only as a stack adjustment.  The real copy of the register as
8541	 a save will be done later, if needed.  */
8542      t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
8543      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8544      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8545      RTX_FRAME_RELATED_P (insn) = 1;
8546    }
8547
  /* Emit prologue code to adjust stack alignment and set up DRAP, in
     case DRAP is needed and stack realignment is really needed after
     reload.  */
8550  if (crtl->drap_reg && crtl->stack_realign_needed)
8551    {
8552      rtx x, y;
8553      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8554      int param_ptr_offset = UNITS_PER_WORD;
8555
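      /* param_ptr_offset counts the slots that sit between the stack
	 pointer, as adjusted below, and the incoming arguments: the
	 return address, plus the pushed static chain and the DRAP
	 register push, when present.  */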
8556      if (ix86_static_chain_on_stack)
8557	param_ptr_offset += UNITS_PER_WORD;
8558      if (!call_used_regs[REGNO (crtl->drap_reg)])
8559	param_ptr_offset += UNITS_PER_WORD;
8560
8561      gcc_assert (stack_realign_drap);
8562
8563      /* Grab the argument pointer.  */
8564      x = plus_constant (stack_pointer_rtx, param_ptr_offset);
8565      y = crtl->drap_reg;
8566
      /* Only need to push the parameter pointer reg if it must be
	 preserved across calls (i.e. it is not call-clobbered).  */
8569      if (!call_used_regs[REGNO (crtl->drap_reg)])
8570	{
8571	  /* Push arg pointer reg */
8572	  insn = emit_insn (gen_push (y));
8573	  RTX_FRAME_RELATED_P (insn) = 1;
8574	}
8575
8576      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8577      RTX_FRAME_RELATED_P (insn) = 1;
8578      ix86_cfa_state->reg = crtl->drap_reg;
8579
8580      /* Align the stack.  */
8581      insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8582					   stack_pointer_rtx,
8583					   GEN_INT (-align_bytes)));
8584      RTX_FRAME_RELATED_P (insn) = 1;
8585
8586      /* Replicate the return address on the stack so that return
8587	 address can be reached via (argp - 1) slot.  This is needed
8588	 to implement macro RETURN_ADDR_RTX and intrinsic function
8589	 expand_builtin_return_addr etc.  */
8590      x = crtl->drap_reg;
8591      x = gen_frame_mem (Pmode,
8592                         plus_constant (x, -UNITS_PER_WORD));
8593      insn = emit_insn (gen_push (x));
8594      RTX_FRAME_RELATED_P (insn) = 1;
8595    }
8596
8597  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8598     slower on all targets.  Also sdb doesn't like it.  */
8599
8600  if (gen_frame_pointer)
8601    {
8602      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8603      RTX_FRAME_RELATED_P (insn) = 1;
8604
8605      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8606      RTX_FRAME_RELATED_P (insn) = 1;
8607
8608      if (ix86_cfa_state->reg == stack_pointer_rtx)
8609        ix86_cfa_state->reg = hard_frame_pointer_rtx;
8610    }
8611
8612  if (stack_realign_fp)
8613    {
8614      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8615      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8616
8617      /* Align the stack.  */
8618      insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8619					   stack_pointer_rtx,
8620					   GEN_INT (-align_bytes)));
8621      RTX_FRAME_RELATED_P (insn) = 1;
8622    }
8623
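  /* ALLOCATE is what the prologue must carve out explicitly: the local
     frame, paddings, va_arg and outgoing argument areas (to_allocate)
     plus the SSE register save area; if the general registers are saved
     with moves rather than pushes, their save area is added below.  */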
8624  allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8625
8626  if (!frame.save_regs_using_mov)
8627    ix86_emit_save_regs ();
8628  else
8629    allocate += frame.nregs * UNITS_PER_WORD;
8630
  /* When using the red zone we may start saving registers before
     allocating the stack frame, saving one cycle of the prologue.
     However, avoid doing this if we have to probe the stack, since at
     least on x86_64 the stack probe can turn into a call that clobbers
     a red zone location.  */
8636  if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8637      && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8638    ix86_emit_save_regs_using_mov ((frame_pointer_needed
8639				     && !crtl->stack_realign_needed)
8640                                   ? hard_frame_pointer_rtx
8641				   : stack_pointer_rtx,
8642				   -frame.nregs * UNITS_PER_WORD);
8643
8644  if (allocate == 0)
8645    ;
8646  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8647    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8648			       GEN_INT (-allocate), -1,
8649			       ix86_cfa_state->reg == stack_pointer_rtx);
8650  else
8651    {
8652      rtx eax = gen_rtx_REG (Pmode, AX_REG);
8653      rtx r10 = NULL;
8654      bool eax_live = false;
8655      bool r10_live = false;
8656      rtx t;
8657
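      /* Large frames with stack probing: the amount to allocate is
	 passed to the probing worker in %eax, so if %eax (or %r10,
	 which may carry the static chain on 64-bit) is live at function
	 start it is pushed first and then reloaded from its new stack
	 slot once the frame has been allocated.  */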
8658      if (TARGET_64BIT)
8659        r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
8660      if (!TARGET_64BIT_MS_ABI)
8661        eax_live = ix86_eax_live_at_start_p ();
8662
8663      if (eax_live)
8664	{
8665	  emit_insn (gen_push (eax));
8666	  allocate -= UNITS_PER_WORD;
8667	}
8668      if (r10_live)
8669       {
8670         r10 = gen_rtx_REG (Pmode, R10_REG);
8671         emit_insn (gen_push (r10));
8672         allocate -= UNITS_PER_WORD;
8673       }
8674
8675      emit_move_insn (eax, GEN_INT (allocate));
8676
8677      if (TARGET_64BIT)
8678	insn = gen_allocate_stack_worker_64 (eax, eax);
8679      else
8680	insn = gen_allocate_stack_worker_32 (eax, eax);
8681      insn = emit_insn (insn);
8682
8683      if (ix86_cfa_state->reg == stack_pointer_rtx)
8684	{
8685	  ix86_cfa_state->offset += allocate;
8686	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8687	  t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8688	  add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8689	  RTX_FRAME_RELATED_P (insn) = 1;
8690	}
8691
8692      if (eax_live && r10_live)
8693	{
8694	  if (frame_pointer_needed)
8695	    {
8696	      t = plus_constant (hard_frame_pointer_rtx,
8697				 allocate
8698				 - frame.to_allocate
8699				 - frame.nregs * UNITS_PER_WORD);
8700	      emit_move_insn (r10, gen_rtx_MEM (Pmode, t));
8701	      t = plus_constant (hard_frame_pointer_rtx,
8702				 allocate + UNITS_PER_WORD
8703				 - frame.to_allocate
8704				 - frame.nregs * UNITS_PER_WORD);
8705	      emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8706	    }
8707          else
8708	    {
8709	      t = plus_constant (stack_pointer_rtx, allocate);
8710	      emit_move_insn (r10, gen_rtx_MEM (Pmode, t));
8711	      t = plus_constant (stack_pointer_rtx, allocate + UNITS_PER_WORD);
8712	      emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8713	    }
8714	}
8715      else if (eax_live || r10_live)
8716	{
8717	  if (frame_pointer_needed)
8718	    t = plus_constant (hard_frame_pointer_rtx,
8719			       allocate
8720			       - frame.to_allocate
8721			       - frame.nregs * UNITS_PER_WORD);
8722	  else
8723	    t = plus_constant (stack_pointer_rtx, allocate);
8724	  emit_move_insn ((eax_live ? eax : r10), gen_rtx_MEM (Pmode, t));
8725	}
8726    }
8727
8728  if (frame.save_regs_using_mov
8729      && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8730         && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8731    {
8732      if (!frame_pointer_needed
8733	  || !(frame.to_allocate + frame.padding0)
8734	  || crtl->stack_realign_needed)
8735        ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8736				       frame.to_allocate
8737				       + frame.nsseregs * 16 + frame.padding0);
8738      else
8739        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8740				       -frame.nregs * UNITS_PER_WORD);
8741    }
8742  if (!frame_pointer_needed
8743      || !(frame.to_allocate + frame.padding0)
8744      || crtl->stack_realign_needed)
8745    ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8746				       frame.to_allocate);
8747  else
8748    ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8749				       - frame.nregs * UNITS_PER_WORD
8750				       - frame.nsseregs * 16
8751				       - frame.padding0);
8752
8753  pic_reg_used = false;
8754  if (pic_offset_table_rtx
8755      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8756	  || crtl->profile))
8757    {
8758      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8759
8760      if (alt_pic_reg_used != INVALID_REGNUM)
8761	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8762
8763      pic_reg_used = true;
8764    }
8765
8766  if (pic_reg_used)
8767    {
8768      if (TARGET_64BIT)
8769	{
8770	  if (ix86_cmodel == CM_LARGE_PIC)
8771	    {
8772              rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8773	      rtx label = gen_label_rtx ();
8774	      emit_label (label);
8775	      LABEL_PRESERVE_P (label) = 1;
8776	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8777	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8778	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8779	      insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8780					    pic_offset_table_rtx, tmp_reg));
8781	    }
8782	  else
8783            insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8784	}
8785      else
8786        insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8787    }
8788
8789  /* In the pic_reg_used case, make sure that the got load isn't deleted
8790     when mcount needs it.  Blockage to avoid call movement across mcount
8791     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8792     note.  */
8793  if (crtl->profile && pic_reg_used)
8794    emit_insn (gen_prologue_use (pic_offset_table_rtx));
8795
8796  if (crtl->drap_reg && !crtl->stack_realign_needed)
8797    {
      /* vDRAP was set up, but after reload it turns out stack
	 realignment isn't necessary; here we emit prologue code to set
	 up DRAP without the stack realignment adjustment.  */
8801      rtx x;
8802      int drap_bp_offset = UNITS_PER_WORD * 2;
8803
8804      if (ix86_static_chain_on_stack)
8805	drap_bp_offset += UNITS_PER_WORD;
8806      x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8807      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8808    }
8809
  /* Prevent instructions from being scheduled into the register save
     push sequence when the red zone area is accessed through the frame
     pointer.  The offset between the frame pointer and the stack
     pointer is calculated relative to the value of the stack pointer
     at the end of the function prologue, and moving instructions that
     access the red zone area via the frame pointer into the push
     sequence violates this assumption.  */
8816  if (frame_pointer_needed && frame.red_zone_size)
8817    emit_insn (gen_memory_blockage ());
8818
8819  /* Emit cld instruction if stringops are used in the function.  */
8820  if (TARGET_CLD && ix86_current_function_needs_cld)
8821    emit_insn (gen_cld ());
8822}
8823
8824/* Emit code to restore REG using a POP insn.  */
8825
8826static void
8827ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
8828{
8829  rtx insn = emit_insn (ix86_gen_pop1 (reg));
8830
8831  if (ix86_cfa_state->reg == crtl->drap_reg
8832      && REGNO (reg) == REGNO (crtl->drap_reg))
8833    {
8834      /* Previously we'd represented the CFA as an expression
8835	 like *(%ebp - 8).  We've just popped that value from
8836	 the stack, which means we need to reset the CFA to
8837	 the drap register.  This will remain until we restore
8838	 the stack pointer.  */
8839      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8840      RTX_FRAME_RELATED_P (insn) = 1;
8841      return;
8842    }
8843
8844  if (ix86_cfa_state->reg == stack_pointer_rtx)
8845    {
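      /* The pop insn is a PARALLEL; its second element is the stack
	 pointer adjustment, which we reuse as the CFA adjustment
	 note.  */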
8846      ix86_cfa_state->offset -= UNITS_PER_WORD;
8847      add_reg_note (insn, REG_CFA_ADJUST_CFA,
8848		    copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
8849      RTX_FRAME_RELATED_P (insn) = 1;
8850    }
8851
8852  /* When the frame pointer is the CFA, and we pop it, we are
8853     swapping back to the stack pointer as the CFA.  This happens
8854     for stack frames that don't allocate other data, so we assume
8855     the stack pointer is now pointing at the return address, i.e.
8856     the function entry state, which makes the offset be 1 word.  */
8857  else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
8858	   && reg == hard_frame_pointer_rtx)
8859    {
8860      ix86_cfa_state->reg = stack_pointer_rtx;
8861      ix86_cfa_state->offset -= UNITS_PER_WORD;
8862
8863      add_reg_note (insn, REG_CFA_DEF_CFA,
8864		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8865				  GEN_INT (ix86_cfa_state->offset)));
8866      RTX_FRAME_RELATED_P (insn) = 1;
8867    }
8868
8869  ix86_add_cfa_restore_note (insn, reg, red_offset);
8870}
8871
8872/* Emit code to restore saved registers using POP insns.  */
8873
8874static void
8875ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
8876{
8877  int regno;
8878
8879  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8880    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8881      {
8882	ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
8883					 red_offset);
8884	red_offset += UNITS_PER_WORD;
8885      }
8886}
8887
8888/* Emit code and notes for the LEAVE instruction.  */
8889
8890static void
8891ix86_emit_leave (HOST_WIDE_INT red_offset)
8892{
8893  rtx insn = emit_insn (ix86_gen_leave ());
8894
8895  ix86_add_queued_cfa_restore_notes (insn);
8896
8897  if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
8898    {
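      /* The leave insn is a PARALLEL; its first element restores the
	 stack pointer from the frame pointer, which is exactly the CFA
	 adjustment to record.  */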
8899      ix86_cfa_state->reg = stack_pointer_rtx;
8900      ix86_cfa_state->offset -= UNITS_PER_WORD;
8901
8902      add_reg_note (insn, REG_CFA_ADJUST_CFA,
8903		    copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
8904      RTX_FRAME_RELATED_P (insn) = 1;
8905      ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
8906    }
8907}
8908
8909/* Emit code to restore saved registers using MOV insns.  First register
8910   is restored from POINTER + OFFSET.  */
8911static void
8912ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8913				  HOST_WIDE_INT red_offset,
8914				  int maybe_eh_return)
8915{
8916  unsigned int regno;
8917  rtx base_address = gen_rtx_MEM (Pmode, pointer);
8918  rtx insn;
8919
8920  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8921    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8922      {
8923	rtx reg = gen_rtx_REG (Pmode, regno);
8924
	/* Ensure that adjust_address won't be forced to produce a
	   pointer out of the range allowed by the x86-64 instruction
	   set.  */
8927	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8928	  {
8929	    rtx r11;
8930
8931	    r11 = gen_rtx_REG (DImode, R11_REG);
8932	    emit_move_insn (r11, GEN_INT (offset));
8933	    emit_insn (gen_adddi3 (r11, r11, pointer));
8934	    base_address = gen_rtx_MEM (Pmode, r11);
8935	    offset = 0;
8936	  }
8937	insn = emit_move_insn (reg,
8938			       adjust_address (base_address, Pmode, offset));
8939	offset += UNITS_PER_WORD;
8940
8941        if (ix86_cfa_state->reg == crtl->drap_reg
8942	    && regno == REGNO (crtl->drap_reg))
8943	  {
8944	    /* Previously we'd represented the CFA as an expression
8945	       like *(%ebp - 8).  We've just popped that value from
8946	       the stack, which means we need to reset the CFA to
8947	       the drap register.  This will remain until we restore
8948	       the stack pointer.  */
8949	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8950	    RTX_FRAME_RELATED_P (insn) = 1;
8951	  }
8952	else
8953	  ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8954
8955	red_offset += UNITS_PER_WORD;
8956      }
8957}
8958
/* Emit code to restore saved SSE registers using MOV insns.  The first
   register is restored from POINTER + OFFSET.  */
8961static void
8962ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8963				      HOST_WIDE_INT red_offset,
8964				      int maybe_eh_return)
8965{
8966  int regno;
8967  rtx base_address = gen_rtx_MEM (TImode, pointer);
8968  rtx mem;
8969
8970  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8971    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8972      {
8973	rtx reg = gen_rtx_REG (TImode, regno);
8974
	/* Ensure that adjust_address won't be forced to produce a
	   pointer out of the range allowed by the x86-64 instruction
	   set.  */
8977	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8978	  {
8979	    rtx r11;
8980
8981	    r11 = gen_rtx_REG (DImode, R11_REG);
8982	    emit_move_insn (r11, GEN_INT (offset));
8983	    emit_insn (gen_adddi3 (r11, r11, pointer));
8984	    base_address = gen_rtx_MEM (TImode, r11);
8985	    offset = 0;
8986	  }
8987	mem = adjust_address (base_address, TImode, offset);
8988	set_mem_align (mem, 128);
8989	emit_move_insn (reg, mem);
8990	offset += 16;
8991
8992	ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8993
8994	red_offset += 16;
8995      }
8996}
8997
8998/* Restore function stack, frame, and registers.  */
8999
9000void
9001ix86_expand_epilogue (int style)
9002{
9003  int sp_valid;
9004  struct ix86_frame frame;
9005  HOST_WIDE_INT offset, red_offset;
9006  struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
9007  bool using_drap;
9008
9009  ix86_finalize_stack_realign_flags ();
9010
  /* When stack is realigned, SP must be valid.  */
9012  sp_valid = (!frame_pointer_needed
9013	      || current_function_sp_is_unchanging
9014	      || stack_realign_fp);
9015
9016  ix86_compute_frame_layout (&frame);
9017
9018  /* See the comment about red zone and frame
9019     pointer usage in ix86_expand_prologue.  */
9020  if (frame_pointer_needed && frame.red_zone_size)
9021    emit_insn (gen_memory_blockage ());
9022
9023  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9024  gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
9025
9026  /* Calculate start of saved registers relative to ebp.  Special care
9027     must be taken for the normal return case of a function using
9028     eh_return: the eax and edx registers are marked as saved, but not
9029     restored along this path.  */
9030  offset = frame.nregs;
9031  if (crtl->calls_eh_return && style != 2)
9032    offset -= 2;
9033  offset *= -UNITS_PER_WORD;
9034  offset -= frame.nsseregs * 16 + frame.padding0;
9035
9036  /* Calculate start of saved registers relative to esp on entry of the
9037     function.  When realigning stack, this needs to be the most negative
9038     value possible at runtime.  */
9039  red_offset = offset;
9040  if (using_drap)
9041    red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
9042		  + UNITS_PER_WORD;
9043  else if (stack_realign_fp)
9044    red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
9045		  - UNITS_PER_WORD;
9046  if (ix86_static_chain_on_stack)
9047    red_offset -= UNITS_PER_WORD;
9048  if (frame_pointer_needed)
9049    red_offset -= UNITS_PER_WORD;
9050
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We
     also use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in the
     future.  */
9061  if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
9062      || (TARGET_EPILOGUE_USING_MOVE
9063	  && cfun->machine->use_fast_prologue_epilogue
9064	  && ((frame.nregs + frame.nsseregs) > 1
9065	      || (frame.to_allocate + frame.padding0) != 0))
9066      || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
9067	  && (frame.to_allocate + frame.padding0) != 0)
9068      || (frame_pointer_needed && TARGET_USE_LEAVE
9069	  && cfun->machine->use_fast_prologue_epilogue
9070	  && (frame.nregs + frame.nsseregs) == 1)
9071      || crtl->calls_eh_return)
9072    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  The only exception is esp pointing
	 directly to the end of the block of saved registers, where we
	 may simplify the addressing mode.

	 If we are realigning the stack with bp and sp, the register
	 restores can't be addressed by bp; sp must be used instead.  */
9081
9082      if (!frame_pointer_needed
9083	  || (sp_valid && !(frame.to_allocate + frame.padding0))
9084	  || stack_realign_fp)
9085	{
9086	  ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9087						frame.to_allocate, red_offset,
9088						style == 2);
9089	  ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
9090					    frame.to_allocate
9091					    + frame.nsseregs * 16
9092					    + frame.padding0,
9093					    red_offset
9094					    + frame.nsseregs * 16
9095					    + frame.padding0, style == 2);
9096	}
9097      else
9098        {
9099	  ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
9100						offset, red_offset,
9101						style == 2);
9102	  ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
9103					    offset
9104					    + frame.nsseregs * 16
9105					    + frame.padding0,
9106					    red_offset
9107					    + frame.nsseregs * 16
9108					    + frame.padding0, style == 2);
9109        }
9110
9111      red_offset -= offset;
9112
9113      /* eh_return epilogues need %ecx added to the stack pointer.  */
9114      if (style == 2)
9115	{
9116	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
9117
9118	  /* Stack align doesn't work with eh_return.  */
9119	  gcc_assert (!crtl->stack_realign_needed);
	  /* Neither do regparm nested functions.  */
9121	  gcc_assert (!ix86_static_chain_on_stack);
9122
9123	  if (frame_pointer_needed)
9124	    {
9125	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9126	      tmp = plus_constant (tmp, UNITS_PER_WORD);
9127	      tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
9128
9129	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
9130	      tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
9131
9132	      /* Note that we use SA as a temporary CFA, as the return
9133		 address is at the proper place relative to it.  We
9134		 pretend this happens at the FP restore insn because
9135		 prior to this insn the FP would be stored at the wrong
9136		 offset relative to SA, and after this insn we have no
9137		 other reasonable register to use for the CFA.  We don't
9138		 bother resetting the CFA to the SP for the duration of
9139		 the return insn.  */
9140	      add_reg_note (tmp, REG_CFA_DEF_CFA,
9141			    plus_constant (sa, UNITS_PER_WORD));
9142	      ix86_add_queued_cfa_restore_notes (tmp);
9143	      add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
9144	      RTX_FRAME_RELATED_P (tmp) = 1;
9145	      ix86_cfa_state->reg = sa;
9146	      ix86_cfa_state->offset = UNITS_PER_WORD;
9147
9148	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9149					 const0_rtx, style, false);
9150	    }
9151	  else
9152	    {
9153	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9154	      tmp = plus_constant (tmp, (frame.to_allocate
9155                                         + frame.nregs * UNITS_PER_WORD
9156					 + frame.nsseregs * 16
9157					 + frame.padding0));
9158	      tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
9159	      ix86_add_queued_cfa_restore_notes (tmp);
9160
9161	      gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
9162	      if (ix86_cfa_state->offset != UNITS_PER_WORD)
9163		{
9164		  ix86_cfa_state->offset = UNITS_PER_WORD;
9165		  add_reg_note (tmp, REG_CFA_DEF_CFA,
9166				plus_constant (stack_pointer_rtx,
9167					       UNITS_PER_WORD));
9168		  RTX_FRAME_RELATED_P (tmp) = 1;
9169		}
9170	    }
9171	}
9172      else if (!frame_pointer_needed)
9173	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9174				   GEN_INT (frame.to_allocate
9175					    + frame.nregs * UNITS_PER_WORD
9176					    + frame.nsseregs * 16
9177					    + frame.padding0),
9178				   style, !using_drap);
9179      /* If not an i386, mov & pop is faster than "leave".  */
9180      else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
9181	       || !cfun->machine->use_fast_prologue_epilogue)
9182	ix86_emit_leave (red_offset);
9183      else
9184	{
9185	  pro_epilogue_adjust_stack (stack_pointer_rtx,
9186				     hard_frame_pointer_rtx,
9187				     const0_rtx, style, !using_drap);
9188
9189	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
9190	}
9191    }
9192  else
9193    {
      /* The first step is to deallocate the stack frame so that we can
	 pop the registers.

	 If we realign the stack with the frame pointer, the stack
	 pointer cannot be recovered via lea $offset(%bp), %sp, because
	 there is a padding area between bp and sp for the realignment.
	 "add $to_allocate, %sp" must be used instead.  */
9201      if (!sp_valid)
9202	{
9203	  gcc_assert (frame_pointer_needed);
9204          gcc_assert (!stack_realign_fp);
9205	  pro_epilogue_adjust_stack (stack_pointer_rtx,
9206				     hard_frame_pointer_rtx,
9207				     GEN_INT (offset), style, false);
9208          ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9209						0, red_offset,
9210						style == 2);
9211	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9212				     GEN_INT (frame.nsseregs * 16
9213					      + frame.padding0),
9214				     style, false);
9215	}
9216      else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
9217	{
9218          ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9219						frame.to_allocate, red_offset,
9220						style == 2);
9221	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9222				     GEN_INT (frame.to_allocate
9223				     	      + frame.nsseregs * 16
9224					      + frame.padding0), style,
9225				     !using_drap && !frame_pointer_needed);
9226	}
9227
9228      ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
9229					+ frame.padding0);
9230      red_offset -= offset;
9231
9232      if (frame_pointer_needed)
9233	{
9234	  /* Leave results in shorter dependency chains on CPUs that are
9235	     able to grok it fast.  */
9236	  if (TARGET_USE_LEAVE)
9237	    ix86_emit_leave (red_offset);
9238	  else
9239            {
              /* If stack realignment really happened and we are not
                 using leave, we must first recover the stack pointer
                 from the hard frame pointer.  */
9243              if (stack_realign_fp)
9244		pro_epilogue_adjust_stack (stack_pointer_rtx,
9245					   hard_frame_pointer_rtx,
9246					   const0_rtx, style, !using_drap);
9247	      ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
9248					       red_offset);
9249            }
9250	}
9251    }
9252
9253  if (using_drap)
9254    {
9255      int param_ptr_offset = UNITS_PER_WORD;
9256      rtx insn;
9257
9258      gcc_assert (stack_realign_drap);
9259
9260      if (ix86_static_chain_on_stack)
9261	param_ptr_offset += UNITS_PER_WORD;
9262      if (!call_used_regs[REGNO (crtl->drap_reg)])
9263	param_ptr_offset += UNITS_PER_WORD;
9264
9265      insn = emit_insn (gen_rtx_SET
9266			(VOIDmode, stack_pointer_rtx,
9267			 gen_rtx_PLUS (Pmode,
9268				       crtl->drap_reg,
9269				       GEN_INT (-param_ptr_offset))));
9270
9271      ix86_cfa_state->reg = stack_pointer_rtx;
9272      ix86_cfa_state->offset = param_ptr_offset;
9273
9274      add_reg_note (insn, REG_CFA_DEF_CFA,
9275		    gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
9276				  GEN_INT (ix86_cfa_state->offset)));
9277      RTX_FRAME_RELATED_P (insn) = 1;
9278
9279      if (!call_used_regs[REGNO (crtl->drap_reg)])
9280	ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
9281    }
9282
9283  /* Remove the saved static chain from the stack.  The use of ECX is
9284     merely as a scratch register, not as the actual static chain.  */
9285  if (ix86_static_chain_on_stack)
9286    {
9287      rtx r, insn;
9288
9289      gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
9290      ix86_cfa_state->offset += UNITS_PER_WORD;
9291
9292      r = gen_rtx_REG (Pmode, CX_REG);
9293      insn = emit_insn (ix86_gen_pop1 (r));
9294
9295      r = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9296      r = gen_rtx_SET (VOIDmode, stack_pointer_rtx, r);
9297      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9298      RTX_FRAME_RELATED_P (insn) = 1;
9299    }
9300
9301  /* Sibcall epilogues don't want a return instruction.  */
9302  if (style == 0)
9303    {
9304      *ix86_cfa_state = cfa_state_save;
9305      return;
9306    }
9307
9308  if (crtl->args.pops_args && crtl->args.size)
9309    {
9310      rtx popc = GEN_INT (crtl->args.pops_args);
9311
9312      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
9313	 address, do explicit add, and jump indirectly to the caller.  */
9314
9315      if (crtl->args.pops_args >= 65536)
9316	{
9317	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
9318	  rtx insn;
9319
9320	  /* There is no "pascal" calling convention in any 64bit ABI.  */
9321	  gcc_assert (!TARGET_64BIT);
9322
9323	  insn = emit_insn (gen_popsi1 (ecx));
9324	  ix86_cfa_state->offset -= UNITS_PER_WORD;
9325
9326	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
9327			copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9328	  add_reg_note (insn, REG_CFA_REGISTER,
9329			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
9330	  RTX_FRAME_RELATED_P (insn) = 1;
9331
9332	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9333				     popc, -1, true);
9334	  emit_jump_insn (gen_return_indirect_internal (ecx));
9335	}
9336      else
9337	emit_jump_insn (gen_return_pop_internal (popc));
9338    }
9339  else
9340    emit_jump_insn (gen_return_internal ());
9341
9342  /* Restore the state back to the state from the prologue,
9343     so that it's correct for the next epilogue.  */
9344  *ix86_cfa_state = cfa_state_save;
9345}
9346
9347/* Reset from the function's potential modifications.  */
9348
9349static void
9350ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9351			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
9352{
9353  if (pic_offset_table_rtx)
9354    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9355#if TARGET_MACHO
9356  /* Mach-O doesn't support labels at the end of objects, so if
9357     it looks like we might want one, insert a NOP.  */
9358  {
9359    rtx insn = get_last_insn ();
9360    while (insn
9361	   && NOTE_P (insn)
9362	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9363      insn = PREV_INSN (insn);
9364    if (insn
9365	&& (LABEL_P (insn)
9366	    || (NOTE_P (insn)
9367		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
9368      fputs ("\tnop\n", file);
9369  }
9370#endif
9371
9372}
9373
9374/* Extract the parts of an RTL expression that is a valid memory address
9375   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but it is still used for computing the length of the
   lea instruction.  */
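
/* For example, the RTL address
	(plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))
   i.e. 8(%B,%A,4) in AT&T syntax, decomposes into base B, index A,
   scale 4 and displacement 8.  */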
9378
9379int
9380ix86_decompose_address (rtx addr, struct ix86_address *out)
9381{
9382  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9383  rtx base_reg, index_reg;
9384  HOST_WIDE_INT scale = 1;
9385  rtx scale_rtx = NULL_RTX;
9386  int retval = 1;
9387  enum ix86_address_seg seg = SEG_DEFAULT;
9388
9389  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
9390    base = addr;
9391  else if (GET_CODE (addr) == PLUS)
9392    {
9393      rtx addends[4], op;
9394      int n = 0, i;
9395
9396      op = addr;
9397      do
9398	{
9399	  if (n >= 4)
9400	    return 0;
9401	  addends[n++] = XEXP (op, 1);
9402	  op = XEXP (op, 0);
9403	}
9404      while (GET_CODE (op) == PLUS);
9405      if (n >= 4)
9406	return 0;
9407      addends[n] = op;
9408
9409      for (i = n; i >= 0; --i)
9410	{
9411	  op = addends[i];
9412	  switch (GET_CODE (op))
9413	    {
9414	    case MULT:
9415	      if (index)
9416		return 0;
9417	      index = XEXP (op, 0);
9418	      scale_rtx = XEXP (op, 1);
9419	      break;
9420
9421	    case UNSPEC:
9422	      if (XINT (op, 1) == UNSPEC_TP
9423	          && TARGET_TLS_DIRECT_SEG_REFS
9424	          && seg == SEG_DEFAULT)
9425		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
9426	      else
9427		return 0;
9428	      break;
9429
9430	    case REG:
9431	    case SUBREG:
9432	      if (!base)
9433		base = op;
9434	      else if (!index)
9435		index = op;
9436	      else
9437		return 0;
9438	      break;
9439
9440	    case CONST:
9441	    case CONST_INT:
9442	    case SYMBOL_REF:
9443	    case LABEL_REF:
9444	      if (disp)
9445		return 0;
9446	      disp = op;
9447	      break;
9448
9449	    default:
9450	      return 0;
9451	    }
9452	}
9453    }
9454  else if (GET_CODE (addr) == MULT)
9455    {
9456      index = XEXP (addr, 0);		/* index*scale */
9457      scale_rtx = XEXP (addr, 1);
9458    }
9459  else if (GET_CODE (addr) == ASHIFT)
9460    {
9461      rtx tmp;
9462
9463      /* We're called for lea too, which implements ashift on occasion.  */
9464      index = XEXP (addr, 0);
9465      tmp = XEXP (addr, 1);
9466      if (!CONST_INT_P (tmp))
9467	return 0;
9468      scale = INTVAL (tmp);
9469      if ((unsigned HOST_WIDE_INT) scale > 3)
9470	return 0;
9471      scale = 1 << scale;
9472      retval = -1;
9473    }
9474  else
9475    disp = addr;			/* displacement */
9476
9477  /* Extract the integral value of scale.  */
9478  if (scale_rtx)
9479    {
9480      if (!CONST_INT_P (scale_rtx))
9481	return 0;
9482      scale = INTVAL (scale_rtx);
9483    }
9484
9485  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
9486  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
9487
9488  /* Avoid useless 0 displacement.  */
9489  if (disp == const0_rtx && (base || index))
9490    disp = NULL_RTX;
9491
  /* Allow the arg pointer and stack pointer as an index if there is no
     scaling.  */
9493  if (base_reg && index_reg && scale == 1
9494      && (index_reg == arg_pointer_rtx
9495	  || index_reg == frame_pointer_rtx
9496	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
9497    {
9498      rtx tmp;
9499      tmp = base, base = index, index = tmp;
9500      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
9501    }
9502
9503  /* Special case: %ebp cannot be encoded as a base without a displacement.
9504     Similarly %r13.  */
9505  if (!disp
9506      && base_reg
9507      && (base_reg == hard_frame_pointer_rtx
9508	  || base_reg == frame_pointer_rtx
9509	  || base_reg == arg_pointer_rtx
9510	  || (REG_P (base_reg)
9511	      && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
9512		  || REGNO (base_reg) == R13_REG))))
9513    disp = const0_rtx;
9514
  /* Special case: on K6, [%esi] forces the instruction to be vector
     decoded.  Avoid this by transforming it to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
9519  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9520      && base_reg && !index_reg && !disp
9521      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
9522    disp = const0_rtx;
9523
9524  /* Special case: encode reg+reg instead of reg*2.  */
9525  if (!base && index && scale == 2)
9526    base = index, base_reg = index_reg, scale = 1;
9527
9528  /* Special case: scaling cannot be encoded without base or displacement.  */
9529  if (!base && !disp && index && scale != 1)
9530    disp = const0_rtx;
9531
9532  out->base = base;
9533  out->index = index;
9534  out->disp = disp;
9535  out->scale = scale;
9536  out->seg = seg;
9537
9538  return retval;
9539}
9540
/* Return the cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
9546static int
9547ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
9548{
9549  struct ix86_address parts;
9550  int cost = 1;
9551  int ok = ix86_decompose_address (x, &parts);
9552
9553  gcc_assert (ok);
9554
9555  if (parts.base && GET_CODE (parts.base) == SUBREG)
9556    parts.base = SUBREG_REG (parts.base);
9557  if (parts.index && GET_CODE (parts.index) == SUBREG)
9558    parts.index = SUBREG_REG (parts.index);
9559
9560  /* Attempt to minimize number of registers in the address.  */
9561  if ((parts.base
9562       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9563      || (parts.index
9564	  && (!REG_P (parts.index)
9565	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
9566    cost++;
9567
9568  if (parts.base
9569      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9570      && parts.index
9571      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9572      && parts.base != parts.index)
9573    cost++;
9574
  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and decoding degenerates to the vector decoder.  Increase the cost
     of such addresses here.  The penalty is minimally 2 cycles.  It may
     be worthwhile to split such addresses or even refuse such addresses
     at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last cases may be avoidable by explicitly coding the
     zero into the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */
9589
9590  if (TARGET_K6
9591      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9592	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9593	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9594    cost += 10;
9595
9596  return cost;
9597}
9598
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.  */
9602
9603static bool
9604darwin_local_data_pic (rtx disp)
9605{
9606  return (GET_CODE (disp) == UNSPEC
9607	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9608}
9609
9610/* Determine if a given RTX is a valid constant.  We already know this
9611   satisfies CONSTANT_P.  */
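
/* For instance, (const (plus (symbol_ref "foo") (const_int 4))), i.e.
   foo+4, is accepted as long as "foo" is neither thread-local nor a
   dllimport symbol; a SYMBOL_REF with a TLS model is rejected below.  */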
9612
9613bool
9614legitimate_constant_p (rtx x)
9615{
9616  switch (GET_CODE (x))
9617    {
9618    case CONST:
9619      x = XEXP (x, 0);
9620
9621      if (GET_CODE (x) == PLUS)
9622	{
9623	  if (!CONST_INT_P (XEXP (x, 1)))
9624	    return false;
9625	  x = XEXP (x, 0);
9626	}
9627
9628      if (TARGET_MACHO && darwin_local_data_pic (x))
9629	return true;
9630
9631      /* Only some unspecs are valid as "constants".  */
9632      if (GET_CODE (x) == UNSPEC)
9633	switch (XINT (x, 1))
9634	  {
9635	  case UNSPEC_GOT:
9636	  case UNSPEC_GOTOFF:
9637	  case UNSPEC_PLTOFF:
9638	    return TARGET_64BIT;
9639	  case UNSPEC_TPOFF:
9640	  case UNSPEC_NTPOFF:
9641	    x = XVECEXP (x, 0, 0);
9642	    return (GET_CODE (x) == SYMBOL_REF
9643		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9644	  case UNSPEC_DTPOFF:
9645	    x = XVECEXP (x, 0, 0);
9646	    return (GET_CODE (x) == SYMBOL_REF
9647		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9648	  default:
9649	    return false;
9650	  }
9651
9652      /* We must have drilled down to a symbol.  */
9653      if (GET_CODE (x) == LABEL_REF)
9654	return true;
9655      if (GET_CODE (x) != SYMBOL_REF)
9656	return false;
9657      /* FALLTHRU */
9658
9659    case SYMBOL_REF:
9660      /* TLS symbols are never valid.  */
9661      if (SYMBOL_REF_TLS_MODEL (x))
9662	return false;
9663
9664      /* DLLIMPORT symbols are never valid.  */
9665      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9666	  && SYMBOL_REF_DLLIMPORT_P (x))
9667	return false;
9668      break;
9669
9670    case CONST_DOUBLE:
9671      if (GET_MODE (x) == TImode
9672	  && x != CONST0_RTX (TImode)
9673          && !TARGET_64BIT)
9674	return false;
9675      break;
9676
9677    case CONST_VECTOR:
9678      if (!standard_sse_constant_p (x))
9679	return false;
9680
9681    default:
9682      break;
9683    }
9684
9685  /* Otherwise we handle everything else in the move patterns.  */
9686  return true;
9687}
9688
9689/* Determine if it's legal to put X into the constant pool.  This
9690   is not possible for the address of thread-local symbols, which
9691   is checked above.  */
9692
9693static bool
9694ix86_cannot_force_const_mem (rtx x)
9695{
9696  /* We can always put integral constants and vectors in memory.  */
9697  switch (GET_CODE (x))
9698    {
9699    case CONST_INT:
9700    case CONST_DOUBLE:
9701    case CONST_VECTOR:
9702      return false;
9703
9704    default:
9705      break;
9706    }
9707  return !legitimate_constant_p (x);
9708}
9709
9710
9711/* Nonzero if the constant value X is a legitimate general operand
9712   when generating PIC code.  It is given that flag_pic is on and
9713   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
9714
9715bool
9716legitimate_pic_operand_p (rtx x)
9717{
9718  rtx inner;
9719
9720  switch (GET_CODE (x))
9721    {
9722    case CONST:
9723      inner = XEXP (x, 0);
9724      if (GET_CODE (inner) == PLUS
9725	  && CONST_INT_P (XEXP (inner, 1)))
9726	inner = XEXP (inner, 0);
9727
9728      /* Only some unspecs are valid as "constants".  */
9729      if (GET_CODE (inner) == UNSPEC)
9730	switch (XINT (inner, 1))
9731	  {
9732	  case UNSPEC_GOT:
9733	  case UNSPEC_GOTOFF:
9734	  case UNSPEC_PLTOFF:
9735	    return TARGET_64BIT;
9736	  case UNSPEC_TPOFF:
9737	    x = XVECEXP (inner, 0, 0);
9738	    return (GET_CODE (x) == SYMBOL_REF
9739		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9740	  case UNSPEC_MACHOPIC_OFFSET:
9741	    return legitimate_pic_address_disp_p (x);
9742	  default:
9743	    return false;
9744	  }
9745      /* FALLTHRU */
9746
9747    case SYMBOL_REF:
9748    case LABEL_REF:
9749      return legitimate_pic_address_disp_p (x);
9750
9751    default:
9752      return true;
9753    }
9754}
9755
9756/* Determine if a given CONST RTX is a valid memory displacement
9757   in PIC mode.  */
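
/* For example, in 32bit PIC code a local symbol is typically referenced
   through (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)), which
   assembles to foo@GOTOFF, while a global symbol goes through
   UNSPEC_GOT, i.e. foo@GOT.  */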
9758
9759int
9760legitimate_pic_address_disp_p (rtx disp)
9761{
9762  bool saw_plus;
9763
9764  /* In 64bit mode we can allow direct addresses of symbols and labels
9765     when they are not dynamic symbols.  */
9766  if (TARGET_64BIT)
9767    {
9768      rtx op0 = disp, op1;
9769
9770      switch (GET_CODE (disp))
9771	{
9772	case LABEL_REF:
9773	  return true;
9774
9775	case CONST:
9776	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
9777	    break;
9778	  op0 = XEXP (XEXP (disp, 0), 0);
9779	  op1 = XEXP (XEXP (disp, 0), 1);
9780	  if (!CONST_INT_P (op1)
9781	      || INTVAL (op1) >= 16*1024*1024
9782	      || INTVAL (op1) < -16*1024*1024)
9783            break;
9784	  if (GET_CODE (op0) == LABEL_REF)
9785	    return true;
9786	  if (GET_CODE (op0) != SYMBOL_REF)
9787	    break;
9788	  /* FALLTHRU */
9789
9790	case SYMBOL_REF:
9791	  /* TLS references should always be enclosed in UNSPEC.  */
9792	  if (SYMBOL_REF_TLS_MODEL (op0))
9793	    return false;
9794	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9795	      && ix86_cmodel != CM_LARGE_PIC)
9796	    return true;
9797	  break;
9798
9799	default:
9800	  break;
9801	}
9802    }
9803  if (GET_CODE (disp) != CONST)
9804    return 0;
9805  disp = XEXP (disp, 0);
9806
9807  if (TARGET_64BIT)
9808    {
      /* It is unsafe to allow PLUS expressions here; that would extend
         the allowed distance into the GOT tables, which we should not
         need anyway.  */
9811      if (GET_CODE (disp) != UNSPEC
9812	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
9813	      && XINT (disp, 1) != UNSPEC_GOTOFF
9814	      && XINT (disp, 1) != UNSPEC_PLTOFF))
9815	return 0;
9816
9817      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9818	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9819	return 0;
9820      return 1;
9821    }
9822
9823  saw_plus = false;
9824  if (GET_CODE (disp) == PLUS)
9825    {
9826      if (!CONST_INT_P (XEXP (disp, 1)))
9827	return 0;
9828      disp = XEXP (disp, 0);
9829      saw_plus = true;
9830    }
9831
9832  if (TARGET_MACHO && darwin_local_data_pic (disp))
9833    return 1;
9834
9835  if (GET_CODE (disp) != UNSPEC)
9836    return 0;
9837
9838  switch (XINT (disp, 1))
9839    {
9840    case UNSPEC_GOT:
9841      if (saw_plus)
9842	return false;
9843      /* We need to check for both symbols and labels because VxWorks loads
9844	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
9845	 details.  */
9846      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9847	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9848    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32bit relocation, we don't produce
	 it in the small PIC model at all.  */
9852      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9853	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9854	  && !TARGET_64BIT)
9855        return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9856      return false;
9857    case UNSPEC_GOTTPOFF:
9858    case UNSPEC_GOTNTPOFF:
9859    case UNSPEC_INDNTPOFF:
9860      if (saw_plus)
9861	return false;
9862      disp = XVECEXP (disp, 0, 0);
9863      return (GET_CODE (disp) == SYMBOL_REF
9864	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9865    case UNSPEC_NTPOFF:
9866      disp = XVECEXP (disp, 0, 0);
9867      return (GET_CODE (disp) == SYMBOL_REF
9868	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9869    case UNSPEC_DTPOFF:
9870      disp = XVECEXP (disp, 0, 0);
9871      return (GET_CODE (disp) == SYMBOL_REF
9872	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9873    }
9874
9875  return 0;
9876}
9877
9878/* Recognizes RTL expressions that are valid memory addresses for an
9879   instruction.  The MODE argument is the machine mode for the MEM
9880   expression that wants to use this address.
9881
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
9883   convert common non-canonical forms to canonical form so that they will
9884   be recognized.  */
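
/* Informally, a valid address has the shape disp(%base,%index,scale)
   with scale in {1, 2, 4, 8}; a scale other than 1 requires an index
   register, as checked below.  */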
9885
9886static bool
9887ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9888		           rtx addr, bool strict)
9889{
9890  struct ix86_address parts;
9891  rtx base, index, disp;
9892  HOST_WIDE_INT scale;
9893
9894  if (ix86_decompose_address (addr, &parts) <= 0)
9895    /* Decomposition failed.  */
9896    return false;
9897
9898  base = parts.base;
9899  index = parts.index;
9900  disp = parts.disp;
9901  scale = parts.scale;
9902
9903  /* Validate base register.
9904
     Don't allow SUBREGs that span more than a word here.  It can lead to spill
9906     failures when the base is one word out of a two word structure, which is
9907     represented internally as a DImode int.  */
9908
9909  if (base)
9910    {
9911      rtx reg;
9912
9913      if (REG_P (base))
9914  	reg = base;
9915      else if (GET_CODE (base) == SUBREG
9916	       && REG_P (SUBREG_REG (base))
9917	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9918		  <= UNITS_PER_WORD)
9919  	reg = SUBREG_REG (base);
9920      else
9921	/* Base is not a register.  */
9922	return false;
9923
9924      if (GET_MODE (base) != Pmode)
9925	/* Base is not in Pmode.  */
9926	return false;
9927
9928      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9929	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9930	/* Base is not valid.  */
9931	return false;
9932    }
9933
9934  /* Validate index register.
9935
     Don't allow SUBREGs that span more than a word here -- same as above.  */
9937
9938  if (index)
9939    {
9940      rtx reg;
9941
9942      if (REG_P (index))
9943  	reg = index;
9944      else if (GET_CODE (index) == SUBREG
9945	       && REG_P (SUBREG_REG (index))
9946	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9947		  <= UNITS_PER_WORD)
9948  	reg = SUBREG_REG (index);
9949      else
9950	/* Index is not a register.  */
9951	return false;
9952
9953      if (GET_MODE (index) != Pmode)
9954	/* Index is not in Pmode.  */
9955	return false;
9956
9957      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9958	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9959	/* Index is not valid.  */
9960	return false;
9961    }
9962
9963  /* Validate scale factor.  */
9964  if (scale != 1)
9965    {
9966      if (!index)
9967	/* Scale without index.  */
9968	return false;
9969
9970      if (scale != 2 && scale != 4 && scale != 8)
9971	/* Scale is not a valid multiplier.  */
9972	return false;
9973    }
9974
9975  /* Validate displacement.  */
9976  if (disp)
9977    {
9978      if (GET_CODE (disp) == CONST
9979	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
9980	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9981	switch (XINT (XEXP (disp, 0), 1))
9982	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
	     used.  While the ABI also specifies 32bit relocations, we don't
	     produce them at all and use IP-relative addressing instead.  */
9986	  case UNSPEC_GOT:
9987	  case UNSPEC_GOTOFF:
9988	    gcc_assert (flag_pic);
9989	    if (!TARGET_64BIT)
9990	      goto is_legitimate_pic;
9991
9992	    /* 64bit address unspec.  */
9993	    return false;
9994
9995	  case UNSPEC_GOTPCREL:
9996	    gcc_assert (flag_pic);
9997	    goto is_legitimate_pic;
9998
9999	  case UNSPEC_GOTTPOFF:
10000	  case UNSPEC_GOTNTPOFF:
10001	  case UNSPEC_INDNTPOFF:
10002	  case UNSPEC_NTPOFF:
10003	  case UNSPEC_DTPOFF:
10004	    break;
10005
10006	  default:
10007	    /* Invalid address unspec.  */
10008	    return false;
10009	  }
10010
10011      else if (SYMBOLIC_CONST (disp)
10012	       && (flag_pic
10013		   || (TARGET_MACHO
10014#if TARGET_MACHO
10015		       && MACHOPIC_INDIRECT
10016		       && !machopic_operand_p (disp)
10017#endif
10018	       )))
10019	{
10020
10021	is_legitimate_pic:
10022	  if (TARGET_64BIT && (index || base))
10023	    {
10024	      /* foo@dtpoff(%rX) is ok.  */
10025	      if (GET_CODE (disp) != CONST
10026		  || GET_CODE (XEXP (disp, 0)) != PLUS
10027		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10028		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10029		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10030		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10031		/* Non-constant pic memory reference.  */
10032		return false;
10033	    }
10034	  else if (! legitimate_pic_address_disp_p (disp))
10035	    /* Displacement is an invalid pic construct.  */
10036	    return false;
10037
          /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by the "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing the
	     GOT table with a pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by the
	     "addsi3" pattern, which later gets split into an lea when
	     the output register differs from the input.  While this
	     could be handled by a separate addsi pattern for this case
	     that never results in an lea, disabling this test seems to
	     be the easier and correct fix for the crash.  */
10059	}
10060      else if (GET_CODE (disp) != LABEL_REF
10061	       && !CONST_INT_P (disp)
10062	       && (GET_CODE (disp) != CONST
10063		   || !legitimate_constant_p (disp))
10064	       && (GET_CODE (disp) != SYMBOL_REF
10065		   || !legitimate_constant_p (disp)))
10066	/* Displacement is not constant.  */
10067	return false;
10068      else if (TARGET_64BIT
10069	       && !x86_64_immediate_operand (disp, VOIDmode))
10070	/* Displacement is out of range.  */
10071	return false;
10072    }
10073
10074  /* Everything looks valid.  */
10075  return true;
10076}
10077
10078/* Determine if a given RTX is a valid constant address.  */
10079
10080bool
10081constant_address_p (rtx x)
10082{
10083  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10084}
10085
10086/* Return a unique alias set for the GOT.  */
10087
10088static alias_set_type
10089ix86_GOT_alias_set (void)
10090{
10091  static alias_set_type set = -1;
10092  if (set == -1)
10093    set = new_alias_set ();
10094  return set;
10095}
10096
10097/* Return a legitimate reference for ORIG (an address) using the
10098   register REG.  If REG is 0, a new pseudo is generated.
10099
10100   There are two types of references that must be handled:
10101
10102   1. Global data references must load the address from the GOT, via
10103      the PIC reg.  An insn is emitted to do this load, and the reg is
10104      returned.
10105
10106   2. Static data references, constant pool addresses, and code labels
10107      compute the address as an offset from the GOT, whose base is in
10108      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
10109      differentiate them from global data objects.  The returned
10110      address is the PIC reg + an unspec constant.
10111
10112   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10113   reg also appears in the address.  */
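
/* As a rough 32bit illustration: case 1 yields a load such as
   movl foo@GOT(%ebx), %reg for a global "foo", while case 2 yields the
   address %ebx + foo@GOTOFF for a local "foo", with %ebx standing for
   the PIC register.  */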
10114
10115static rtx
10116legitimize_pic_address (rtx orig, rtx reg)
10117{
10118  rtx addr = orig;
10119  rtx new_rtx = orig;
10120  rtx base;
10121
10122#if TARGET_MACHO
10123  if (TARGET_MACHO && !TARGET_64BIT)
10124    {
10125      if (reg == 0)
10126	reg = gen_reg_rtx (Pmode);
10127      /* Use the generic Mach-O PIC machinery.  */
10128      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10129    }
10130#endif
10131
10132  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10133    new_rtx = addr;
10134  else if (TARGET_64BIT
10135	   && ix86_cmodel != CM_SMALL_PIC
10136	   && gotoff_operand (addr, Pmode))
10137    {
10138      rtx tmpreg;
10139      /* This symbol may be referenced via a displacement from the PIC
10140	 base address (@GOTOFF).  */
10141
10142      if (reload_in_progress)
10143	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10144      if (GET_CODE (addr) == CONST)
10145	addr = XEXP (addr, 0);
10146      if (GET_CODE (addr) == PLUS)
10147	  {
10148            new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10149				      UNSPEC_GOTOFF);
10150	    new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10151	  }
10152	else
10153          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10154      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10155      if (!reg)
10156        tmpreg = gen_reg_rtx (Pmode);
10157      else
10158	tmpreg = reg;
10159      emit_move_insn (tmpreg, new_rtx);
10160
10161      if (reg != 0)
10162	{
10163	  new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
10164					 tmpreg, 1, OPTAB_DIRECT);
10165	  new_rtx = reg;
10166	}
10167      else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
10168    }
10169  else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
10170    {
10171      /* This symbol may be referenced via a displacement from the PIC
10172	 base address (@GOTOFF).  */
10173
10174      if (reload_in_progress)
10175	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10176      if (GET_CODE (addr) == CONST)
10177	addr = XEXP (addr, 0);
10178      if (GET_CODE (addr) == PLUS)
10179	  {
10180            new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10181				      UNSPEC_GOTOFF);
10182	    new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10183	  }
10184	else
10185          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10186      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10187      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10188
10189      if (reg != 0)
10190	{
10191	  emit_move_insn (reg, new_rtx);
10192	  new_rtx = reg;
10193	}
10194    }
10195  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10196	   /* We can't use @GOTOFF for text labels on VxWorks;
10197	      see gotoff_operand.  */
10198	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10199    {
10200      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10201        {
10202          if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
10203            return legitimize_dllimport_symbol (addr, true);
10204          if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
10205              && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
10206              && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
10207            {
10208              rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
10209              return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
10210            }
10211        }
10212
10213      if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10214	{
10215	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
10216	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10217	  new_rtx = gen_const_mem (Pmode, new_rtx);
10218	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10219
10220	  if (reg == 0)
10221	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly; otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses loaded from the GOT
	     table, so skip this.  */
10225	  emit_insn (gen_movsi (reg, new_rtx));
10226	  new_rtx = reg;
10227	}
10228      else
10229	{
10230	  /* This symbol must be referenced via a load from the
10231	     Global Offset Table (@GOT).  */
10232
10233	  if (reload_in_progress)
10234	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10235	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10236	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10237	  if (TARGET_64BIT)
10238	    new_rtx = force_reg (Pmode, new_rtx);
10239	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10240	  new_rtx = gen_const_mem (Pmode, new_rtx);
10241	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10242
10243	  if (reg == 0)
10244	    reg = gen_reg_rtx (Pmode);
10245	  emit_move_insn (reg, new_rtx);
10246	  new_rtx = reg;
10247	}
10248    }
10249  else
10250    {
10251      if (CONST_INT_P (addr)
10252	  && !x86_64_immediate_operand (addr, VOIDmode))
10253	{
10254	  if (reg)
10255	    {
10256	      emit_move_insn (reg, addr);
10257	      new_rtx = reg;
10258	    }
10259	  else
10260	    new_rtx = force_reg (Pmode, addr);
10261	}
10262      else if (GET_CODE (addr) == CONST)
10263	{
10264	  addr = XEXP (addr, 0);
10265
10266	  /* We must match stuff we generate before.  Assume the only
10267	     unspecs that can get here are ours.  Not that we could do
10268	     anything with them anyway....  */
10269	  if (GET_CODE (addr) == UNSPEC
10270	      || (GET_CODE (addr) == PLUS
10271		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10272	    return orig;
10273	  gcc_assert (GET_CODE (addr) == PLUS);
10274	}
10275      if (GET_CODE (addr) == PLUS)
10276	{
10277	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10278
10279	  /* Check first to see if this is a constant offset from a @GOTOFF
10280	     symbol reference.  */
10281	  if (gotoff_operand (op0, Pmode)
10282	      && CONST_INT_P (op1))
10283	    {
10284	      if (!TARGET_64BIT)
10285		{
10286		  if (reload_in_progress)
10287		    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10288		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10289					    UNSPEC_GOTOFF);
10290		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10291		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10292		  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10293
10294		  if (reg != 0)
10295		    {
10296		      emit_move_insn (reg, new_rtx);
10297		      new_rtx = reg;
10298		    }
10299		}
10300	      else
10301		{
10302		  if (INTVAL (op1) < -16*1024*1024
10303		      || INTVAL (op1) >= 16*1024*1024)
10304		    {
10305		      if (!x86_64_immediate_operand (op1, Pmode))
10306			op1 = force_reg (Pmode, op1);
10307		      new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
10308		    }
10309		}
10310	    }
10311	  else
10312	    {
10313	      base = legitimize_pic_address (XEXP (addr, 0), reg);
10314	      new_rtx  = legitimize_pic_address (XEXP (addr, 1),
10315						 base == reg ? NULL_RTX : reg);
10316
10317	      if (CONST_INT_P (new_rtx))
10318		new_rtx = plus_constant (base, INTVAL (new_rtx));
10319	      else
10320		{
10321		  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
10322		    {
10323		      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
10324		      new_rtx = XEXP (new_rtx, 1);
10325		    }
10326		  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
10327		}
10328	    }
10329	}
10330    }
10331  return new_rtx;
10332}
10333
10334/* Load the thread pointer.  If TO_REG is true, force it into a register.  */
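
/* The UNSPEC_TP value built here stands for the TLS segment base;
   ix86_decompose_address maps it to a %gs (32bit) or %fs (64bit)
   segment override when TARGET_TLS_DIRECT_SEG_REFS is in effect.  */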
10335
10336static rtx
10337get_thread_pointer (int to_reg)
10338{
10339  rtx tp, reg, insn;
10340
10341  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10342  if (!to_reg)
10343    return tp;
10344
10345  reg = gen_reg_rtx (Pmode);
10346  insn = gen_rtx_SET (VOIDmode, reg, tp);
10347  insn = emit_insn (insn);
10348
10349  return reg;
10350}
10351
10352/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
10353   false if we expect this to be used for a memory address and true if
10354   we expect to load the address into a register.  */
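
/* For instance, for the local-exec model with 64bit GNU TLS the address
   built below is thread-pointer + x@tpoff, which typically assembles to
   something like

     movq  %fs:0, %reg
     ...   x@tpoff(%reg)

   or, with TARGET_TLS_DIRECT_SEG_REFS, folds into a %fs:x@tpoff
   operand.  */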
10355
10356static rtx
10357legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
10358{
10359  rtx dest, base, off, pic, tp;
10360  int type;
10361
10362  switch (model)
10363    {
10364    case TLS_MODEL_GLOBAL_DYNAMIC:
10365      dest = gen_reg_rtx (Pmode);
10366      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10367
10368      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10369	{
10370	  rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
10371
10372	  start_sequence ();
10373	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
10374	  insns = get_insns ();
10375	  end_sequence ();
10376
10377	  RTL_CONST_CALL_P (insns) = 1;
10378	  emit_libcall_block (insns, dest, rax, x);
10379	}
10380      else if (TARGET_64BIT && TARGET_GNU2_TLS)
10381	emit_insn (gen_tls_global_dynamic_64 (dest, x));
10382      else
10383	emit_insn (gen_tls_global_dynamic_32 (dest, x));
10384
10385      if (TARGET_GNU2_TLS)
10386	{
10387	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
10388
10389	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10390	}
10391      break;
10392
10393    case TLS_MODEL_LOCAL_DYNAMIC:
10394      base = gen_reg_rtx (Pmode);
10395      tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10396
10397      if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10398	{
10399	  rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
10400
10401	  start_sequence ();
10402	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
10403	  insns = get_insns ();
10404	  end_sequence ();
10405
10406	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
10407	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
10408	  RTL_CONST_CALL_P (insns) = 1;
10409	  emit_libcall_block (insns, base, rax, note);
10410	}
10411      else if (TARGET_64BIT && TARGET_GNU2_TLS)
10412	emit_insn (gen_tls_local_dynamic_base_64 (base));
10413      else
10414	emit_insn (gen_tls_local_dynamic_base_32 (base));
10415
10416      if (TARGET_GNU2_TLS)
10417	{
10418	  rtx x = ix86_tls_module_base ();
10419
10420	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
10421			       gen_rtx_MINUS (Pmode, x, tp));
10422	}
10423
10424      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
10425      off = gen_rtx_CONST (Pmode, off);
10426
10427      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
10428
10429      if (TARGET_GNU2_TLS)
10430	{
10431	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
10432
10433	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10434	}
10435
10436      break;
10437
10438    case TLS_MODEL_INITIAL_EXEC:
10439      if (TARGET_64BIT)
10440	{
10441	  if (TARGET_SUN_TLS)
10442	    {
10443	      /* The Sun linker took the AMD64 TLS spec literally
10444		 and can only handle %rax as destination of the
10445		 initial executable code sequence.  */
10446
10447	      dest = gen_reg_rtx (Pmode);
10448	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
10449	      return dest;
10450	    }
10451
10452	  pic = NULL;
10453	  type = UNSPEC_GOTNTPOFF;
10454	}
10455      else if (flag_pic)
10456	{
10457	  if (reload_in_progress)
10458	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10459	  pic = pic_offset_table_rtx;
10460	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
10461	}
10462      else if (!TARGET_ANY_GNU_TLS)
10463	{
10464	  pic = gen_reg_rtx (Pmode);
10465	  emit_insn (gen_set_got (pic));
10466	  type = UNSPEC_GOTTPOFF;
10467	}
10468      else
10469	{
10470	  pic = NULL;
10471	  type = UNSPEC_INDNTPOFF;
10472	}
10473
10474      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
10475      off = gen_rtx_CONST (Pmode, off);
10476      if (pic)
10477	off = gen_rtx_PLUS (Pmode, pic, off);
10478      off = gen_const_mem (Pmode, off);
10479      set_mem_alias_set (off, ix86_GOT_alias_set ());
10480
10481      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10482	{
10483          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10484	  off = force_reg (Pmode, off);
10485	  return gen_rtx_PLUS (Pmode, base, off);
10486	}
10487      else
10488	{
10489	  base = get_thread_pointer (true);
10490	  dest = gen_reg_rtx (Pmode);
10491	  emit_insn (gen_subsi3 (dest, base, off));
10492	}
10493      break;
10494
10495    case TLS_MODEL_LOCAL_EXEC:
10496      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
10497			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10498			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
10499      off = gen_rtx_CONST (Pmode, off);
10500
10501      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10502	{
10503	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10504	  return gen_rtx_PLUS (Pmode, base, off);
10505	}
10506      else
10507	{
10508	  base = get_thread_pointer (true);
10509	  dest = gen_reg_rtx (Pmode);
10510	  emit_insn (gen_subsi3 (dest, base, off));
10511	}
10512      break;
10513
10514    default:
10515      gcc_unreachable ();
10516    }
10517
10518  return dest;
10519}
10520
10521/* Create or return the unique __imp_DECL dllimport symbol corresponding
10522   to symbol DECL.  */
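
/* For example, a reference to a dllimport'd "foo" is rewritten into a
   load from the import pointer named "__imp__foo" (or "__imp_foo" when
   there is no user label prefix or the name carries the fastcall
   prefix), as set up below.  */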
10523
10524static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10525  htab_t dllimport_map;
10526
10527static tree
10528get_dllimport_decl (tree decl)
10529{
10530  struct tree_map *h, in;
10531  void **loc;
10532  const char *name;
10533  const char *prefix;
10534  size_t namelen, prefixlen;
10535  char *imp_name;
10536  tree to;
10537  rtx rtl;
10538
10539  if (!dllimport_map)
10540    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
10541
10542  in.hash = htab_hash_pointer (decl);
10543  in.base.from = decl;
10544  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10545  h = (struct tree_map *) *loc;
10546  if (h)
10547    return h->to;
10548
10549  *loc = h = GGC_NEW (struct tree_map);
10550  h->hash = in.hash;
10551  h->base.from = decl;
10552  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
10553			   VAR_DECL, NULL, ptr_type_node);
10554  DECL_ARTIFICIAL (to) = 1;
10555  DECL_IGNORED_P (to) = 1;
10556  DECL_EXTERNAL (to) = 1;
10557  TREE_READONLY (to) = 1;
10558
10559  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10560  name = targetm.strip_name_encoding (name);
10561  prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10562    ? "*__imp_" : "*__imp__";
10563  namelen = strlen (name);
10564  prefixlen = strlen (prefix);
10565  imp_name = (char *) alloca (namelen + prefixlen + 1);
10566  memcpy (imp_name, prefix, prefixlen);
10567  memcpy (imp_name + prefixlen, name, namelen + 1);
10568
10569  name = ggc_alloc_string (imp_name, namelen + prefixlen);
10570  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10571  SET_SYMBOL_REF_DECL (rtl, to);
10572  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
10573
10574  rtl = gen_const_mem (Pmode, rtl);
10575  set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10576
10577  SET_DECL_RTL (to, rtl);
10578  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10579
10580  return to;
10581}
10582
10583/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
10584   true if we require the result be a register.  */
10585
10586static rtx
10587legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10588{
10589  tree imp_decl;
10590  rtx x;
10591
10592  gcc_assert (SYMBOL_REF_DECL (symbol));
10593  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10594
10595  x = DECL_RTL (imp_decl);
10596  if (want_reg)
10597    x = force_reg (Pmode, x);
10598  return x;
10599}
10600
10601/* Try machine-dependent ways of modifying an illegitimate address
10602   to be legitimate.  If we find one, return the new, valid address.
10603   This macro is used in only one place: `memory_address' in explow.c.
10604
10605   OLDX is the address as it was before break_out_memory_refs was called.
10606   In some cases it is useful to look at this to decide what needs to be done.
10607
10608   It is always safe for this macro to do nothing.  It exists to recognize
10609   opportunities to optimize the output.
10610
10611   For the 80386, we handle X+REG by loading X into a register R and
10612   using R+REG.  R will go in a general reg and indexing will be used.
10613   However, if REG is a broken-out memory address or multiplication,
10614   nothing needs to be done because REG can certainly go in a general reg.
10615
10616   When -fpic is used, special handling is needed for symbolic references.
10617   See comments by legitimize_pic_address in i386.c for details.  */
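
/* For example, an index expressed as (ashift (reg) (const_int 2)) is
   canonicalized below into (mult (reg) (const_int 4)) so that it matches
   the index*scale form accepted by ix86_decompose_address.  */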
10618
10619static rtx
10620ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10621			 enum machine_mode mode)
10622{
10623  int changed = 0;
10624  unsigned log;
10625
10626  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10627  if (log)
10628    return legitimize_tls_address (x, (enum tls_model) log, false);
10629  if (GET_CODE (x) == CONST
10630      && GET_CODE (XEXP (x, 0)) == PLUS
10631      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10632      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10633    {
10634      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10635				      (enum tls_model) log, false);
10636      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10637    }
10638
10639  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10640    {
10641      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10642	return legitimize_dllimport_symbol (x, true);
10643      if (GET_CODE (x) == CONST
10644	  && GET_CODE (XEXP (x, 0)) == PLUS
10645	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10646	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10647	{
10648	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10649	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10650	}
10651    }
10652
10653  if (flag_pic && SYMBOLIC_CONST (x))
10654    return legitimize_pic_address (x, 0);
10655
  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
10657  if (GET_CODE (x) == ASHIFT
10658      && CONST_INT_P (XEXP (x, 1))
10659      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10660    {
10661      changed = 1;
10662      log = INTVAL (XEXP (x, 1));
10663      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10664			GEN_INT (1 << log));
10665    }
10666
10667  if (GET_CODE (x) == PLUS)
10668    {
10669      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
10670
10671      if (GET_CODE (XEXP (x, 0)) == ASHIFT
10672	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10673	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10674	{
10675	  changed = 1;
10676	  log = INTVAL (XEXP (XEXP (x, 0), 1));
10677	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
10678				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10679				      GEN_INT (1 << log));
10680	}
10681
10682      if (GET_CODE (XEXP (x, 1)) == ASHIFT
10683	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10684	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10685	{
10686	  changed = 1;
10687	  log = INTVAL (XEXP (XEXP (x, 1), 1));
10688	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
10689				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10690				      GEN_INT (1 << log));
10691	}
10692
10693      /* Put multiply first if it isn't already.  */
10694      if (GET_CODE (XEXP (x, 1)) == MULT)
10695	{
10696	  rtx tmp = XEXP (x, 0);
10697	  XEXP (x, 0) = XEXP (x, 1);
10698	  XEXP (x, 1) = tmp;
10699	  changed = 1;
10700	}
10701
10702      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10703	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
10704	 created by virtual register instantiation, register elimination, and
10705	 similar optimizations.  */
10706      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10707	{
10708	  changed = 1;
10709	  x = gen_rtx_PLUS (Pmode,
10710			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
10711					  XEXP (XEXP (x, 1), 0)),
10712			    XEXP (XEXP (x, 1), 1));
10713	}
10714
10715      /* Canonicalize
10716	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10717	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
10718      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10719	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10720	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10721	       && CONSTANT_P (XEXP (x, 1)))
10722	{
10723	  rtx constant;
10724	  rtx other = NULL_RTX;
10725
10726	  if (CONST_INT_P (XEXP (x, 1)))
10727	    {
10728	      constant = XEXP (x, 1);
10729	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10730	    }
10731	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10732	    {
10733	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10734	      other = XEXP (x, 1);
10735	    }
10736	  else
10737	    constant = 0;
10738
10739	  if (constant)
10740	    {
10741	      changed = 1;
10742	      x = gen_rtx_PLUS (Pmode,
10743				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10744					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
10745				plus_constant (other, INTVAL (constant)));
10746	    }
10747	}
10748
10749      if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10750	return x;
10751
10752      if (GET_CODE (XEXP (x, 0)) == MULT)
10753	{
10754	  changed = 1;
10755	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10756	}
10757
10758      if (GET_CODE (XEXP (x, 1)) == MULT)
10759	{
10760	  changed = 1;
10761	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10762	}
10763
10764      if (changed
10765	  && REG_P (XEXP (x, 1))
10766	  && REG_P (XEXP (x, 0)))
10767	return x;
10768
10769      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10770	{
10771	  changed = 1;
10772	  x = legitimize_pic_address (x, 0);
10773	}
10774
10775      if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10776	return x;
10777
10778      if (REG_P (XEXP (x, 0)))
10779	{
10780	  rtx temp = gen_reg_rtx (Pmode);
10781	  rtx val  = force_operand (XEXP (x, 1), temp);
10782	  if (val != temp)
10783	    emit_move_insn (temp, val);
10784
10785	  XEXP (x, 1) = temp;
10786	  return x;
10787	}
10788
10789      else if (REG_P (XEXP (x, 1)))
10790	{
10791	  rtx temp = gen_reg_rtx (Pmode);
10792	  rtx val  = force_operand (XEXP (x, 0), temp);
10793	  if (val != temp)
10794	    emit_move_insn (temp, val);
10795
10796	  XEXP (x, 0) = temp;
10797	  return x;
10798	}
10799    }
10800
10801  return x;
10802}
10803
10804/* Print an integer constant expression in assembler syntax.  Addition
10805   and subtraction are the only arithmetic that may appear in these
10806   expressions.  FILE is the stdio stream to write to, X is the rtx, and
10807   CODE is the operand print code from the output string.  */
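
/* For instance, (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF) is printed
   as "foo@GOTOFF", and UNSPEC_GOTPCREL as "foo@GOTPCREL(%rip)" in AT&T
   syntax.  */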
10808
10809static void
10810output_pic_addr_const (FILE *file, rtx x, int code)
10811{
10812  char buf[256];
10813
10814  switch (GET_CODE (x))
10815    {
10816    case PC:
10817      gcc_assert (flag_pic);
10818      putc ('.', file);
10819      break;
10820
10821    case SYMBOL_REF:
10822      if (! TARGET_MACHO || TARGET_64BIT)
10823	output_addr_const (file, x);
10824      else
10825	{
10826	  const char *name = XSTR (x, 0);
10827
10828	  /* Mark the decl as referenced so that cgraph will
10829	     output the function.  */
10830	  if (SYMBOL_REF_DECL (x))
10831	    mark_decl_referenced (SYMBOL_REF_DECL (x));
10832
10833#if TARGET_MACHO
10834	  if (MACHOPIC_INDIRECT
10835	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10836	    name = machopic_indirection_name (x, /*stub_p=*/true);
10837#endif
10838	  assemble_name (file, name);
10839	}
10840      if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10841	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10842	fputs ("@PLT", file);
10843      break;
10844
10845    case LABEL_REF:
10846      x = XEXP (x, 0);
10847      /* FALLTHRU */
10848    case CODE_LABEL:
10849      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10850      assemble_name (asm_out_file, buf);
10851      break;
10852
10853    case CONST_INT:
10854      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10855      break;
10856
10857    case CONST:
10858      /* This used to output parentheses around the expression,
10859	 but that does not work on the 386 (either ATT or BSD assembler).  */
10860      output_pic_addr_const (file, XEXP (x, 0), code);
10861      break;
10862
10863    case CONST_DOUBLE:
10864      if (GET_MODE (x) == VOIDmode)
10865	{
10866	  /* We can use %d if the number is <32 bits and positive.  */
10867	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10868	    fprintf (file, "0x%lx%08lx",
10869		     (unsigned long) CONST_DOUBLE_HIGH (x),
10870		     (unsigned long) CONST_DOUBLE_LOW (x));
10871	  else
10872	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10873	}
10874      else
10875	/* We can't handle floating point constants;
10876	   PRINT_OPERAND must handle them.  */
10877	output_operand_lossage ("floating constant misused");
10878      break;
10879
10880    case PLUS:
10881      /* Some assemblers need integer constants to appear first.  */
10882      if (CONST_INT_P (XEXP (x, 0)))
10883	{
10884	  output_pic_addr_const (file, XEXP (x, 0), code);
10885	  putc ('+', file);
10886	  output_pic_addr_const (file, XEXP (x, 1), code);
10887	}
10888      else
10889	{
10890	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
10891	  output_pic_addr_const (file, XEXP (x, 1), code);
10892	  putc ('+', file);
10893	  output_pic_addr_const (file, XEXP (x, 0), code);
10894	}
10895      break;
10896
10897    case MINUS:
10898      if (!TARGET_MACHO)
10899	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10900      output_pic_addr_const (file, XEXP (x, 0), code);
10901      putc ('-', file);
10902      output_pic_addr_const (file, XEXP (x, 1), code);
10903      if (!TARGET_MACHO)
10904	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10905      break;
10906
10907     case UNSPEC:
10908       gcc_assert (XVECLEN (x, 0) == 1);
10909       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10910       switch (XINT (x, 1))
10911	{
10912	case UNSPEC_GOT:
10913	  fputs ("@GOT", file);
10914	  break;
10915	case UNSPEC_GOTOFF:
10916	  fputs ("@GOTOFF", file);
10917	  break;
10918	case UNSPEC_PLTOFF:
10919	  fputs ("@PLTOFF", file);
10920	  break;
10921	case UNSPEC_GOTPCREL:
10922	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10923		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10924	  break;
10925	case UNSPEC_GOTTPOFF:
10926	  /* FIXME: This might be @TPOFF in Sun ld too.  */
10927	  fputs ("@gottpoff", file);
10928	  break;
10929	case UNSPEC_TPOFF:
10930	  fputs ("@tpoff", file);
10931	  break;
10932	case UNSPEC_NTPOFF:
10933	  if (TARGET_64BIT)
10934	    fputs ("@tpoff", file);
10935	  else
10936	    fputs ("@ntpoff", file);
10937	  break;
10938	case UNSPEC_DTPOFF:
10939	  fputs ("@dtpoff", file);
10940	  break;
10941	case UNSPEC_GOTNTPOFF:
10942	  if (TARGET_64BIT)
10943	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10944		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
10945	  else
10946	    fputs ("@gotntpoff", file);
10947	  break;
10948	case UNSPEC_INDNTPOFF:
10949	  fputs ("@indntpoff", file);
10950	  break;
10951#if TARGET_MACHO
10952	case UNSPEC_MACHOPIC_OFFSET:
10953	  putc ('-', file);
10954	  machopic_output_function_base_name (file);
10955	  break;
10956#endif
10957	default:
10958	  output_operand_lossage ("invalid UNSPEC as operand");
10959	  break;
10960	}
10961       break;
10962
10963    default:
10964      output_operand_lossage ("invalid expression as operand");
10965    }
10966}
10967
10968/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10969   We need to emit DTP-relative relocations.  */
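
/* For SIZE 4 this emits roughly ".long foo@dtpoff"; for SIZE 8 a
   ", 0" is appended to fill the upper half of the 8-byte value.  */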
10970
10971static void ATTRIBUTE_UNUSED
10972i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10973{
10974  fputs (ASM_LONG, file);
10975  output_addr_const (file, x);
10976  fputs ("@dtpoff", file);
10977  switch (size)
10978    {
10979    case 4:
10980      break;
10981    case 8:
10982      fputs (", 0", file);
10983      break;
10984    default:
10985      gcc_unreachable ();
10986   }
10987}
10988
10989/* Return true if X is a representation of the PIC register.  This copes
10990   with calls from ix86_find_base_term, where the register might have
10991   been replaced by a cselib value.  */
10992
10993static bool
10994ix86_pic_register_p (rtx x)
10995{
10996  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
10997    return (pic_offset_table_rtx
10998	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10999  else
11000    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11001}
11002
11003/* In the name of slightly smaller debug output, and to cater to
11004   general assembler lossage, recognize PIC+GOTOFF and turn it back
11005   into a direct symbol reference.
11006
11007   On Darwin, this is necessary to avoid a crash, because Darwin
11008   has a different PIC label for each routine but the DWARF debugging
11009   information is not associated with any particular routine, so it's
11010   necessary to remove references to the PIC label from RTL stored by
11011   the DWARF output code.  */
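
/* For example, a PIC-register-plus-GOTOFF expression such as
   (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))),
   when it appears outside a MEM, is turned back into
   (symbol_ref "foo").  */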
11012
11013static rtx
11014ix86_delegitimize_address (rtx x)
11015{
11016  rtx orig_x = delegitimize_mem_from_attrs (x);
11017  /* addend is NULL or some rtx if x is something+GOTOFF where
11018     something doesn't include the PIC register.  */
11019  rtx addend = NULL_RTX;
11020  /* reg_addend is NULL or a multiple of some register.  */
11021  rtx reg_addend = NULL_RTX;
11022  /* const_addend is NULL or a const_int.  */
11023  rtx const_addend = NULL_RTX;
11024  /* This is the result, or NULL.  */
11025  rtx result = NULL_RTX;
11026
11027  x = orig_x;
11028
11029  if (MEM_P (x))
11030    x = XEXP (x, 0);
11031
11032  if (TARGET_64BIT)
11033    {
11034      if (GET_CODE (x) != CONST
11035	  || GET_CODE (XEXP (x, 0)) != UNSPEC
11036	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
11037	  || !MEM_P (orig_x))
11038	return orig_x;
11039      x = XVECEXP (XEXP (x, 0), 0, 0);
11040      if (GET_MODE (orig_x) != Pmode)
11041	{
11042	  x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
11043	  if (x == NULL_RTX)
11044	    return orig_x;
11045	}
11046      return x;
11047    }
11048
11049  if (GET_CODE (x) != PLUS
11050      || GET_CODE (XEXP (x, 1)) != CONST)
11051    return orig_x;
11052
11053  if (ix86_pic_register_p (XEXP (x, 0)))
11054    /* %ebx + GOT/GOTOFF */
11055    ;
11056  else if (GET_CODE (XEXP (x, 0)) == PLUS)
11057    {
11058      /* %ebx + %reg * scale + GOT/GOTOFF */
11059      reg_addend = XEXP (x, 0);
11060      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
11061	reg_addend = XEXP (reg_addend, 1);
11062      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
11063	reg_addend = XEXP (reg_addend, 0);
11064      else
11065	{
11066	  reg_addend = NULL_RTX;
11067	  addend = XEXP (x, 0);
11068	}
11069    }
11070  else
11071    addend = XEXP (x, 0);
11072
11073  x = XEXP (XEXP (x, 1), 0);
11074  if (GET_CODE (x) == PLUS
11075      && CONST_INT_P (XEXP (x, 1)))
11076    {
11077      const_addend = XEXP (x, 1);
11078      x = XEXP (x, 0);
11079    }
11080
11081  if (GET_CODE (x) == UNSPEC
11082      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
11083	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
11084    result = XVECEXP (x, 0, 0);
11085
11086  if (TARGET_MACHO && darwin_local_data_pic (x)
11087      && !MEM_P (orig_x))
11088    result = XVECEXP (x, 0, 0);
11089
11090  if (! result)
11091    return orig_x;
11092
11093  if (const_addend)
11094    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
11095  if (reg_addend)
11096    result = gen_rtx_PLUS (Pmode, reg_addend, result);
11097  if (addend)
11098    {
11099      /* If the rest of original X doesn't involve the PIC register, add
11100	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
11101	 for code like:
11102	 leal (%ebx, %ecx, 4), %ecx
11103	 ...
11104	 movl foo@GOTOFF(%ecx), %edx
11105	 in which case we return (%ecx - %ebx) + foo.  */
11106      if (pic_offset_table_rtx)
11107        result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
11108						     pic_offset_table_rtx),
11109			       result);
11110      else
11111	return orig_x;
11112    }
11113  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
11114    {
11115      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
11116      if (result == NULL_RTX)
11117	return orig_x;
11118    }
11119  return result;
11120}
11121
11122/* If X is a machine specific address (i.e. a symbol or label being
11123   referenced as a displacement from the GOT implemented using an
11124   UNSPEC), then return the base term.  Otherwise return X.  */
11125
11126rtx
11127ix86_find_base_term (rtx x)
11128{
11129  rtx term;
11130
11131  if (TARGET_64BIT)
11132    {
11133      if (GET_CODE (x) != CONST)
11134	return x;
11135      term = XEXP (x, 0);
11136      if (GET_CODE (term) == PLUS
11137	  && (CONST_INT_P (XEXP (term, 1))
11138	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
11139	term = XEXP (term, 0);
11140      if (GET_CODE (term) != UNSPEC
11141	  || XINT (term, 1) != UNSPEC_GOTPCREL)
11142	return x;
11143
11144      return XVECEXP (term, 0, 0);
11145    }
11146
11147  return ix86_delegitimize_address (x);
11148}
11149
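/* Output to FILE the suffix of the conditional instruction (setcc, jcc
   or cmov) that tests condition CODE in mode MODE, reversing the
   condition first if REVERSE.  FP selects the spellings usable after a
   floating point comparison (e.g. "nbe" instead of "a").  For instance,
   (EQ, CCmode) yields "e" and (LTU, CCmode) yields "b".  */
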
11150static void
11151put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
11152		    int fp, FILE *file)
11153{
11154  const char *suffix;
11155
11156  if (mode == CCFPmode || mode == CCFPUmode)
11157    {
11158      code = ix86_fp_compare_code_to_integer (code);
11159      mode = CCmode;
11160    }
11161  if (reverse)
11162    code = reverse_condition (code);
11163
11164  switch (code)
11165    {
11166    case EQ:
11167      switch (mode)
11168	{
11169	case CCAmode:
11170	  suffix = "a";
11171	  break;
11172
11173	case CCCmode:
11174	  suffix = "c";
11175	  break;
11176
11177	case CCOmode:
11178	  suffix = "o";
11179	  break;
11180
11181	case CCSmode:
11182	  suffix = "s";
11183	  break;
11184
11185	default:
11186	  suffix = "e";
11187	}
11188      break;
11189    case NE:
11190      switch (mode)
11191	{
11192	case CCAmode:
11193	  suffix = "na";
11194	  break;
11195
11196	case CCCmode:
11197	  suffix = "nc";
11198	  break;
11199
11200	case CCOmode:
11201	  suffix = "no";
11202	  break;
11203
11204	case CCSmode:
11205	  suffix = "ns";
11206	  break;
11207
11208	default:
11209	  suffix = "ne";
11210	}
11211      break;
11212    case GT:
11213      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
11214      suffix = "g";
11215      break;
11216    case GTU:
11217      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
11218	 Those same assemblers have the same but opposite lossage on cmov.  */
11219      if (mode == CCmode)
11220	suffix = fp ? "nbe" : "a";
11221      else if (mode == CCCmode)
11222	suffix = "b";
11223      else
11224	gcc_unreachable ();
11225      break;
11226    case LT:
11227      switch (mode)
11228	{
11229	case CCNOmode:
11230	case CCGOCmode:
11231	  suffix = "s";
11232	  break;
11233
11234	case CCmode:
11235	case CCGCmode:
11236	  suffix = "l";
11237	  break;
11238
11239	default:
11240	  gcc_unreachable ();
11241	}
11242      break;
11243    case LTU:
11244      gcc_assert (mode == CCmode || mode == CCCmode);
11245      suffix = "b";
11246      break;
11247    case GE:
11248      switch (mode)
11249	{
11250	case CCNOmode:
11251	case CCGOCmode:
11252	  suffix = "ns";
11253	  break;
11254
11255	case CCmode:
11256	case CCGCmode:
11257	  suffix = "ge";
11258	  break;
11259
11260	default:
11261	  gcc_unreachable ();
11262	}
11263      break;
11264    case GEU:
11265      /* ??? As above.  */
11266      gcc_assert (mode == CCmode || mode == CCCmode);
11267      suffix = fp ? "nb" : "ae";
11268      break;
11269    case LE:
11270      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
11271      suffix = "le";
11272      break;
11273    case LEU:
11274      /* ??? As above.  */
11275      if (mode == CCmode)
11276	suffix = "be";
11277      else if (mode == CCCmode)
11278	suffix = fp ? "nb" : "ae";
11279      else
11280	gcc_unreachable ();
11281      break;
11282    case UNORDERED:
11283      suffix = fp ? "u" : "p";
11284      break;
11285    case ORDERED:
11286      suffix = fp ? "nu" : "np";
11287      break;
11288    default:
11289      gcc_unreachable ();
11290    }
11291  fputs (suffix, file);
11292}
11293
11294/* Print the name of register X to FILE based on its machine mode and number.
11295   If CODE is 'w', pretend the mode is HImode.
11296   If CODE is 'b', pretend the mode is QImode.
11297   If CODE is 'k', pretend the mode is SImode.
11298   If CODE is 'q', pretend the mode is DImode.
11299   If CODE is 'x', pretend the mode is V4SFmode.
11300   If CODE is 't', pretend the mode is V8SFmode.
11301   If CODE is 'h', pretend the reg is the 'high' byte register.
11302   If CODE is 'y', print "st(0)" instead of "st" if the reg is the x87 stack top.
11303   If CODE is 'd', duplicate the operand for AVX instruction.
11304 */
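/* For example, for the ax register, code 'b' prints "al", 'w' prints "ax",
   'k' prints "eax", 'q' prints "rax" (on 64-bit targets) and 'h' prints
   "ah", each with a leading '%' in AT&T syntax; for xmm0, code 'x' prints
   "xmm0" and 't' prints "ymm0".  */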
11305
11306void
11307print_reg (rtx x, int code, FILE *file)
11308{
11309  const char *reg;
11310  bool duplicated = code == 'd' && TARGET_AVX;
11311
11312  gcc_assert (x == pc_rtx
11313	      || (REGNO (x) != ARG_POINTER_REGNUM
11314		  && REGNO (x) != FRAME_POINTER_REGNUM
11315		  && REGNO (x) != FLAGS_REG
11316		  && REGNO (x) != FPSR_REG
11317		  && REGNO (x) != FPCR_REG));
11318
11319  if (ASSEMBLER_DIALECT == ASM_ATT)
11320    putc ('%', file);
11321
11322  if (x == pc_rtx)
11323    {
11324      gcc_assert (TARGET_64BIT);
11325      fputs ("rip", file);
11326      return;
11327    }
11328
11329  if (code == 'w' || MMX_REG_P (x))
11330    code = 2;
11331  else if (code == 'b')
11332    code = 1;
11333  else if (code == 'k')
11334    code = 4;
11335  else if (code == 'q')
11336    code = 8;
11337  else if (code == 'y')
11338    code = 3;
11339  else if (code == 'h')
11340    code = 0;
11341  else if (code == 'x')
11342    code = 16;
11343  else if (code == 't')
11344    code = 32;
11345  else
11346    code = GET_MODE_SIZE (GET_MODE (x));
11347
11348  /* Irritatingly, AMD extended registers use a different naming
11349     convention from the normal registers.  */
11350  if (REX_INT_REG_P (x))
11351    {
11352      gcc_assert (TARGET_64BIT);
11353      switch (code)
11354	{
11355	  case 0:
11356	    error ("extended registers have no high halves");
11357	    break;
11358	  case 1:
11359	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
11360	    break;
11361	  case 2:
11362	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
11363	    break;
11364	  case 4:
11365	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
11366	    break;
11367	  case 8:
11368	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
11369	    break;
11370	  default:
11371	    error ("unsupported operand size for extended register");
11372	    break;
11373	}
11374      return;
11375    }
11376
11377  reg = NULL;
11378  switch (code)
11379    {
11380    case 3:
11381      if (STACK_TOP_P (x))
11382	{
11383	  reg = "st(0)";
11384	  break;
11385	}
11386      /* FALLTHRU */
11387    case 8:
11388    case 4:
11389    case 12:
11390      if (! ANY_FP_REG_P (x))
11391	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
11392      /* FALLTHRU */
11393    case 16:
11394    case 2:
11395    normal:
11396      reg = hi_reg_name[REGNO (x)];
11397      break;
11398    case 1:
11399      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
11400	goto normal;
11401      reg = qi_reg_name[REGNO (x)];
11402      break;
11403    case 0:
11404      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
11405	goto normal;
11406      reg = qi_high_reg_name[REGNO (x)];
11407      break;
11408    case 32:
11409      if (SSE_REG_P (x))
11410	{
11411	  gcc_assert (!duplicated);
11412	  putc ('y', file);
11413	  fputs (hi_reg_name[REGNO (x)] + 1, file);
11414	  return;
11415	}
11416      break;
11417    default:
11418      gcc_unreachable ();
11419    }
11420
11421  fputs (reg, file);
11422  if (duplicated)
11423    {
11424      if (ASSEMBLER_DIALECT == ASM_ATT)
11425	fprintf (file, ", %%%s", reg);
11426      else
11427	fprintf (file, ", %s", reg);
11428    }
11429}
11430
11431/* Locate some local-dynamic symbol still in use by this function
11432   so that we can print its name in some tls_local_dynamic_base
11433   pattern.  */
11434
11435static int
11436get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11437{
11438  rtx x = *px;
11439
11440  if (GET_CODE (x) == SYMBOL_REF
11441      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11442    {
11443      cfun->machine->some_ld_name = XSTR (x, 0);
11444      return 1;
11445    }
11446
11447  return 0;
11448}
11449
11450static const char *
11451get_some_local_dynamic_name (void)
11452{
11453  rtx insn;
11454
11455  if (cfun->machine->some_ld_name)
11456    return cfun->machine->some_ld_name;
11457
11458  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11459    if (NONDEBUG_INSN_P (insn)
11460	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11461      return cfun->machine->some_ld_name;
11462
11463  return NULL;
11464}
11465
11466/* Meaning of CODE:
11467   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
11468   C -- print opcode suffix for set/cmov insn.
11469   c -- like C, but print reversed condition
11470   F,f -- likewise, but for floating-point.
11471   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
11472        otherwise nothing
11473   R -- print the prefix for register names.
11474   z -- print the opcode suffix for the size of the current operand.
11475   Z -- likewise, with special suffixes for x87 instructions.
11476   * -- print a star (in certain assembler syntax)
11477   A -- print an absolute memory reference.
11478   w -- print the operand as if it's a "word" (HImode) even if it isn't.
11479   s -- print a shift double count, followed by the assembler's argument
11480	delimiter.
11481   b -- print the QImode name of the register for the indicated operand.
11482	%b0 would print %al if operands[0] is reg 0.
11483   w --  likewise, print the HImode name of the register.
11484   k --  likewise, print the SImode name of the register.
11485   q --  likewise, print the DImode name of the register.
11486   x --  likewise, print the V4SFmode name of the register.
11487   t --  likewise, print the V8SFmode name of the register.
11488   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
11489   y -- print "st(0)" instead of "st" as a register.
11490   d -- print duplicated register operand for AVX instruction.
11491   D -- print condition for SSE cmp instruction.
11492   P -- if PIC, print an @PLT suffix.
11493   X -- don't print any sort of PIC '@' suffix for a symbol.
11494   & -- print some in-use local-dynamic symbol name.
11495   H -- print a memory address offset by 8; used for sse high-parts
11496   Y -- print condition for XOP pcom* instruction.
11497   + -- print a branch hint as 'cs' or 'ds' prefix
11498   ; -- print a semicolon (after prefixes due to bug in older gas).
11499 */
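/* For instance, with an SImode operand 0, "%z0" prints "l" in AT&T syntax
   (so a template like "add%z0" assembles as "addl") and prints nothing in
   Intel syntax; "%k0" prints the SImode register name; "%;" expands to a
   semicolon only for assemblers that need one after rep/lock prefixes.  */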
11500
11501void
11502print_operand (FILE *file, rtx x, int code)
11503{
11504  if (code)
11505    {
11506      switch (code)
11507	{
11508	case '*':
11509	  if (ASSEMBLER_DIALECT == ASM_ATT)
11510	    putc ('*', file);
11511	  return;
11512
11513	case '&':
11514	  {
11515	    const char *name = get_some_local_dynamic_name ();
11516	    if (name == NULL)
11517	      output_operand_lossage ("'%%&' used without any "
11518				      "local dynamic TLS references");
11519	    else
11520	      assemble_name (file, name);
11521	    return;
11522	  }
11523
11524	case 'A':
11525	  switch (ASSEMBLER_DIALECT)
11526	    {
11527	    case ASM_ATT:
11528	      putc ('*', file);
11529	      break;
11530
11531	    case ASM_INTEL:
11532	      /* Intel syntax. For absolute addresses, registers should not
11533		 be surrounded by brackets.  */
11534	      if (!REG_P (x))
11535		{
11536		  putc ('[', file);
11537		  PRINT_OPERAND (file, x, 0);
11538		  putc (']', file);
11539		  return;
11540		}
11541	      break;
11542
11543	    default:
11544	      gcc_unreachable ();
11545	    }
11546
11547	  PRINT_OPERAND (file, x, 0);
11548	  return;
11549
11551	case 'L':
11552	  if (ASSEMBLER_DIALECT == ASM_ATT)
11553	    putc ('l', file);
11554	  return;
11555
11556	case 'W':
11557	  if (ASSEMBLER_DIALECT == ASM_ATT)
11558	    putc ('w', file);
11559	  return;
11560
11561	case 'B':
11562	  if (ASSEMBLER_DIALECT == ASM_ATT)
11563	    putc ('b', file);
11564	  return;
11565
11566	case 'Q':
11567	  if (ASSEMBLER_DIALECT == ASM_ATT)
11568	    putc ('l', file);
11569	  return;
11570
11571	case 'S':
11572	  if (ASSEMBLER_DIALECT == ASM_ATT)
11573	    putc ('s', file);
11574	  return;
11575
11576	case 'T':
11577	  if (ASSEMBLER_DIALECT == ASM_ATT)
11578	    putc ('t', file);
11579	  return;
11580
11581	case 'z':
11582	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11583	    {
11584	      /* Opcodes don't get size suffixes when using Intel syntax.  */
11585	      if (ASSEMBLER_DIALECT == ASM_INTEL)
11586		return;
11587
11588	      switch (GET_MODE_SIZE (GET_MODE (x)))
11589		{
11590		case 1:
11591		  putc ('b', file);
11592		  return;
11593
11594		case 2:
11595		  putc ('w', file);
11596		  return;
11597
11598		case 4:
11599		  putc ('l', file);
11600		  return;
11601
11602		case 8:
11603		  putc ('q', file);
11604		  return;
11605
11606		default:
11607		  output_operand_lossage
11608		    ("invalid operand size for operand code '%c'", code);
11609		  return;
11610		}
11611	    }
11612
11613	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11614	    warning
11615	      (0, "non-integer operand used with operand code '%c'", code);
11616	  /* FALLTHRU */
11617
11618	case 'Z':
11619	  /* 387 opcodes don't get size suffixes when using Intel syntax.  */
11620	  if (ASSEMBLER_DIALECT == ASM_INTEL)
11621	    return;
11622
11623	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11624	    {
11625	      switch (GET_MODE_SIZE (GET_MODE (x)))
11626		{
11627		case 2:
11628#ifdef HAVE_AS_IX86_FILDS
11629		  putc ('s', file);
11630#endif
11631		  return;
11632
11633		case 4:
11634		  putc ('l', file);
11635		  return;
11636
11637		case 8:
11638#ifdef HAVE_AS_IX86_FILDQ
11639		  putc ('q', file);
11640#else
11641		  fputs ("ll", file);
11642#endif
11643		  return;
11644
11645		default:
11646		  break;
11647		}
11648	    }
11649	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11650	    {
11651	      /* 387 opcodes don't get size suffixes
11652		 if the operands are registers.  */
11653	      if (STACK_REG_P (x))
11654		return;
11655
11656	      switch (GET_MODE_SIZE (GET_MODE (x)))
11657		{
11658		case 4:
11659		  putc ('s', file);
11660		  return;
11661
11662		case 8:
11663		  putc ('l', file);
11664		  return;
11665
11666		case 12:
11667		case 16:
11668		  putc ('t', file);
11669		  return;
11670
11671		default:
11672		  break;
11673		}
11674	    }
11675	  else
11676	    {
11677	      output_operand_lossage
11678		("invalid operand type used with operand code '%c'", code);
11679	      return;
11680	    }
11681
11682	  output_operand_lossage
11683	    ("invalid operand size for operand code '%c'", code);
11684	  return;
11685
11686	case 'd':
11687	case 'b':
11688	case 'w':
11689	case 'k':
11690	case 'q':
11691	case 'h':
11692	case 't':
11693	case 'y':
11694	case 'x':
11695	case 'X':
11696	case 'P':
11697	  break;
11698
11699	case 's':
11700	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11701	    {
11702	      PRINT_OPERAND (file, x, 0);
11703	      fputs (", ", file);
11704	    }
11705	  return;
11706
11707	case 'D':
11708	  /* Little bit of braindamage here.  The SSE compare instructions
11709	     use completely different names for the comparisons than the
11710	     fp conditional moves do.  */
11711	  if (TARGET_AVX)
11712	    {
11713	      switch (GET_CODE (x))
11714		{
11715		case EQ:
11716		  fputs ("eq", file);
11717		  break;
11718		case UNEQ:
11719		  fputs ("eq_us", file);
11720		  break;
11721		case LT:
11722		  fputs ("lt", file);
11723		  break;
11724		case UNLT:
11725		  fputs ("nge", file);
11726		  break;
11727		case LE:
11728		  fputs ("le", file);
11729		  break;
11730		case UNLE:
11731		  fputs ("ngt", file);
11732		  break;
11733		case UNORDERED:
11734		  fputs ("unord", file);
11735		  break;
11736		case NE:
11737		  fputs ("neq", file);
11738		  break;
11739		case LTGT:
11740		  fputs ("neq_oq", file);
11741		  break;
11742		case GE:
11743		  fputs ("ge", file);
11744		  break;
11745		case UNGE:
11746		  fputs ("nlt", file);
11747		  break;
11748		case GT:
11749		  fputs ("gt", file);
11750		  break;
11751		case UNGT:
11752		  fputs ("nle", file);
11753		  break;
11754		case ORDERED:
11755		  fputs ("ord", file);
11756		  break;
11757		default:
11758		  output_operand_lossage ("operand is not a condition code, "
11759					  "invalid operand code 'D'");
11760		  return;
11761		}
11762	    }
11763	  else
11764	    {
11765	      switch (GET_CODE (x))
11766		{
11767		case EQ:
11768		case UNEQ:
11769		  fputs ("eq", file);
11770		  break;
11771		case LT:
11772		case UNLT:
11773		  fputs ("lt", file);
11774		  break;
11775		case LE:
11776		case UNLE:
11777		  fputs ("le", file);
11778		  break;
11779		case UNORDERED:
11780		  fputs ("unord", file);
11781		  break;
11782		case NE:
11783		case LTGT:
11784		  fputs ("neq", file);
11785		  break;
11786		case UNGE:
11787		case GE:
11788		  fputs ("nlt", file);
11789		  break;
11790		case UNGT:
11791		case GT:
11792		  fputs ("nle", file);
11793		  break;
11794		case ORDERED:
11795		  fputs ("ord", file);
11796		  break;
11797		default:
11798		  output_operand_lossage ("operand is not a condition code, "
11799					  "invalid operand code 'D'");
11800		  return;
11801		}
11802	    }
11803	  return;
11804	case 'O':
11805#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11806	  if (ASSEMBLER_DIALECT == ASM_ATT)
11807	    {
11808	      switch (GET_MODE (x))
11809		{
11810		case HImode: putc ('w', file); break;
11811		case SImode:
11812		case SFmode: putc ('l', file); break;
11813		case DImode:
11814		case DFmode: putc ('q', file); break;
11815		default: gcc_unreachable ();
11816		}
11817	      putc ('.', file);
11818	    }
11819#endif
11820	  return;
11821	case 'C':
11822	  if (!COMPARISON_P (x))
11823	    {
11824	      output_operand_lossage ("operand is neither a constant nor a "
11825				      "condition code, invalid operand code "
11826				      "'C'");
11827	      return;
11828	    }
11829	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11830	  return;
11831	case 'F':
11832	  if (!COMPARISON_P (x))
11833	    {
11834	      output_operand_lossage ("operand is neither a constant nor a "
11835				      "condition code, invalid operand code "
11836				      "'F'");
11837	      return;
11838	    }
11839#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11840	  if (ASSEMBLER_DIALECT == ASM_ATT)
11841	    putc ('.', file);
11842#endif
11843	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11844	  return;
11845
11846	  /* Like above, but reverse condition */
11847	case 'c':
11848	  /* Check to see if argument to %c is really a constant
11849	     and not a condition code which needs to be reversed.  */
11850	  if (!COMPARISON_P (x))
11851	    {
11852	      output_operand_lossage ("operand is neither a constant nor a "
11853				      "condition code, invalid operand "
11854				      "code 'c'");
11855	      return;
11856	    }
11857	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11858	  return;
11859	case 'f':
11860	  if (!COMPARISON_P (x))
11861	    {
11862	      output_operand_lossage ("operand is neither a constant nor a "
11863				      "condition code, invalid operand "
11864				      "code 'f'");
11865	      return;
11866	    }
11867#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11868	  if (ASSEMBLER_DIALECT == ASM_ATT)
11869	    putc ('.', file);
11870#endif
11871	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11872	  return;
11873
11874	case 'H':
11875	  /* It doesn't actually matter what mode we use here, as we're
11876	     only going to use this for printing.  */
11877	  x = adjust_address_nv (x, DImode, 8);
11878	  break;
11879
11880	case '+':
11881	  {
11882	    rtx x;
11883
11884	    if (!optimize || optimize_function_for_size_p (cfun)
11885		|| !TARGET_BRANCH_PREDICTION_HINTS)
11886	      return;
11887
11888	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11889	    if (x)
11890	      {
11891		int pred_val = INTVAL (XEXP (x, 0));
11892
11893		if (pred_val < REG_BR_PROB_BASE * 45 / 100
11894		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
11895		  {
11896		    int taken = pred_val > REG_BR_PROB_BASE / 2;
11897		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
11898
11899		    /* Emit hints only in the case where the default branch
11900		       prediction heuristics would fail.  */
11901		    if (taken != cputaken)
11902		      {
11903			/* We use 3e (DS) prefix for taken branches and
11904			   2e (CS) prefix for not taken branches.  */
11905			if (taken)
11906			  fputs ("ds ; ", file);
11907			else
11908			  fputs ("cs ; ", file);
11909		      }
11910		  }
11911	      }
11912	    return;
11913	  }
11914
11915	case 'Y':
11916	  switch (GET_CODE (x))
11917	    {
11918	    case NE:
11919	      fputs ("neq", file);
11920	      break;
11921	    case EQ:
11922	      fputs ("eq", file);
11923	      break;
11924	    case GE:
11925	    case GEU:
11926	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11927	      break;
11928	    case GT:
11929	    case GTU:
11930	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11931	      break;
11932	    case LE:
11933	    case LEU:
11934	      fputs ("le", file);
11935	      break;
11936	    case LT:
11937	    case LTU:
11938	      fputs ("lt", file);
11939	      break;
11940	    case UNORDERED:
11941	      fputs ("unord", file);
11942	      break;
11943	    case ORDERED:
11944	      fputs ("ord", file);
11945	      break;
11946	    case UNEQ:
11947	      fputs ("ueq", file);
11948	      break;
11949	    case UNGE:
11950	      fputs ("nlt", file);
11951	      break;
11952	    case UNGT:
11953	      fputs ("nle", file);
11954	      break;
11955	    case UNLE:
11956	      fputs ("ule", file);
11957	      break;
11958	    case UNLT:
11959	      fputs ("ult", file);
11960	      break;
11961	    case LTGT:
11962	      fputs ("une", file);
11963	      break;
11964	    default:
11965	      output_operand_lossage ("operand is not a condition code, "
11966				      "invalid operand code 'Y'");
11967	      return;
11968	    }
11969	  return;
11970
11971	case ';':
11972#if TARGET_MACHO || !HAVE_AS_IX86_REP_LOCK_PREFIX
11973	  fputs (";", file);
11974#endif
11975	  return;
11976
11977	default:
11978	    output_operand_lossage ("invalid operand code '%c'", code);
11979	}
11980    }
11981
11982  if (REG_P (x))
11983    print_reg (x, code, file);
11984
11985  else if (MEM_P (x))
11986    {
11987      /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
11988      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11989	  && GET_MODE (x) != BLKmode)
11990	{
11991	  const char * size;
11992	  switch (GET_MODE_SIZE (GET_MODE (x)))
11993	    {
11994	    case 1: size = "BYTE"; break;
11995	    case 2: size = "WORD"; break;
11996	    case 4: size = "DWORD"; break;
11997	    case 8: size = "QWORD"; break;
11998	    case 12: size = "TBYTE"; break;
11999	    case 16:
12000	      if (GET_MODE (x) == XFmode)
12001		size = "TBYTE";
12002              else
12003		size = "XMMWORD";
12004              break;
12005	    case 32: size = "YMMWORD"; break;
12006	    default:
12007	      gcc_unreachable ();
12008	    }
12009
12010	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
12011	  if (code == 'b')
12012	    size = "BYTE";
12013	  else if (code == 'w')
12014	    size = "WORD";
12015	  else if (code == 'k')
12016	    size = "DWORD";
12017
12018	  fputs (size, file);
12019	  fputs (" PTR ", file);
12020	}
12021
12022      x = XEXP (x, 0);
12023      /* Avoid (%rip) for call operands.  */
12024      if (CONSTANT_ADDRESS_P (x) && code == 'P'
12025	  && !CONST_INT_P (x))
12026	output_addr_const (file, x);
12027      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
12028	output_operand_lossage ("invalid constraints for operand");
12029      else
12030	output_address (x);
12031    }
12032
12033  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
12034    {
12035      REAL_VALUE_TYPE r;
12036      long l;
12037
12038      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12039      REAL_VALUE_TO_TARGET_SINGLE (r, l);
12040
12041      if (ASSEMBLER_DIALECT == ASM_ATT)
12042	putc ('$', file);
12043      fprintf (file, "0x%08lx", (long unsigned int) l);
12044    }
12045
12046  /* These float cases don't actually occur as immediate operands.  */
12047  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
12048    {
12049      char dstr[30];
12050
12051      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12052      fputs (dstr, file);
12053    }
12054
12055  else if (GET_CODE (x) == CONST_DOUBLE
12056	   && GET_MODE (x) == XFmode)
12057    {
12058      char dstr[30];
12059
12060      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12061      fputs (dstr, file);
12062    }
12063
12064  else
12065    {
12066      /* We have patterns that allow zero sets of memory, for instance.
12067	 In 64-bit mode, we should probably support all 8-byte vectors,
12068	 since we can in fact encode that into an immediate.  */
12069      if (GET_CODE (x) == CONST_VECTOR)
12070	{
12071	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
12072	  x = const0_rtx;
12073	}
12074
12075      if (code != 'P')
12076	{
12077	  if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
12078	    {
12079	      if (ASSEMBLER_DIALECT == ASM_ATT)
12080		putc ('$', file);
12081	    }
12082	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
12083		   || GET_CODE (x) == LABEL_REF)
12084	    {
12085	      if (ASSEMBLER_DIALECT == ASM_ATT)
12086		putc ('$', file);
12087	      else
12088		fputs ("OFFSET FLAT:", file);
12089	    }
12090	}
12091      if (CONST_INT_P (x))
12092	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12093      else if (flag_pic)
12094	output_pic_addr_const (file, x, code);
12095      else
12096	output_addr_const (file, x);
12097    }
12098}
12099
12100/* Print a memory operand whose address is ADDR.  */
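/* For example, an address with base %ebx, index %ecx, scale 4 and
   displacement -4 prints as "-4(%ebx,%ecx,4)" in AT&T syntax and as
   "[ebx-4+ecx*4]" in Intel syntax.  */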
12101
12102void
12103print_operand_address (FILE *file, rtx addr)
12104{
12105  struct ix86_address parts;
12106  rtx base, index, disp;
12107  int scale;
12108  int ok = ix86_decompose_address (addr, &parts);
12109
12110  gcc_assert (ok);
12111
12112  base = parts.base;
12113  index = parts.index;
12114  disp = parts.disp;
12115  scale = parts.scale;
12116
12117  switch (parts.seg)
12118    {
12119    case SEG_DEFAULT:
12120      break;
12121    case SEG_FS:
12122    case SEG_GS:
12123      if (ASSEMBLER_DIALECT == ASM_ATT)
12124	putc ('%', file);
12125      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
12126      break;
12127    default:
12128      gcc_unreachable ();
12129    }
12130
12131  /* Use the one byte shorter RIP-relative addressing for 64-bit mode.  */
12132  if (TARGET_64BIT && !base && !index)
12133    {
12134      rtx symbol = disp;
12135
12136      if (GET_CODE (disp) == CONST
12137	  && GET_CODE (XEXP (disp, 0)) == PLUS
12138	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12139	symbol = XEXP (XEXP (disp, 0), 0);
12140
12141      if (GET_CODE (symbol) == LABEL_REF
12142	  || (GET_CODE (symbol) == SYMBOL_REF
12143	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
12144	base = pc_rtx;
12145    }
12146  if (!base && !index)
12147    {
12148      /* A displacement-only address requires special attention.  */
12149
12150      if (CONST_INT_P (disp))
12151	{
12152	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
12153	    fputs ("ds:", file);
12154	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
12155	}
12156      else if (flag_pic)
12157	output_pic_addr_const (file, disp, 0);
12158      else
12159	output_addr_const (file, disp);
12160    }
12161  else
12162    {
12163      if (ASSEMBLER_DIALECT == ASM_ATT)
12164	{
12165	  if (disp)
12166	    {
12167	      if (flag_pic)
12168		output_pic_addr_const (file, disp, 0);
12169	      else if (GET_CODE (disp) == LABEL_REF)
12170		output_asm_label (disp);
12171	      else
12172		output_addr_const (file, disp);
12173	    }
12174
12175	  putc ('(', file);
12176	  if (base)
12177	    print_reg (base, 0, file);
12178	  if (index)
12179	    {
12180	      putc (',', file);
12181	      print_reg (index, 0, file);
12182	      if (scale != 1)
12183		fprintf (file, ",%d", scale);
12184	    }
12185	  putc (')', file);
12186	}
12187      else
12188	{
12189	  rtx offset = NULL_RTX;
12190
12191	  if (disp)
12192	    {
12193	      /* Pull out the offset of a symbol; print any symbol itself.  */
12194	      if (GET_CODE (disp) == CONST
12195		  && GET_CODE (XEXP (disp, 0)) == PLUS
12196		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12197		{
12198		  offset = XEXP (XEXP (disp, 0), 1);
12199		  disp = gen_rtx_CONST (VOIDmode,
12200					XEXP (XEXP (disp, 0), 0));
12201		}
12202
12203	      if (flag_pic)
12204		output_pic_addr_const (file, disp, 0);
12205	      else if (GET_CODE (disp) == LABEL_REF)
12206		output_asm_label (disp);
12207	      else if (CONST_INT_P (disp))
12208		offset = disp;
12209	      else
12210		output_addr_const (file, disp);
12211	    }
12212
12213	  putc ('[', file);
12214	  if (base)
12215	    {
12216	      print_reg (base, 0, file);
12217	      if (offset)
12218		{
12219		  if (INTVAL (offset) >= 0)
12220		    putc ('+', file);
12221		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12222		}
12223	    }
12224	  else if (offset)
12225	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12226	  else
12227	    putc ('0', file);
12228
12229	  if (index)
12230	    {
12231	      putc ('+', file);
12232	      print_reg (index, 0, file);
12233	      if (scale != 1)
12234		fprintf (file, "*%d", scale);
12235	    }
12236	  putc (']', file);
12237	}
12238    }
12239}
12240
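/* Output to FILE the target-specific @-suffixed form of the UNSPEC
   address X (e.g. a symbol wrapped in UNSPEC_GOTTPOFF is printed as
   "sym@gottpoff").  Return true if X was handled, false otherwise.  */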
12241bool
12242output_addr_const_extra (FILE *file, rtx x)
12243{
12244  rtx op;
12245
12246  if (GET_CODE (x) != UNSPEC)
12247    return false;
12248
12249  op = XVECEXP (x, 0, 0);
12250  switch (XINT (x, 1))
12251    {
12252    case UNSPEC_GOTTPOFF:
12253      output_addr_const (file, op);
12254      /* FIXME: This might be @TPOFF in Sun ld.  */
12255      fputs ("@gottpoff", file);
12256      break;
12257    case UNSPEC_TPOFF:
12258      output_addr_const (file, op);
12259      fputs ("@tpoff", file);
12260      break;
12261    case UNSPEC_NTPOFF:
12262      output_addr_const (file, op);
12263      if (TARGET_64BIT)
12264	fputs ("@tpoff", file);
12265      else
12266	fputs ("@ntpoff", file);
12267      break;
12268    case UNSPEC_DTPOFF:
12269      output_addr_const (file, op);
12270      fputs ("@dtpoff", file);
12271      break;
12272    case UNSPEC_GOTNTPOFF:
12273      output_addr_const (file, op);
12274      if (TARGET_64BIT)
12275	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12276	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
12277      else
12278	fputs ("@gotntpoff", file);
12279      break;
12280    case UNSPEC_INDNTPOFF:
12281      output_addr_const (file, op);
12282      fputs ("@indntpoff", file);
12283      break;
12284#if TARGET_MACHO
12285    case UNSPEC_MACHOPIC_OFFSET:
12286      output_addr_const (file, op);
12287      putc ('-', file);
12288      machopic_output_function_base_name (file);
12289      break;
12290#endif
12291
12292    default:
12293      return false;
12294    }
12295
12296  return true;
12297}
12298
12299/* Split one or more DImode RTL references into pairs of SImode
12300   references.  The RTL can be REG, offsettable MEM, integer constant, or
12301   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
12302   split and "num" is its length.  lo_half and hi_half are output arrays
12303   that parallel "operands".  */
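/* For example, splitting the pseudo (reg:DI 100) yields lo_half
   (subreg:SI (reg:DI 100) 0) and hi_half (subreg:SI (reg:DI 100) 4),
   while splitting a (mem:DI addr) yields (mem:SI addr) and
   (mem:SI addr+4).  */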
12304
12305void
12306split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12307{
12308  while (num--)
12309    {
12310      rtx op = operands[num];
12311
12312      /* simplify_subreg refuses to split volatile memory references,
12313         but we still have to handle them.  */
12314      if (MEM_P (op))
12315	{
12316	  lo_half[num] = adjust_address (op, SImode, 0);
12317	  hi_half[num] = adjust_address (op, SImode, 4);
12318	}
12319      else
12320	{
12321	  lo_half[num] = simplify_gen_subreg (SImode, op,
12322					      GET_MODE (op) == VOIDmode
12323					      ? DImode : GET_MODE (op), 0);
12324	  hi_half[num] = simplify_gen_subreg (SImode, op,
12325					      GET_MODE (op) == VOIDmode
12326					      ? DImode : GET_MODE (op), 4);
12327	}
12328    }
12329}
12330/* Split one or more TImode RTL references into pairs of DImode
12331   references.  The RTL can be REG, offsettable MEM, integer constant, or
12332   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
12333   split and "num" is its length.  lo_half and hi_half are output arrays
12334   that parallel "operands".  */
12335
12336void
12337split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12338{
12339  while (num--)
12340    {
12341      rtx op = operands[num];
12342
12343      /* simplify_subreg refuses to split volatile memory references,
12344         but we still have to handle them.  */
12345      if (MEM_P (op))
12346	{
12347	  lo_half[num] = adjust_address (op, DImode, 0);
12348	  hi_half[num] = adjust_address (op, DImode, 8);
12349	}
12350      else
12351	{
12352	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
12353	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
12354	}
12355    }
12356}
12357
12358/* Output code to perform a 387 binary operation in INSN, one of PLUS,
12359   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
12360   is the expression of the binary operation.  The output may either be
12361   emitted here, or returned to the caller, like all output_* functions.
12362
12363   There is no guarantee that the operands are the same mode, as they
12364   might be within FLOAT or FLOAT_EXTEND expressions.  */
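/* For example, for a PLUS with the result and first operand in st(0) and
   a live stack register as the other operand, the returned template is
   "fadd\t{%y2, %0|%0, %y2}"; for scalar SFmode SSE operands it is
   "addss\t{%2, %0|%0, %2}", or "vaddss\t{%2, %1, %0|%0, %1, %2}" when
   AVX is enabled.  */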
12365
12366#ifndef SYSV386_COMPAT
12367/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
12368   wants to fix the assemblers because that causes incompatibility
12369   with gcc.  No-one wants to fix gcc because that causes
12370   incompatibility with assemblers...  You can use the option of
12371   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
12372#define SYSV386_COMPAT 1
12373#endif
12374
12375const char *
12376output_387_binary_op (rtx insn, rtx *operands)
12377{
12378  static char buf[40];
12379  const char *p;
12380  const char *ssep;
12381  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
12382
12383#ifdef ENABLE_CHECKING
12384  /* Even if we do not want to check the inputs, this documents the input
12385     constraints, which helps in understanding the following code.  */
12386  if (STACK_REG_P (operands[0])
12387      && ((REG_P (operands[1])
12388	   && REGNO (operands[0]) == REGNO (operands[1])
12389	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
12390	  || (REG_P (operands[2])
12391	      && REGNO (operands[0]) == REGNO (operands[2])
12392	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
12393      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
12394    ; /* ok */
12395  else
12396    gcc_assert (is_sse);
12397#endif
12398
12399  switch (GET_CODE (operands[3]))
12400    {
12401    case PLUS:
12402      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12403	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12404	p = "fiadd";
12405      else
12406	p = "fadd";
12407      ssep = "vadd";
12408      break;
12409
12410    case MINUS:
12411      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12412	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12413	p = "fisub";
12414      else
12415	p = "fsub";
12416      ssep = "vsub";
12417      break;
12418
12419    case MULT:
12420      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12421	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12422	p = "fimul";
12423      else
12424	p = "fmul";
12425      ssep = "vmul";
12426      break;
12427
12428    case DIV:
12429      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12430	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12431	p = "fidiv";
12432      else
12433	p = "fdiv";
12434      ssep = "vdiv";
12435      break;
12436
12437    default:
12438      gcc_unreachable ();
12439    }
12440
12441  if (is_sse)
12442   {
12443     if (TARGET_AVX)
12444       {
12445	 strcpy (buf, ssep);
12446	 if (GET_MODE (operands[0]) == SFmode)
12447	   strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
12448	 else
12449	   strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
12450       }
12451     else
12452       {
12453	 strcpy (buf, ssep + 1);
12454	 if (GET_MODE (operands[0]) == SFmode)
12455	   strcat (buf, "ss\t{%2, %0|%0, %2}");
12456	 else
12457	   strcat (buf, "sd\t{%2, %0|%0, %2}");
12458       }
12459      return buf;
12460   }
12461  strcpy (buf, p);
12462
12463  switch (GET_CODE (operands[3]))
12464    {
12465    case MULT:
12466    case PLUS:
12467      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
12468	{
12469	  rtx temp = operands[2];
12470	  operands[2] = operands[1];
12471	  operands[1] = temp;
12472	}
12473
12474      /* We know operands[0] == operands[1].  */
12475
12476      if (MEM_P (operands[2]))
12477	{
12478	  p = "%Z2\t%2";
12479	  break;
12480	}
12481
12482      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12483	{
12484	  if (STACK_TOP_P (operands[0]))
12485	    /* How is it that we are storing to a dead operand[2]?
12486	       Well, presumably operands[1] is dead too.  We can't
12487	       store the result to st(0) as st(0) gets popped on this
12488	       instruction.  Instead store to operands[2] (which I
12489	       think has to be st(1)).  st(1) will be popped later.
12490	       gcc <= 2.8.1 didn't have this check and generated
12491	       assembly code that the Unixware assembler rejected.  */
12492	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
12493	  else
12494	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
12495	  break;
12496	}
12497
12498      if (STACK_TOP_P (operands[0]))
12499	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
12500      else
12501	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
12502      break;
12503
12504    case MINUS:
12505    case DIV:
12506      if (MEM_P (operands[1]))
12507	{
12508	  p = "r%Z1\t%1";
12509	  break;
12510	}
12511
12512      if (MEM_P (operands[2]))
12513	{
12514	  p = "%Z2\t%2";
12515	  break;
12516	}
12517
12518      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12519	{
12520#if SYSV386_COMPAT
12521	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
12522	     derived assemblers, confusingly reverse the direction of
12523	     the operation for fsub{r} and fdiv{r} when the
12524	     destination register is not st(0).  The Intel assembler
12525	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
12526	     figure out what the hardware really does.  */
12527	  if (STACK_TOP_P (operands[0]))
12528	    p = "{p\t%0, %2|rp\t%2, %0}";
12529	  else
12530	    p = "{rp\t%2, %0|p\t%0, %2}";
12531#else
12532	  if (STACK_TOP_P (operands[0]))
12533	    /* As above for fmul/fadd, we can't store to st(0).  */
12534	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
12535	  else
12536	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
12537#endif
12538	  break;
12539	}
12540
12541      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
12542	{
12543#if SYSV386_COMPAT
12544	  if (STACK_TOP_P (operands[0]))
12545	    p = "{rp\t%0, %1|p\t%1, %0}";
12546	  else
12547	    p = "{p\t%1, %0|rp\t%0, %1}";
12548#else
12549	  if (STACK_TOP_P (operands[0]))
12550	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
12551	  else
12552	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
12553#endif
12554	  break;
12555	}
12556
12557      if (STACK_TOP_P (operands[0]))
12558	{
12559	  if (STACK_TOP_P (operands[1]))
12560	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
12561	  else
12562	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
12563	  break;
12564	}
12565      else if (STACK_TOP_P (operands[1]))
12566	{
12567#if SYSV386_COMPAT
12568	  p = "{\t%1, %0|r\t%0, %1}";
12569#else
12570	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
12571#endif
12572	}
12573      else
12574	{
12575#if SYSV386_COMPAT
12576	  p = "{r\t%2, %0|\t%0, %2}";
12577#else
12578	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
12579#endif
12580	}
12581      break;
12582
12583    default:
12584      gcc_unreachable ();
12585    }
12586
12587  strcat (buf, p);
12588  return buf;
12589}
12590
12591/* Return needed mode for entity in optimize_mode_switching pass.  */
12592
12593int
12594ix86_mode_needed (int entity, rtx insn)
12595{
12596  enum attr_i387_cw mode;
12597
12598  /* The mode UNINITIALIZED is used to store the control word after a
12599     function call or ASM pattern.  The mode ANY specifies that the function
12600     has no requirements on the control word and makes no changes in the
12601     bits we are interested in.  */
12602
12603  if (CALL_P (insn)
12604      || (NONJUMP_INSN_P (insn)
12605	  && (asm_noperands (PATTERN (insn)) >= 0
12606	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12607    return I387_CW_UNINITIALIZED;
12608
12609  if (recog_memoized (insn) < 0)
12610    return I387_CW_ANY;
12611
12612  mode = get_attr_i387_cw (insn);
12613
12614  switch (entity)
12615    {
12616    case I387_TRUNC:
12617      if (mode == I387_CW_TRUNC)
12618	return mode;
12619      break;
12620
12621    case I387_FLOOR:
12622      if (mode == I387_CW_FLOOR)
12623	return mode;
12624      break;
12625
12626    case I387_CEIL:
12627      if (mode == I387_CW_CEIL)
12628	return mode;
12629      break;
12630
12631    case I387_MASK_PM:
12632      if (mode == I387_CW_MASK_PM)
12633	return mode;
12634      break;
12635
12636    default:
12637      gcc_unreachable ();
12638    }
12639
12640  return I387_CW_ANY;
12641}
12642
12643/* Output code to initialize the control word copies used by the trunc?f?i
12644   and rounding patterns.  MODE selects the rounding/masking variant; the
12645   current control word is saved and a modified copy is stored for MODE.  */
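/* In the x87 control word, bits 10-11 select the rounding mode (00 = to
   nearest, 01 = down, 10 = up, 11 = truncate) and bit 5 masks the precision
   exception, hence the 0x0400, 0x0800, 0x0c00 and 0x0020 constants below.  */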
12646
12647void
12648emit_i387_cw_initialization (int mode)
12649{
12650  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12651  rtx new_mode;
12652
12653  enum ix86_stack_slot slot;
12654
12655  rtx reg = gen_reg_rtx (HImode);
12656
12657  emit_insn (gen_x86_fnstcw_1 (stored_mode));
12658  emit_move_insn (reg, copy_rtx (stored_mode));
12659
12660  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12661      || optimize_function_for_size_p (cfun))
12662    {
12663      switch (mode)
12664	{
12665	case I387_CW_TRUNC:
12666	  /* round toward zero (truncate) */
12667	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12668	  slot = SLOT_CW_TRUNC;
12669	  break;
12670
12671	case I387_CW_FLOOR:
12672	  /* round down toward -oo */
12673	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12674	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12675	  slot = SLOT_CW_FLOOR;
12676	  break;
12677
12678	case I387_CW_CEIL:
12679	  /* round up toward +oo */
12680	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12681	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12682	  slot = SLOT_CW_CEIL;
12683	  break;
12684
12685	case I387_CW_MASK_PM:
12686	  /* mask precision exception for nearbyint() */
12687	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12688	  slot = SLOT_CW_MASK_PM;
12689	  break;
12690
12691	default:
12692	  gcc_unreachable ();
12693	}
12694    }
12695  else
12696    {
12697      switch (mode)
12698	{
12699	case I387_CW_TRUNC:
12700	  /* round toward zero (truncate) */
12701	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12702	  slot = SLOT_CW_TRUNC;
12703	  break;
12704
12705	case I387_CW_FLOOR:
12706	  /* round down toward -oo */
12707	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12708	  slot = SLOT_CW_FLOOR;
12709	  break;
12710
12711	case I387_CW_CEIL:
12712	  /* round up toward +oo */
12713	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12714	  slot = SLOT_CW_CEIL;
12715	  break;
12716
12717	case I387_CW_MASK_PM:
12718	  /* mask precision exception for nearbyint() */
12719	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12720	  slot = SLOT_CW_MASK_PM;
12721	  break;
12722
12723	default:
12724	  gcc_unreachable ();
12725	}
12726    }
12727
12728  gcc_assert (slot < MAX_386_STACK_LOCALS);
12729
12730  new_mode = assign_386_stack_local (HImode, slot);
12731  emit_move_insn (new_mode, reg);
12732}
12733
12734/* Output code for INSN to convert a float to a signed int.  OPERANDS
12735   are the insn operands.  The output may be [HSD]Imode and the input
12736   operand may be [SDX]Fmode.  */
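/* For example, for a DImode store with a non-default rounding mode, and
   assuming the assembler lacks fistpq support so that %Z0 expands to "ll",
   this emits roughly "fld %st(0)" (to preserve the value, unless the stack
   top dies), "fldcw %3", "fistpll %0", "fldcw %2", where operands 2 and 3
   hold the original and the new control word.  */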
12737
12738const char *
12739output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12740{
12741  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12742  int dimode_p = GET_MODE (operands[0]) == DImode;
12743  int round_mode = get_attr_i387_cw (insn);
12744
12745  /* Jump through a hoop or two for DImode, since the hardware has no
12746     non-popping instruction.  We used to do this a different way, but
12747     that was somewhat fragile and broke with post-reload splitters.  */
12748  if ((dimode_p || fisttp) && !stack_top_dies)
12749    output_asm_insn ("fld\t%y1", operands);
12750
12751  gcc_assert (STACK_TOP_P (operands[1]));
12752  gcc_assert (MEM_P (operands[0]));
12753  gcc_assert (GET_MODE (operands[1]) != TFmode);
12754
12755  if (fisttp)
12756    output_asm_insn ("fisttp%Z0\t%0", operands);
12757  else
12758    {
12759      if (round_mode != I387_CW_ANY)
12760	output_asm_insn ("fldcw\t%3", operands);
12761      if (stack_top_dies || dimode_p)
12762	output_asm_insn ("fistp%Z0\t%0", operands);
12763      else
12764	output_asm_insn ("fist%Z0\t%0", operands);
12765      if (round_mode != I387_CW_ANY)
12766	output_asm_insn ("fldcw\t%2", operands);
12767    }
12768
12769  return "";
12770}
12771
12772/* Output code for x87 ffreep insn.  The OPNO argument, which may only
12773   have the values zero or one, indicates the ffreep insn's operand
12774   from the OPERANDS array.  */
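/* When the assembler does not understand ffreep, the insn is emitted as a
   raw data word instead: e.g. for %st(1) this produces ASM_SHORT "0xc1df",
   whose bytes 0xdf 0xc1 encode ffreep %st(1).  */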
12775
12776static const char *
12777output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12778{
12779  if (TARGET_USE_FFREEP)
12780#ifdef HAVE_AS_IX86_FFREEP
12781    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
12782#else
12783    {
12784      static char retval[32];
12785      int regno = REGNO (operands[opno]);
12786
12787      gcc_assert (FP_REGNO_P (regno));
12788
12789      regno -= FIRST_STACK_REG;
12790
12791      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
12792      return retval;
12793    }
12794#endif
12795
12796  return opno ? "fstp\t%y1" : "fstp\t%y0";
12797}
12798
12799
12800/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
12801   should be used.  UNORDERED_P is true when fucom should be used.  */
12802
12803const char *
12804output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12805{
12806  int stack_top_dies;
12807  rtx cmp_op0, cmp_op1;
12808  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12809
12810  if (eflags_p)
12811    {
12812      cmp_op0 = operands[0];
12813      cmp_op1 = operands[1];
12814    }
12815  else
12816    {
12817      cmp_op0 = operands[1];
12818      cmp_op1 = operands[2];
12819    }
12820
12821  if (is_sse)
12822    {
12823      static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12824      static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12825      static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12826      static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12827
12828      if (GET_MODE (operands[0]) == SFmode)
12829	if (unordered_p)
12830	  return &ucomiss[TARGET_AVX ? 0 : 1];
12831	else
12832	  return &comiss[TARGET_AVX ? 0 : 1];
12833      else
12834	if (unordered_p)
12835	  return &ucomisd[TARGET_AVX ? 0 : 1];
12836	else
12837	  return &comisd[TARGET_AVX ? 0 : 1];
12838    }
12839
12840  gcc_assert (STACK_TOP_P (cmp_op0));
12841
12842  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12843
12844  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12845    {
12846      if (stack_top_dies)
12847	{
12848	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12849	  return output_387_ffreep (operands, 1);
12850	}
12851      else
12852	return "ftst\n\tfnstsw\t%0";
12853    }
12854
12855  if (STACK_REG_P (cmp_op1)
12856      && stack_top_dies
12857      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12858      && REGNO (cmp_op1) != FIRST_STACK_REG)
12859    {
12860      /* If the top of the 387 stack dies, and the other operand is
12861	 also a stack register that dies, then this must be a
12862	 `fcompp' float compare.  */
12863
12864      if (eflags_p)
12865	{
12866	  /* There is no double popping fcomi variant.  Fortunately,
12867	     eflags is immune from the fstp's cc clobbering.  */
12868	  if (unordered_p)
12869	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12870	  else
12871	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12872	  return output_387_ffreep (operands, 0);
12873	}
12874      else
12875	{
12876	  if (unordered_p)
12877	    return "fucompp\n\tfnstsw\t%0";
12878	  else
12879	    return "fcompp\n\tfnstsw\t%0";
12880	}
12881    }
12882  else
12883    {
12884      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
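      /* For example, eflags_p = 1, a non-integer operand, unordered_p = 1
	 and stack_top_dies = 1 give mask 11, selecting
	 "fucomip\t{%y1, %0|%0, %y1}".  */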
12885
12886      static const char * const alt[16] =
12887      {
12888	"fcom%Z2\t%y2\n\tfnstsw\t%0",
12889	"fcomp%Z2\t%y2\n\tfnstsw\t%0",
12890	"fucom%Z2\t%y2\n\tfnstsw\t%0",
12891	"fucomp%Z2\t%y2\n\tfnstsw\t%0",
12892
12893	"ficom%Z2\t%y2\n\tfnstsw\t%0",
12894	"ficomp%Z2\t%y2\n\tfnstsw\t%0",
12895	NULL,
12896	NULL,
12897
12898	"fcomi\t{%y1, %0|%0, %y1}",
12899	"fcomip\t{%y1, %0|%0, %y1}",
12900	"fucomi\t{%y1, %0|%0, %y1}",
12901	"fucomip\t{%y1, %0|%0, %y1}",
12902
12903	NULL,
12904	NULL,
12905	NULL,
12906	NULL
12907      };
12908
12909      int mask;
12910      const char *ret;
12911
12912      mask  = eflags_p << 3;
12913      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12914      mask |= unordered_p << 1;
12915      mask |= stack_top_dies;
12916
12917      gcc_assert (mask < 16);
12918      ret = alt[mask];
12919      gcc_assert (ret);
12920
12921      return ret;
12922    }
12923}
12924
12925void
12926ix86_output_addr_vec_elt (FILE *file, int value)
12927{
12928  const char *directive = ASM_LONG;
12929
12930#ifdef ASM_QUAD
12931  if (TARGET_64BIT)
12932    directive = ASM_QUAD;
12933#else
12934  gcc_assert (!TARGET_64BIT);
12935#endif
12936
12937  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
12938}
12939
12940void
12941ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12942{
12943  const char *directive = ASM_LONG;
12944
12945#ifdef ASM_QUAD
12946  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12947    directive = ASM_QUAD;
12948#else
12949  gcc_assert (!TARGET_64BIT);
12950#endif
12951  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
12952  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12953    fprintf (file, "%s%s%d-%s%d\n",
12954	     directive, LPREFIX, value, LPREFIX, rel);
12955  else if (HAVE_AS_GOTOFF_IN_DATA)
12956    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
12957#if TARGET_MACHO
12958  else if (TARGET_MACHO)
12959    {
12960      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
12961      machopic_output_function_base_name (file);
12962      putc ('\n', file);
12963    }
12964#endif
12965  else
12966    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
12967		 GOT_SYMBOL_NAME, LPREFIX, value);
12968}
12969
12970/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12971   for the target.  */
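/* For instance, when optimizing for speed this emits
     (parallel [(set (reg:SI ax) (const_int 0))
		(clobber (reg:CC flags))])
   which assembles as "xorl %eax, %eax"; on TARGET_USE_MOV0 processors,
   when optimizing for size, a plain "movl $0, %eax" set is emitted
   instead.  */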
12972
12973void
12974ix86_expand_clear (rtx dest)
12975{
12976  rtx tmp;
12977
12978  /* We play register width games, which are only valid after reload.  */
12979  gcc_assert (reload_completed);
12980
12981  /* Avoid HImode and its attendant prefix byte.  */
12982  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12983    dest = gen_rtx_REG (SImode, REGNO (dest));
12984  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12985
12986  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
12987  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
12988    {
12989      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12990      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12991    }
12992
12993  emit_insn (tmp);
12994}
12995
12996/* X is an unchanging MEM.  If it is a constant pool reference, return
12997   the constant pool rtx, else NULL.  */
12998
12999rtx
13000maybe_get_pool_constant (rtx x)
13001{
13002  x = ix86_delegitimize_address (XEXP (x, 0));
13003
13004  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
13005    return get_pool_constant (x);
13006
13007  return NULL_RTX;
13008}
13009
13010void
13011ix86_expand_move (enum machine_mode mode, rtx operands[])
13012{
13013  rtx op0, op1;
13014  enum tls_model model;
13015
13016  op0 = operands[0];
13017  op1 = operands[1];
13018
13019  if (GET_CODE (op1) == SYMBOL_REF)
13020    {
13021      model = SYMBOL_REF_TLS_MODEL (op1);
13022      if (model)
13023	{
13024	  op1 = legitimize_tls_address (op1, model, true);
13025	  op1 = force_operand (op1, op0);
13026	  if (op1 == op0)
13027	    return;
13028	}
13029      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13030	       && SYMBOL_REF_DLLIMPORT_P (op1))
13031	op1 = legitimize_dllimport_symbol (op1, false);
13032    }
13033  else if (GET_CODE (op1) == CONST
13034	   && GET_CODE (XEXP (op1, 0)) == PLUS
13035	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
13036    {
13037      rtx addend = XEXP (XEXP (op1, 0), 1);
13038      rtx symbol = XEXP (XEXP (op1, 0), 0);
13039      rtx tmp = NULL;
13040
13041      model = SYMBOL_REF_TLS_MODEL (symbol);
13042      if (model)
13043	tmp = legitimize_tls_address (symbol, model, true);
13044      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13045	       && SYMBOL_REF_DLLIMPORT_P (symbol))
13046	tmp = legitimize_dllimport_symbol (symbol, true);
13047
13048      if (tmp)
13049	{
13050	  tmp = force_operand (tmp, NULL);
13051	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
13052				     op0, 1, OPTAB_DIRECT);
13053	  if (tmp == op0)
13054	    return;
13055	}
13056    }
13057
13058  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
13059    {
13060      if (TARGET_MACHO && !TARGET_64BIT)
13061	{
13062#if TARGET_MACHO
13063	  if (MACHOPIC_PURE)
13064	    {
13065	      rtx temp = ((reload_in_progress
13066			   || ((op0 && REG_P (op0))
13067			       && mode == Pmode))
13068			  ? op0 : gen_reg_rtx (Pmode));
13069	      op1 = machopic_indirect_data_reference (op1, temp);
13070	      op1 = machopic_legitimize_pic_address (op1, mode,
13071						     temp == op1 ? 0 : temp);
13072	    }
13073	  else if (MACHOPIC_INDIRECT)
13074	    op1 = machopic_indirect_data_reference (op1, 0);
13075	  if (op0 == op1)
13076	    return;
13077#endif
13078	}
13079      else
13080	{
13081	  if (MEM_P (op0))
13082	    op1 = force_reg (Pmode, op1);
13083	  else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
13084	    {
13085	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
13086	      op1 = legitimize_pic_address (op1, reg);
13087	      if (op0 == op1)
13088		return;
13089	    }
13090	}
13091    }
13092  else
13093    {
13094      if (MEM_P (op0)
13095	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
13096	      || !push_operand (op0, mode))
13097	  && MEM_P (op1))
13098	op1 = force_reg (mode, op1);
13099
13100      if (push_operand (op0, mode)
13101	  && ! general_no_elim_operand (op1, mode))
13102	op1 = copy_to_mode_reg (mode, op1);
13103
13104      /* Force large constants in 64-bit compilation into a register
13105	 so that they can be CSEd.  */
13106      if (can_create_pseudo_p ()
13107	  && (mode == DImode) && TARGET_64BIT
13108	  && immediate_operand (op1, mode)
13109	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
13110	  && !register_operand (op0, mode)
13111	  && optimize)
13112	op1 = copy_to_mode_reg (mode, op1);
13113
13114      if (can_create_pseudo_p ()
13115	  && FLOAT_MODE_P (mode)
13116	  && GET_CODE (op1) == CONST_DOUBLE)
13117	{
13118	  /* If we are loading a floating point constant to a register,
13119	     force the value to memory now, since we'll get better code
13120	     out of the back end.  */
13121
13122	  op1 = validize_mem (force_const_mem (mode, op1));
13123	  if (!register_operand (op0, mode))
13124	    {
13125	      rtx temp = gen_reg_rtx (mode);
13126	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
13127	      emit_move_insn (op0, temp);
13128	      return;
13129	    }
13130	}
13131    }
13132
13133  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13134}
13135
13136void
13137ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
13138{
13139  rtx op0 = operands[0], op1 = operands[1];
13140  unsigned int align = GET_MODE_ALIGNMENT (mode);
13141
13142  /* Force constants other than zero into memory.  We do not know how
13143     the instructions used to build constants modify the upper 64 bits
13144     of the register; once we have that information, we may be able
13145     to handle some of them more efficiently.  */
13146  if (can_create_pseudo_p ()
13147      && register_operand (op0, mode)
13148      && (CONSTANT_P (op1)
13149	  || (GET_CODE (op1) == SUBREG
13150	      && CONSTANT_P (SUBREG_REG (op1))))
13151      && !standard_sse_constant_p (op1))
13152    op1 = validize_mem (force_const_mem (mode, op1));
13153
13154  /* We need to check memory alignment for SSE modes since attributes
13155     can make operands unaligned.  */
13156  if (can_create_pseudo_p ()
13157      && SSE_REG_MODE_P (mode)
13158      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
13159	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
13160    {
13161      rtx tmp[2];
13162
13163      /* ix86_expand_vector_move_misalign() does not like constants ... */
13164      if (CONSTANT_P (op1)
13165	  || (GET_CODE (op1) == SUBREG
13166	      && CONSTANT_P (SUBREG_REG (op1))))
13167	op1 = validize_mem (force_const_mem (mode, op1));
13168
13169      /* ... nor both arguments in memory.  */
13170      if (!register_operand (op0, mode)
13171	  && !register_operand (op1, mode))
13172	op1 = force_reg (mode, op1);
13173
13174      tmp[0] = op0; tmp[1] = op1;
13175      ix86_expand_vector_move_misalign (mode, tmp);
13176      return;
13177    }
13178
13179  /* Make operand1 a register if neither operand is a register already.  */
13180  if (can_create_pseudo_p ()
13181      && !register_operand (op0, mode)
13182      && !register_operand (op1, mode))
13183    {
13184      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
13185      return;
13186    }
13187
13188  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13189}
13190
13191/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
13192   straight to ix86_expand_vector_move.  */
13193/* Code generation for scalar reg-reg moves of single and double precision data:
13194     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
13195       movaps reg, reg
13196     else
13197       movss reg, reg
13198     if (x86_sse_partial_reg_dependency == true)
13199       movapd reg, reg
13200     else
13201       movsd reg, reg
13202
13203   Code generation for scalar loads of double precision data:
13204     if (x86_sse_split_regs == true)
13205       movlpd mem, reg      (gas syntax)
13206     else
13207       movsd mem, reg
13208
13209   Code generation for unaligned packed loads of single precision data
13210   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
13211     if (x86_sse_unaligned_move_optimal)
13212       movups mem, reg
13213
13214     if (x86_sse_partial_reg_dependency == true)
13215       {
13216         xorps  reg, reg
13217         movlps mem, reg
13218         movhps mem+8, reg
13219       }
13220     else
13221       {
13222         movlps mem, reg
13223         movhps mem+8, reg
13224       }
13225
13226   Code generation for unaligned packed loads of double precision data
13227   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
13228     if (x86_sse_unaligned_move_optimal)
13229       movupd mem, reg
13230
13231     if (x86_sse_split_regs == true)
13232       {
13233         movlpd mem, reg
13234         movhpd mem+8, reg
13235       }
13236     else
13237       {
13238         movsd  mem, reg
13239         movhpd mem+8, reg
13240       }
13241 */
13242
13243void
13244ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
13245{
13246  rtx op0, op1, m;
13247
13248  op0 = operands[0];
13249  op1 = operands[1];
13250
13251  if (TARGET_AVX)
13252    {
13253      switch (GET_MODE_CLASS (mode))
13254	{
13255	case MODE_VECTOR_INT:
13256	case MODE_INT:
13257	  switch (GET_MODE_SIZE (mode))
13258	    {
13259	    case 16:
13260	      op0 = gen_lowpart (V16QImode, op0);
13261	      op1 = gen_lowpart (V16QImode, op1);
13262	      emit_insn (gen_avx_movdqu (op0, op1));
13263	      break;
13264	    case 32:
13265	      op0 = gen_lowpart (V32QImode, op0);
13266	      op1 = gen_lowpart (V32QImode, op1);
13267	      emit_insn (gen_avx_movdqu256 (op0, op1));
13268	      break;
13269	    default:
13270	      gcc_unreachable ();
13271	    }
13272	  break;
13273	case MODE_VECTOR_FLOAT:
13274	  op0 = gen_lowpart (mode, op0);
13275	  op1 = gen_lowpart (mode, op1);
13276
13277	  switch (mode)
13278	    {
13279	    case V4SFmode:
13280	      emit_insn (gen_avx_movups (op0, op1));
13281	      break;
13282	    case V8SFmode:
13283	      emit_insn (gen_avx_movups256 (op0, op1));
13284	      break;
13285	    case V2DFmode:
13286	      emit_insn (gen_avx_movupd (op0, op1));
13287	      break;
13288	    case V4DFmode:
13289	      emit_insn (gen_avx_movupd256 (op0, op1));
13290	      break;
13291	    default:
13292	      gcc_unreachable ();
13293	    }
13294	  break;
13295
13296	default:
13297	  gcc_unreachable ();
13298	}
13299
13300      return;
13301    }
13302
13303  if (MEM_P (op1))
13304    {
13305      /* If we're optimizing for size, movups is the smallest.  */
13306      if (optimize_insn_for_size_p ())
13307	{
13308	  op0 = gen_lowpart (V4SFmode, op0);
13309	  op1 = gen_lowpart (V4SFmode, op1);
13310	  emit_insn (gen_sse_movups (op0, op1));
13311	  return;
13312	}
13313
13314      /* ??? If we have typed data, then it would appear that using
13315	 movdqu is the only way to get unaligned data loaded with
13316	 integer type.  */
13317      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13318	{
13319	  op0 = gen_lowpart (V16QImode, op0);
13320	  op1 = gen_lowpart (V16QImode, op1);
13321	  emit_insn (gen_sse2_movdqu (op0, op1));
13322	  return;
13323	}
13324
13325      if (TARGET_SSE2 && mode == V2DFmode)
13326        {
13327          rtx zero;
13328
13329          if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13330            {
13331              op0 = gen_lowpart (V2DFmode, op0);
13332              op1 = gen_lowpart (V2DFmode, op1);
13333              emit_insn (gen_sse2_movupd (op0, op1));
13334              return;
13335            }
13336
13337	  /* When SSE registers are split into halves, we can avoid
13338	     writing to the top half twice.  */
13339	  if (TARGET_SSE_SPLIT_REGS)
13340	    {
13341	      emit_clobber (op0);
13342	      zero = op0;
13343	    }
13344	  else
13345	    {
13346	      /* ??? Not sure about the best option for the Intel chips.
13347		 The following would seem to satisfy; the register is
13348		 entirely cleared, breaking the dependency chain.  We
13349		 then store to the upper half, with a dependency depth
13350		 of one.  A rumor has it that Intel recommends two movsd
13351		 followed by an unpacklpd, but this is unconfirmed.  And
13352		 given that the dependency depth of the unpacklpd would
13353		 still be one, I'm not sure why this would be better.  */
13354	      zero = CONST0_RTX (V2DFmode);
13355	    }
13356
13357	  m = adjust_address (op1, DFmode, 0);
13358	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
13359	  m = adjust_address (op1, DFmode, 8);
13360	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
13361	}
13362      else
13363        {
13364          if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13365            {
13366              op0 = gen_lowpart (V4SFmode, op0);
13367              op1 = gen_lowpart (V4SFmode, op1);
13368              emit_insn (gen_sse_movups (op0, op1));
13369              return;
13370            }
13371
13372	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
13373	    emit_move_insn (op0, CONST0_RTX (mode));
13374	  else
13375	    emit_clobber (op0);
13376
13377	  if (mode != V4SFmode)
13378	    op0 = gen_lowpart (V4SFmode, op0);
13379	  m = adjust_address (op1, V2SFmode, 0);
13380	  emit_insn (gen_sse_loadlps (op0, op0, m));
13381	  m = adjust_address (op1, V2SFmode, 8);
13382	  emit_insn (gen_sse_loadhps (op0, op0, m));
13383	}
13384    }
13385  else if (MEM_P (op0))
13386    {
13387      /* If we're optimizing for size, movups is the smallest.  */
13388      if (optimize_insn_for_size_p ())
13389	{
13390	  op0 = gen_lowpart (V4SFmode, op0);
13391	  op1 = gen_lowpart (V4SFmode, op1);
13392	  emit_insn (gen_sse_movups (op0, op1));
13393	  return;
13394	}
13395
13396      /* ??? Similar to above, only less clear because of quote
13397	 typeless stores unquote.  */
13398      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
13399	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13400        {
13401	  op0 = gen_lowpart (V16QImode, op0);
13402	  op1 = gen_lowpart (V16QImode, op1);
13403	  emit_insn (gen_sse2_movdqu (op0, op1));
13404	  return;
13405	}
13406
13407      if (TARGET_SSE2 && mode == V2DFmode)
13408	{
13409	  m = adjust_address (op0, DFmode, 0);
13410	  emit_insn (gen_sse2_storelpd (m, op1));
13411	  m = adjust_address (op0, DFmode, 8);
13412	  emit_insn (gen_sse2_storehpd (m, op1));
13413	}
13414      else
13415	{
13416	  if (mode != V4SFmode)
13417	    op1 = gen_lowpart (V4SFmode, op1);
13418	  m = adjust_address (op0, V2SFmode, 0);
13419	  emit_insn (gen_sse_storelps (m, op1));
13420	  m = adjust_address (op0, V2SFmode, 8);
13421	  emit_insn (gen_sse_storehps (m, op1));
13422	}
13423    }
13424  else
13425    gcc_unreachable ();
13426}
13427
13428/* Expand a push in MODE.  This is some mode for which we do not support
13429   proper push instructions, at least from the registers that we expect
13430   the value to live in.  */
13431
13432void
13433ix86_expand_push (enum machine_mode mode, rtx x)
13434{
13435  rtx tmp;
13436
13437  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
13438			     GEN_INT (-GET_MODE_SIZE (mode)),
13439			     stack_pointer_rtx, 1, OPTAB_DIRECT);
13440  if (tmp != stack_pointer_rtx)
13441    emit_move_insn (stack_pointer_rtx, tmp);
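  /* E.g. for a 16-byte MODE this expands, roughly, to a "sub $16, %rsp"
     (or %esp) followed by an ordinary store of X into the newly
     allocated stack slot below.  */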
13442
13443  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
13444
  /* When we push an operand onto the stack, it has to be aligned at
     least at the function argument boundary.  However, since we don't
     have the argument type, we can't determine the actual argument
     boundary.  */
13449  emit_move_insn (tmp, x);
13450}
13451
13452/* Helper function of ix86_fixup_binary_operands to canonicalize
13453   operand order.  Returns true if the operands should be swapped.  */
13454
13455static bool
13456ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
13457			     rtx operands[])
13458{
13459  rtx dst = operands[0];
13460  rtx src1 = operands[1];
13461  rtx src2 = operands[2];
13462
13463  /* If the operation is not commutative, we can't do anything.  */
13464  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
13465    return false;
13466
13467  /* Highest priority is that src1 should match dst.  */
13468  if (rtx_equal_p (dst, src1))
13469    return false;
13470  if (rtx_equal_p (dst, src2))
13471    return true;
13472
13473  /* Next highest priority is that immediate constants come second.  */
13474  if (immediate_operand (src2, mode))
13475    return false;
13476  if (immediate_operand (src1, mode))
13477    return true;
13478
13479  /* Lowest priority is that memory references should come second.  */
13480  if (MEM_P (src2))
13481    return false;
13482  if (MEM_P (src1))
13483    return true;
13484
13485  return false;
13486}
13487
13488
13489/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
13490   destination to use for the operation.  If different from the true
13491   destination in operands[0], a copy operation will be required.  */
13492
13493rtx
13494ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
13495			    rtx operands[])
13496{
13497  rtx dst = operands[0];
13498  rtx src1 = operands[1];
13499  rtx src2 = operands[2];
13500
13501  /* Canonicalize operand order.  */
13502  if (ix86_swap_binary_operands_p (code, mode, operands))
13503    {
13504      rtx temp;
13505
13506      /* It is invalid to swap operands of different modes.  */
13507      gcc_assert (GET_MODE (src1) == GET_MODE (src2));
13508
13509      temp = src1;
13510      src1 = src2;
13511      src2 = temp;
13512    }
13513
13514  /* Both source operands cannot be in memory.  */
13515  if (MEM_P (src1) && MEM_P (src2))
13516    {
13517      /* Optimization: Only read from memory once.  */
13518      if (rtx_equal_p (src1, src2))
13519	{
13520	  src2 = force_reg (mode, src2);
13521	  src1 = src2;
13522	}
13523      else
13524	src2 = force_reg (mode, src2);
13525    }
13526
13527  /* If the destination is memory, and we do not have matching source
13528     operands, do things in registers.  */
13529  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13530    dst = gen_reg_rtx (mode);
13531
13532  /* Source 1 cannot be a constant.  */
13533  if (CONSTANT_P (src1))
13534    src1 = force_reg (mode, src1);
13535
13536  /* Source 1 cannot be a non-matching memory.  */
13537  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13538    src1 = force_reg (mode, src1);
13539
13540  operands[1] = src1;
13541  operands[2] = src2;
13542  return dst;
13543}
13544
13545/* Similarly, but assume that the destination has already been
13546   set up properly.  */
13547
13548void
13549ix86_fixup_binary_operands_no_copy (enum rtx_code code,
13550				    enum machine_mode mode, rtx operands[])
13551{
13552  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
13553  gcc_assert (dst == operands[0]);
13554}
13555
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow three separate
   memory references (one output, two input) in a single insn.  */
13559
13560void
13561ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
13562			     rtx operands[])
13563{
13564  rtx src1, src2, dst, op, clob;
13565
13566  dst = ix86_fixup_binary_operands (code, mode, operands);
13567  src1 = operands[1];
13568  src2 = operands[2];
13569
13570 /* Emit the instruction.  */
13571
13572  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
13573  if (reload_in_progress)
13574    {
13575      /* Reload doesn't know about the flags register, and doesn't know that
13576         it doesn't want to clobber it.  We can only do this with PLUS.  */
13577      gcc_assert (code == PLUS);
13578      emit_insn (op);
13579    }
13580  else
13581    {
13582      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13583      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13584    }
13585
13586  /* Fix up the destination if needed.  */
13587  if (dst != operands[0])
13588    emit_move_insn (operands[0], dst);
13589}
13590
13591/* Return TRUE or FALSE depending on whether the binary operator meets the
13592   appropriate constraints.  */
13593
13594int
13595ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13596			 rtx operands[3])
13597{
13598  rtx dst = operands[0];
13599  rtx src1 = operands[1];
13600  rtx src2 = operands[2];
13601
13602  /* Both source operands cannot be in memory.  */
13603  if (MEM_P (src1) && MEM_P (src2))
13604    return 0;
13605
13606  /* Canonicalize operand order for commutative operators.  */
13607  if (ix86_swap_binary_operands_p (code, mode, operands))
13608    {
13609      rtx temp = src1;
13610      src1 = src2;
13611      src2 = temp;
13612    }
13613
13614  /* If the destination is memory, we must have a matching source operand.  */
13615  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13616      return 0;
13617
13618  /* Source 1 cannot be a constant.  */
13619  if (CONSTANT_P (src1))
13620    return 0;
13621
13622  /* Source 1 cannot be a non-matching memory.  */
13623  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13624    return 0;
13625
13626  return 1;
13627}
13628
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow two separate
   memory references (one output, one input) in a single insn.  */
13632
13633void
13634ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13635			    rtx operands[])
13636{
13637  int matching_memory;
13638  rtx src, dst, op, clob;
13639
13640  dst = operands[0];
13641  src = operands[1];
13642
13643  /* If the destination is memory, and we do not have matching source
13644     operands, do things in registers.  */
13645  matching_memory = 0;
13646  if (MEM_P (dst))
13647    {
13648      if (rtx_equal_p (dst, src))
13649	matching_memory = 1;
13650      else
13651	dst = gen_reg_rtx (mode);
13652    }
13653
13654  /* When source operand is memory, destination must match.  */
13655  if (MEM_P (src) && !matching_memory)
13656    src = force_reg (mode, src);
13657
13658  /* Emit the instruction.  */
13659
13660  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13661  if (reload_in_progress || code == NOT)
13662    {
13663      /* Reload doesn't know about the flags register, and doesn't know that
13664         it doesn't want to clobber it.  */
13665      gcc_assert (code == NOT);
13666      emit_insn (op);
13667    }
13668  else
13669    {
13670      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13671      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13672    }
13673
13674  /* Fix up the destination if needed.  */
13675  if (dst != operands[0])
13676    emit_move_insn (operands[0], dst);
13677}
13678
13679#define LEA_SEARCH_THRESHOLD 12
13680
/* Search backward for a non-AGU definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until we either
   1. pass LEA_SEARCH_THRESHOLD instructions, or
   2. reach the BB boundary, or
   3. reach an AGU definition.
   Returns the distance between the non-AGU definition point and INSN.
   If there is no definition point, returns -1.  */
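/* (Motivation: on an in-order core such as Atom, a value produced by a
   normal ALU instruction and then consumed by the address-generation
   unit -- by an LEA or as part of a memory address -- incurs extra
   latency, so this search and distance_agu_use below feed the
   LEA-vs-ADD choice made in ix86_lea_for_add_ok.)  */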
13688
13689static int
13690distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13691			 rtx insn)
13692{
13693  basic_block bb = BLOCK_FOR_INSN (insn);
13694  int distance = 0;
13695  df_ref *def_rec;
13696  enum attr_type insn_type;
13697
13698  if (insn != BB_HEAD (bb))
13699    {
13700      rtx prev = PREV_INSN (insn);
13701      while (prev && distance < LEA_SEARCH_THRESHOLD)
13702	{
13703	  if (NONDEBUG_INSN_P (prev))
13704	    {
13705	      distance++;
13706              for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13707                if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13708                    && !DF_REF_IS_ARTIFICIAL (*def_rec)
13709                    && (regno1 == DF_REF_REGNO (*def_rec)
13710			|| regno2 == DF_REF_REGNO (*def_rec)))
13711		  {
13712		    insn_type = get_attr_type (prev);
13713		    if (insn_type != TYPE_LEA)
13714		      goto done;
13715		  }
13716	    }
13717	  if (prev == BB_HEAD (bb))
13718	    break;
13719	  prev = PREV_INSN (prev);
13720	}
13721    }
13722
13723  if (distance < LEA_SEARCH_THRESHOLD)
13724    {
13725      edge e;
13726      edge_iterator ei;
13727      bool simple_loop = false;
13728
13729      FOR_EACH_EDGE (e, ei, bb->preds)
13730	if (e->src == bb)
13731	  {
13732	    simple_loop = true;
13733	    break;
13734	  }
13735
13736      if (simple_loop)
13737	{
13738	  rtx prev = BB_END (bb);
13739	  while (prev
13740		 && prev != insn
13741		 && distance < LEA_SEARCH_THRESHOLD)
13742	    {
13743	      if (NONDEBUG_INSN_P (prev))
13744		{
13745		  distance++;
13746		  for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13747		    if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13748			&& !DF_REF_IS_ARTIFICIAL (*def_rec)
13749			&& (regno1 == DF_REF_REGNO (*def_rec)
13750			    || regno2 == DF_REF_REGNO (*def_rec)))
13751		      {
13752			insn_type = get_attr_type (prev);
13753			if (insn_type != TYPE_LEA)
13754			  goto done;
13755		      }
13756		}
13757	      prev = PREV_INSN (prev);
13758	    }
13759	}
13760    }
13761
13762  distance = -1;
13763
13764done:
13765  /* get_attr_type may modify recog data.  We want to make sure
13766     that recog data is valid for instruction INSN, on which
13767     distance_non_agu_define is called.  INSN is unchanged here.  */
13768  extract_insn_cached (insn);
13769  return distance;
13770}
13771
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in a memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set.  */
13775
13776static int
13777distance_agu_use (unsigned int regno0, rtx insn)
13778{
13779  basic_block bb = BLOCK_FOR_INSN (insn);
13780  int distance = 0;
13781  df_ref *def_rec;
13782  df_ref *use_rec;
13783
13784  if (insn != BB_END (bb))
13785    {
13786      rtx next = NEXT_INSN (insn);
13787      while (next && distance < LEA_SEARCH_THRESHOLD)
13788	{
13789	  if (NONDEBUG_INSN_P (next))
13790	    {
13791	      distance++;
13792
13793	      for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13794		if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13795		     || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13796		    && regno0 == DF_REF_REGNO (*use_rec))
13797		  {
13798		    /* Return DISTANCE if OP0 is used in memory
13799		       address in NEXT.  */
13800		    return distance;
13801		  }
13802
13803	      for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13804		if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13805		    && !DF_REF_IS_ARTIFICIAL (*def_rec)
13806		    && regno0 == DF_REF_REGNO (*def_rec))
13807		  {
13808		    /* Return -1 if OP0 is set in NEXT.  */
13809		    return -1;
13810		  }
13811	    }
13812	  if (next == BB_END (bb))
13813	    break;
13814	  next = NEXT_INSN (next);
13815	}
13816    }
13817
13818  if (distance < LEA_SEARCH_THRESHOLD)
13819    {
13820      edge e;
13821      edge_iterator ei;
13822      bool simple_loop = false;
13823
13824      FOR_EACH_EDGE (e, ei, bb->succs)
13825        if (e->dest == bb)
13826	  {
13827	    simple_loop = true;
13828	    break;
13829	  }
13830
13831      if (simple_loop)
13832	{
13833	  rtx next = BB_HEAD (bb);
13834	  while (next
13835		 && next != insn
13836		 && distance < LEA_SEARCH_THRESHOLD)
13837	    {
13838	      if (NONDEBUG_INSN_P (next))
13839		{
13840		  distance++;
13841
13842		  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13843		    if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13844			 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13845			&& regno0 == DF_REF_REGNO (*use_rec))
13846		      {
13847			/* Return DISTANCE if OP0 is used in memory
13848			   address in NEXT.  */
13849			return distance;
13850		      }
13851
13852		  for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13853		    if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13854			&& !DF_REF_IS_ARTIFICIAL (*def_rec)
13855			&& regno0 == DF_REF_REGNO (*def_rec))
13856		      {
13857			/* Return -1 if OP0 is set in NEXT.  */
13858			return -1;
13859		      }
13860
13861		}
13862	      next = NEXT_INSN (next);
13863	    }
13864	}
13865    }
13866
13867  return -1;
13868}
13869
/* Define this macro to tune LEA priority vs. ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is preferred over LEA.
   Zero: neutral.
   Positive value: LEA is preferred over ADD.  */
13875#define IX86_LEA_PRIORITY 2
13876
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For processors
   like ATOM, if the destination register of LEA holds an actual
   address which will be used soon, LEA is better, and otherwise ADD
   is better.  */
13882
13883bool
13884ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13885                     rtx insn, rtx operands[])
13886{
13887  unsigned int regno0 = true_regnum (operands[0]);
13888  unsigned int regno1 = true_regnum (operands[1]);
13889  unsigned int regno2;
13890
13891  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13892    return regno0 != regno1;
13893
13894  regno2 = true_regnum (operands[2]);
13895
  /* If a = b + c with a != b and a != c, we must use the LEA form.  */
13897  if (regno0 != regno1 && regno0 != regno2)
13898    return true;
13899  else
13900    {
13901      int dist_define, dist_use;
13902      dist_define = distance_non_agu_define (regno1, regno2, insn);
13903      if (dist_define <= 0)
13904        return true;
13905
      /* If this insn has both a backward non-AGU dependence and a forward
	 AGU dependence, the one with the shorter distance takes effect.  */
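      /* For instance, with IX86_LEA_PRIORITY == 2: a non-AGU definition
	 3 insns back and an AGU use 4 insns ahead gives 3 + 2 >= 4, so
	 LEA is kept; a definition only 1 insn back with the use 5 insns
	 ahead gives 1 + 2 < 5, so the plain ADD wins.  */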
13908      dist_use = distance_agu_use (regno0, insn);
13909      if (dist_use <= 0
13910	  || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13911        return false;
13912
13913      return true;
13914    }
13915}
13916
13917/* Return true if destination reg of SET_BODY is shift count of
13918   USE_BODY.  */
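/* (For example, a SET whose destination is %ecx followed by a shift or
   rotate that uses %cl as its count creates such a dependence.)  */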
13919
13920static bool
13921ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13922{
13923  rtx set_dest;
13924  rtx shift_rtx;
13925  int i;
13926
13927  /* Retrieve destination of SET_BODY.  */
13928  switch (GET_CODE (set_body))
13929    {
13930    case SET:
13931      set_dest = SET_DEST (set_body);
13932      if (!set_dest || !REG_P (set_dest))
13933	return false;
13934      break;
13935    case PARALLEL:
13936      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13937	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13938					  use_body))
13939	  return true;
13940    default:
13941      return false;
13942      break;
13943    }
13944
13945  /* Retrieve shift count of USE_BODY.  */
13946  switch (GET_CODE (use_body))
13947    {
13948    case SET:
13949      shift_rtx = XEXP (use_body, 1);
13950      break;
13951    case PARALLEL:
13952      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13953	if (ix86_dep_by_shift_count_body (set_body,
13954					  XVECEXP (use_body, 0, i)))
13955	  return true;
13956    default:
13957      return false;
13958      break;
13959    }
13960
13961  if (shift_rtx
13962      && (GET_CODE (shift_rtx) == ASHIFT
13963	  || GET_CODE (shift_rtx) == LSHIFTRT
13964	  || GET_CODE (shift_rtx) == ASHIFTRT
13965	  || GET_CODE (shift_rtx) == ROTATE
13966	  || GET_CODE (shift_rtx) == ROTATERT))
13967    {
13968      rtx shift_count = XEXP (shift_rtx, 1);
13969
13970      /* Return true if shift count is dest of SET_BODY.  */
13971      if (REG_P (shift_count)
13972	  && true_regnum (set_dest) == true_regnum (shift_count))
13973	return true;
13974    }
13975
13976  return false;
13977}
13978
13979/* Return true if destination reg of SET_INSN is shift count of
13980   USE_INSN.  */
13981
13982bool
13983ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13984{
13985  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13986				       PATTERN (use_insn));
13987}
13988
13989/* Return TRUE or FALSE depending on whether the unary operator meets the
13990   appropriate constraints.  */
13991
13992int
13993ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13994			enum machine_mode mode ATTRIBUTE_UNUSED,
13995			rtx operands[2] ATTRIBUTE_UNUSED)
13996{
  /* If one of the operands is a memory operand, the source and
     destination must match.  */
13998  if ((MEM_P (operands[0])
13999       || MEM_P (operands[1]))
14000      && ! rtx_equal_p (operands[0], operands[1]))
14001    return FALSE;
14002  return TRUE;
14003}
14004
14005/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14006   are ok, keeping in mind the possible movddup alternative.  */
14007
14008bool
14009ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
14010{
14011  if (MEM_P (operands[0]))
14012    return rtx_equal_p (operands[0], operands[1 + high]);
14013  if (MEM_P (operands[1]) && MEM_P (operands[2]))
14014    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
14015  return true;
14016}
14017
14018/* Post-reload splitter for converting an SF or DFmode value in an
14019   SSE register into an unsigned SImode.  */
14020
14021void
14022ix86_split_convert_uns_si_sse (rtx operands[])
14023{
14024  enum machine_mode vecmode;
14025  rtx value, large, zero_or_two31, input, two31, x;
14026
14027  large = operands[1];
14028  zero_or_two31 = operands[2];
14029  input = operands[3];
14030  two31 = operands[4];
14031  vecmode = GET_MODE (large);
14032  value = gen_rtx_REG (vecmode, REGNO (operands[0]));
14033
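  /* Overall idea: cvttps2dq/cvttpd2dq produce only signed SImode
     results, so lanes whose value is >= 2**31 have 2**31 subtracted
     before the conversion, and LARGE is turned into a per-lane
     0x80000000 correction that is xor-ed back in afterwards.  */
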
14034  /* Load up the value into the low element.  We must ensure that the other
14035     elements are valid floats -- zero is the easiest such value.  */
14036  if (MEM_P (input))
14037    {
14038      if (vecmode == V4SFmode)
14039	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
14040      else
14041	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
14042    }
14043  else
14044    {
14045      input = gen_rtx_REG (vecmode, REGNO (input));
14046      emit_move_insn (value, CONST0_RTX (vecmode));
14047      if (vecmode == V4SFmode)
14048	emit_insn (gen_sse_movss (value, value, input));
14049      else
14050	emit_insn (gen_sse2_movsd (value, value, input));
14051    }
14052
14053  emit_move_insn (large, two31);
14054  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
14055
14056  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
14057  emit_insn (gen_rtx_SET (VOIDmode, large, x));
14058
14059  x = gen_rtx_AND (vecmode, zero_or_two31, large);
14060  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
14061
14062  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
14063  emit_insn (gen_rtx_SET (VOIDmode, value, x));
14064
14065  large = gen_rtx_REG (V4SImode, REGNO (large));
14066  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
14067
14068  x = gen_rtx_REG (V4SImode, REGNO (value));
14069  if (vecmode == V4SFmode)
14070    emit_insn (gen_sse2_cvttps2dq (x, value));
14071  else
14072    emit_insn (gen_sse2_cvttpd2dq (x, value));
14073  value = x;
14074
14075  emit_insn (gen_xorv4si3 (value, value, large));
14076}
14077
14078/* Convert an unsigned DImode value into a DFmode, using only SSE.
14079   Expects the 64-bit DImode to be supplied in a pair of integral
14080   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
14081   -mfpmath=sse, !optimize_size only.  */
14082
14083void
14084ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
14085{
14086  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
14087  rtx int_xmm, fp_xmm;
14088  rtx biases, exponents;
14089  rtx x;
14090
14091  int_xmm = gen_reg_rtx (V4SImode);
14092  if (TARGET_INTER_UNIT_MOVES)
14093    emit_insn (gen_movdi_to_sse (int_xmm, input));
14094  else if (TARGET_SSE_SPLIT_REGS)
14095    {
14096      emit_clobber (int_xmm);
14097      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
14098    }
14099  else
14100    {
14101      x = gen_reg_rtx (V2DImode);
14102      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
14103      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
14104    }
14105
14106  x = gen_rtx_CONST_VECTOR (V4SImode,
14107			    gen_rtvec (4, GEN_INT (0x43300000UL),
14108				       GEN_INT (0x45300000UL),
14109				       const0_rtx, const0_rtx));
14110  exponents = validize_mem (force_const_mem (V4SImode, x));
14111
14112  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
14113  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
14114
14115  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
14116     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
14117     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
14118     (0x1.0p84 + double(fp_value_hi_xmm)).
14119     Note these exponents differ by 32.  */
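
  /* For instance, for the input 2**33 + 5 the low half yields
     0x1.0p52 + 5.0 and the high half 0x1.0p84 + 2**33; subtracting the
     biases and adding the two halves recovers 8589934597.0 exactly.  */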
14120
14121  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
14122
14123  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
14124     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
14125  real_ldexp (&bias_lo_rvt, &dconst1, 52);
14126  real_ldexp (&bias_hi_rvt, &dconst1, 84);
14127  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
14128  x = const_double_from_real_value (bias_hi_rvt, DFmode);
14129  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
14130  biases = validize_mem (force_const_mem (V2DFmode, biases));
14131  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
14132
14133  /* Add the upper and lower DFmode values together.  */
14134  if (TARGET_SSE3)
14135    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
14136  else
14137    {
14138      x = copy_to_mode_reg (V2DFmode, fp_xmm);
14139      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
14140      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
14141    }
14142
14143  ix86_expand_vector_extract (false, target, fp_xmm, 0);
14144}
14145
14146/* Not used, but eases macroization of patterns.  */
14147void
14148ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
14149				  rtx input ATTRIBUTE_UNUSED)
14150{
14151  gcc_unreachable ();
14152}
14153
14154/* Convert an unsigned SImode value into a DFmode.  Only currently used
14155   for SSE, but applicable anywhere.  */
14156
14157void
14158ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
14159{
14160  REAL_VALUE_TYPE TWO31r;
14161  rtx x, fp;
14162
14163  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
14164			   NULL, 1, OPTAB_DIRECT);
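
  /* INPUT + (-2**31) computed in SImode wraps to the signed value
     INPUT - 2**31 (equivalently INPUT ^ 0x80000000), which the signed
     floatsidf2 conversion below handles; adding 2**31.0 back afterwards
     recovers the unsigned value.  */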
14165
14166  fp = gen_reg_rtx (DFmode);
14167  emit_insn (gen_floatsidf2 (fp, x));
14168
14169  real_ldexp (&TWO31r, &dconst1, 31);
14170  x = const_double_from_real_value (TWO31r, DFmode);
14171
14172  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
14173  if (x != target)
14174    emit_move_insn (target, x);
14175}
14176
14177/* Convert a signed DImode value into a DFmode.  Only used for SSE in
14178   32-bit mode; otherwise we have a direct convert instruction.  */
14179
14180void
14181ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
14182{
14183  REAL_VALUE_TYPE TWO32r;
14184  rtx fp_lo, fp_hi, x;
14185
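  /* A signed DImode value equals (signed) HIGH * 2**32 + (unsigned) LOW,
     so convert the two SImode halves separately and combine the results
     in DFmode.  */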
14186  fp_lo = gen_reg_rtx (DFmode);
14187  fp_hi = gen_reg_rtx (DFmode);
14188
14189  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
14190
14191  real_ldexp (&TWO32r, &dconst1, 32);
14192  x = const_double_from_real_value (TWO32r, DFmode);
14193  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
14194
14195  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
14196
14197  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
14198			   0, OPTAB_DIRECT);
14199  if (x != target)
14200    emit_move_insn (target, x);
14201}
14202
14203/* Convert an unsigned SImode value into a SFmode, using only SSE.
14204   For x86_32, -mfpmath=sse, !optimize_size only.  */
14205void
14206ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
14207{
14208  REAL_VALUE_TYPE ONE16r;
14209  rtx fp_hi, fp_lo, int_hi, int_lo, x;
14210
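  /* Split INPUT into two 16-bit halves, convert each (exactly) with the
     signed SImode->SFmode conversion, and recombine as HI * 2**16 + LO;
     both halves are below 2**31, so the signedness of floatsisf2 is not
     an issue.  */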
14211  real_ldexp (&ONE16r, &dconst1, 16);
14212  x = const_double_from_real_value (ONE16r, SFmode);
14213  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
14214				      NULL, 0, OPTAB_DIRECT);
14215  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
14216				      NULL, 0, OPTAB_DIRECT);
14217  fp_hi = gen_reg_rtx (SFmode);
14218  fp_lo = gen_reg_rtx (SFmode);
14219  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
14220  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
14221  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
14222			       0, OPTAB_DIRECT);
14223  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
14224			       0, OPTAB_DIRECT);
14225  if (!rtx_equal_p (target, fp_hi))
14226    emit_move_insn (target, fp_hi);
14227}
14228
14229/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
14230   then replicate the value for all elements of the vector
14231   register.  */
14232
14233rtx
14234ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
14235{
14236  rtvec v;
14237  switch (mode)
14238    {
14239    case SImode:
14240      gcc_assert (vect);
14241      v = gen_rtvec (4, value, value, value, value);
14242      return gen_rtx_CONST_VECTOR (V4SImode, v);
14243
14244    case DImode:
14245      gcc_assert (vect);
14246      v = gen_rtvec (2, value, value);
14247      return gen_rtx_CONST_VECTOR (V2DImode, v);
14248
14249    case SFmode:
14250      if (vect)
14251	v = gen_rtvec (4, value, value, value, value);
14252      else
14253	v = gen_rtvec (4, value, CONST0_RTX (SFmode),
14254		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14255      return gen_rtx_CONST_VECTOR (V4SFmode, v);
14256
14257    case DFmode:
14258      if (vect)
14259	v = gen_rtvec (2, value, value);
14260      else
14261	v = gen_rtvec (2, value, CONST0_RTX (DFmode));
14262      return gen_rtx_CONST_VECTOR (V2DFmode, v);
14263
14264    default:
14265      gcc_unreachable ();
14266    }
14267}
14268
14269/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
14270   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
14271   for an SSE register.  If VECT is true, then replicate the mask for
14272   all elements of the vector register.  If INVERT is true, then create
14273   a mask excluding the sign bit.  */
14274
14275rtx
14276ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
14277{
14278  enum machine_mode vec_mode, imode;
14279  HOST_WIDE_INT hi, lo;
14280  int shift = 63;
14281  rtx v;
14282  rtx mask;
14283
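  /* E.g. for DFmode with VECT false and INVERT false this produces the
     V2DF constant { -0.0, 0.0 }, i.e. only bit 63 of the low element
     set, loaded into an SSE register.  */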
14284  /* Find the sign bit, sign extended to 2*HWI.  */
14285  switch (mode)
14286    {
14287    case SImode:
14288    case SFmode:
14289      imode = SImode;
14290      vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
14291      lo = 0x80000000, hi = lo < 0;
14292      break;
14293
14294    case DImode:
14295    case DFmode:
14296      imode = DImode;
14297      vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
14298      if (HOST_BITS_PER_WIDE_INT >= 64)
14299	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
14300      else
14301	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
14302      break;
14303
14304    case TImode:
14305    case TFmode:
14306      vec_mode = VOIDmode;
14307      if (HOST_BITS_PER_WIDE_INT >= 64)
14308	{
14309	  imode = TImode;
14310	  lo = 0, hi = (HOST_WIDE_INT)1 << shift;
14311	}
14312      else
14313	{
14314	  rtvec vec;
14315
14316	  imode = DImode;
14317	  lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
14318
14319	  if (invert)
14320	    {
14321	      lo = ~lo, hi = ~hi;
14322	      v = constm1_rtx;
14323	    }
14324	  else
14325	    v = const0_rtx;
14326
14327	  mask = immed_double_const (lo, hi, imode);
14328
14329	  vec = gen_rtvec (2, v, mask);
14330	  v = gen_rtx_CONST_VECTOR (V2DImode, vec);
14331	  v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
14332
14333	  return v;
14334	}
14335     break;
14336
14337    default:
14338      gcc_unreachable ();
14339    }
14340
14341  if (invert)
14342    lo = ~lo, hi = ~hi;
14343
14344  /* Force this value into the low part of a fp vector constant.  */
14345  mask = immed_double_const (lo, hi, imode);
14346  mask = gen_lowpart (mode, mask);
14347
14348  if (vec_mode == VOIDmode)
14349    return force_reg (mode, mask);
14350
14351  v = ix86_build_const_vector (mode, vect, mask);
14352  return force_reg (vec_mode, v);
14353}
14354
14355/* Generate code for floating point ABS or NEG.  */
14356
14357void
14358ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
14359				rtx operands[])
14360{
14361  rtx mask, set, use, clob, dst, src;
14362  bool use_sse = false;
14363  bool vector_mode = VECTOR_MODE_P (mode);
14364  enum machine_mode elt_mode = mode;
14365
14366  if (vector_mode)
14367    {
14368      elt_mode = GET_MODE_INNER (mode);
14369      use_sse = true;
14370    }
14371  else if (mode == TFmode)
14372    use_sse = true;
14373  else if (TARGET_SSE_MATH)
14374    use_sse = SSE_FLOAT_MODE_P (mode);
14375
14376  /* NEG and ABS performed with SSE use bitwise mask operations.
14377     Create the appropriate mask now.  */
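  /* (For NEG the mask keeps only the sign bit and is ultimately XORed
     in; for ABS it keeps everything except the sign bit and is ANDed
     in, which is why INVERT is passed as "code == ABS" below.)  */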
14378  if (use_sse)
14379    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
14380  else
14381    mask = NULL_RTX;
14382
14383  dst = operands[0];
14384  src = operands[1];
14385
14386  if (vector_mode)
14387    {
14388      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
14389      set = gen_rtx_SET (VOIDmode, dst, set);
14390      emit_insn (set);
14391    }
14392  else
14393    {
14394      set = gen_rtx_fmt_e (code, mode, src);
14395      set = gen_rtx_SET (VOIDmode, dst, set);
14396      if (mask)
14397        {
14398          use = gen_rtx_USE (VOIDmode, mask);
14399          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14400          emit_insn (gen_rtx_PARALLEL (VOIDmode,
14401				       gen_rtvec (3, set, use, clob)));
14402        }
14403      else
14404	emit_insn (set);
14405    }
14406}
14407
14408/* Expand a copysign operation.  Special case operand 0 being a constant.  */
14409
14410void
14411ix86_expand_copysign (rtx operands[])
14412{
14413  enum machine_mode mode;
14414  rtx dest, op0, op1, mask, nmask;
14415
14416  dest = operands[0];
14417  op0 = operands[1];
14418  op1 = operands[2];
14419
14420  mode = GET_MODE (dest);
14421
14422  if (GET_CODE (op0) == CONST_DOUBLE)
14423    {
14424      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
14425
14426      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
14427	op0 = simplify_unary_operation (ABS, mode, op0, mode);
14428
14429      if (mode == SFmode || mode == DFmode)
14430	{
14431	  enum machine_mode vmode;
14432
14433	  vmode = mode == SFmode ? V4SFmode : V2DFmode;
14434
14435	  if (op0 == CONST0_RTX (mode))
14436	    op0 = CONST0_RTX (vmode);
14437	  else
14438	    {
14439	      rtx v = ix86_build_const_vector (mode, false, op0);
14440
14441	      op0 = force_reg (vmode, v);
14442	    }
14443	}
14444      else if (op0 != CONST0_RTX (mode))
14445	op0 = force_reg (mode, op0);
14446
14447      mask = ix86_build_signbit_mask (mode, 0, 0);
14448
14449      if (mode == SFmode)
14450	copysign_insn = gen_copysignsf3_const;
14451      else if (mode == DFmode)
14452	copysign_insn = gen_copysigndf3_const;
14453      else
14454	copysign_insn = gen_copysigntf3_const;
14455
      emit_insn (copysign_insn (dest, op0, op1, mask));
14457    }
14458  else
14459    {
14460      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
14461
14462      nmask = ix86_build_signbit_mask (mode, 0, 1);
14463      mask = ix86_build_signbit_mask (mode, 0, 0);
14464
14465      if (mode == SFmode)
14466	copysign_insn = gen_copysignsf3_var;
14467      else if (mode == DFmode)
14468	copysign_insn = gen_copysigndf3_var;
14469      else
14470	copysign_insn = gen_copysigntf3_var;
14471
14472      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
14473    }
14474}
14475
14476/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
14477   be a constant, and so has already been expanded into a vector constant.  */
14478
14479void
14480ix86_split_copysign_const (rtx operands[])
14481{
14482  enum machine_mode mode, vmode;
14483  rtx dest, op0, mask, x;
14484
14485  dest = operands[0];
14486  op0 = operands[1];
14487  mask = operands[3];
14488
14489  mode = GET_MODE (dest);
14490  vmode = GET_MODE (mask);
14491
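  /* DEST is expected to already hold the variable sign-source operand
     (the insn pattern ties the two together; this routine never reads
     it separately), so ANDing DEST with MASK keeps only that operand's
     sign bit, and the IOR below merges in the already-nonnegative
     constant magnitude OP0.  */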
14492  dest = simplify_gen_subreg (vmode, dest, mode, 0);
14493  x = gen_rtx_AND (vmode, dest, mask);
14494  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14495
14496  if (op0 != CONST0_RTX (vmode))
14497    {
14498      x = gen_rtx_IOR (vmode, dest, op0);
14499      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14500    }
14501}
14502
14503/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
14504   so we have to do two masks.  */
14505
14506void
14507ix86_split_copysign_var (rtx operands[])
14508{
14509  enum machine_mode mode, vmode;
14510  rtx dest, scratch, op0, op1, mask, nmask, x;
14511
14512  dest = operands[0];
14513  scratch = operands[1];
14514  op0 = operands[2];
14515  op1 = operands[3];
14516  nmask = operands[4];
14517  mask = operands[5];
14518
14519  mode = GET_MODE (dest);
14520  vmode = GET_MODE (mask);
14521
14522  if (rtx_equal_p (op0, op1))
14523    {
14524      /* Shouldn't happen often (it's useless, obviously), but when it does
14525	 we'd generate incorrect code if we continue below.  */
14526      emit_move_insn (dest, op0);
14527      return;
14528    }
14529
14530  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
14531    {
14532      gcc_assert (REGNO (op1) == REGNO (scratch));
14533
14534      x = gen_rtx_AND (vmode, scratch, mask);
14535      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14536
14537      dest = mask;
14538      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14539      x = gen_rtx_NOT (vmode, dest);
14540      x = gen_rtx_AND (vmode, x, op0);
14541      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14542    }
14543  else
14544    {
14545      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
14546	{
14547	  x = gen_rtx_AND (vmode, scratch, mask);
14548	}
14549      else						/* alternative 2,4 */
14550	{
14551          gcc_assert (REGNO (mask) == REGNO (scratch));
14552          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
14553	  x = gen_rtx_AND (vmode, scratch, op1);
14554	}
14555      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14556
14557      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
14558	{
14559	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
14560	  x = gen_rtx_AND (vmode, dest, nmask);
14561	}
14562      else						/* alternative 3,4 */
14563	{
14564          gcc_assert (REGNO (nmask) == REGNO (dest));
14565	  dest = nmask;
14566	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14567	  x = gen_rtx_AND (vmode, dest, op0);
14568	}
14569      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14570    }
14571
14572  x = gen_rtx_IOR (vmode, dest, scratch);
14573  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14574}
14575
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes and whether the
   CC mode is at least as constrained as REQ_MODE.  */
14579
14580int
14581ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
14582{
14583  rtx set;
14584  enum machine_mode set_mode;
14585
14586  set = PATTERN (insn);
14587  if (GET_CODE (set) == PARALLEL)
14588    set = XVECEXP (set, 0, 0);
14589  gcc_assert (GET_CODE (set) == SET);
14590  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
14591
14592  set_mode = GET_MODE (SET_DEST (set));
14593  switch (set_mode)
14594    {
14595    case CCNOmode:
14596      if (req_mode != CCNOmode
14597	  && (req_mode != CCmode
14598	      || XEXP (SET_SRC (set), 1) != const0_rtx))
14599	return 0;
14600      break;
14601    case CCmode:
14602      if (req_mode == CCGCmode)
14603	return 0;
14604      /* FALLTHRU */
14605    case CCGCmode:
14606      if (req_mode == CCGOCmode || req_mode == CCNOmode)
14607	return 0;
14608      /* FALLTHRU */
14609    case CCGOCmode:
14610      if (req_mode == CCZmode)
14611	return 0;
14612      /* FALLTHRU */
14613    case CCAmode:
14614    case CCCmode:
14615    case CCOmode:
14616    case CCSmode:
14617    case CCZmode:
14618      break;
14619
14620    default:
14621      gcc_unreachable ();
14622    }
14623
14624  return (GET_MODE (SET_SRC (set)) == set_mode);
14625}
14626
14627/* Generate insn patterns to do an integer compare of OPERANDS.  */
14628
14629static rtx
14630ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14631{
14632  enum machine_mode cmpmode;
14633  rtx tmp, flags;
14634
14635  cmpmode = SELECT_CC_MODE (code, op0, op1);
14636  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14637
14638  /* This is very simple, but making the interface the same as in the
14639     FP case makes the rest of the code easier.  */
14640  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14641  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14642
14643  /* Return the test that should be put into the flags user, i.e.
14644     the bcc, scc, or cmov instruction.  */
14645  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14646}
14647
14648/* Figure out whether to use ordered or unordered fp comparisons.
14649   Return the appropriate mode to use.  */
14650
14651enum machine_mode
14652ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14653{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM-based compares.  */
14659  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
14660}
14661
14662enum machine_mode
14663ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14664{
14665  enum machine_mode mode = GET_MODE (op0);
14666
14667  if (SCALAR_FLOAT_MODE_P (mode))
14668    {
14669      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14670      return ix86_fp_compare_mode (code);
14671    }
14672
14673  switch (code)
14674    {
14675      /* Only zero flag is needed.  */
14676    case EQ:			/* ZF=0 */
14677    case NE:			/* ZF!=0 */
14678      return CCZmode;
14679      /* Codes needing carry flag.  */
14680    case GEU:			/* CF=0 */
14681    case LTU:			/* CF=1 */
14682      /* Detect overflow checks.  They need just the carry flag.  */
14683      if (GET_CODE (op0) == PLUS
14684	  && rtx_equal_p (op1, XEXP (op0, 0)))
14685	return CCCmode;
14686      else
14687	return CCmode;
14688    case GTU:			/* CF=0 & ZF=0 */
14689    case LEU:			/* CF=1 | ZF=1 */
14690      /* Detect overflow checks.  They need just the carry flag.  */
14691      if (GET_CODE (op0) == MINUS
14692	  && rtx_equal_p (op1, XEXP (op0, 0)))
14693	return CCCmode;
14694      else
14695	return CCmode;
14696      /* Codes possibly doable only with sign flag when
14697         comparing against zero.  */
14698    case GE:			/* SF=OF   or   SF=0 */
14699    case LT:			/* SF<>OF  or   SF=1 */
14700      if (op1 == const0_rtx)
14701	return CCGOCmode;
14702      else
14703	/* For other cases Carry flag is not required.  */
14704	return CCGCmode;
      /* Codes doable only with the sign flag when comparing
         against zero, but for which we lack a jump instruction,
         so we need to use relational tests against the overflow
         flag, which thus needs to be zero.  */
14709    case GT:			/* ZF=0 & SF=OF */
14710    case LE:			/* ZF=1 | SF<>OF */
14711      if (op1 == const0_rtx)
14712	return CCNOmode;
14713      else
14714	return CCGCmode;
      /* The strcmp pattern does (use flags), and combine may ask us for
	 the proper mode.  */
14717    case USE:
14718      return CCmode;
14719    default:
14720      gcc_unreachable ();
14721    }
14722}
14723
14724/* Return the fixed registers used for condition codes.  */
14725
14726static bool
14727ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14728{
14729  *p1 = FLAGS_REG;
14730  *p2 = FPSR_REG;
14731  return true;
14732}
14733
14734/* If two condition code modes are compatible, return a condition code
14735   mode which is compatible with both.  Otherwise, return
14736   VOIDmode.  */
14737
14738static enum machine_mode
14739ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14740{
14741  if (m1 == m2)
14742    return m1;
14743
14744  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
14745    return VOIDmode;
14746
14747  if ((m1 == CCGCmode && m2 == CCGOCmode)
14748      || (m1 == CCGOCmode && m2 == CCGCmode))
14749    return CCGCmode;
14750
14751  switch (m1)
14752    {
14753    default:
14754      gcc_unreachable ();
14755
14756    case CCmode:
14757    case CCGCmode:
14758    case CCGOCmode:
14759    case CCNOmode:
14760    case CCAmode:
14761    case CCCmode:
14762    case CCOmode:
14763    case CCSmode:
14764    case CCZmode:
14765      switch (m2)
14766	{
14767	default:
14768	  return VOIDmode;
14769
14770	case CCmode:
14771	case CCGCmode:
14772	case CCGOCmode:
14773	case CCNOmode:
14774	case CCAmode:
14775	case CCCmode:
14776	case CCOmode:
14777	case CCSmode:
14778	case CCZmode:
14779	  return CCmode;
14780	}
14781
14782    case CCFPmode:
14783    case CCFPUmode:
14784      /* These are only compatible with themselves, which we already
14785	 checked above.  */
14786      return VOIDmode;
14787    }
14788}
14789
14790
14791/* Return a comparison we can do and that it is equivalent to
14792   swap_condition (code) apart possibly from orderedness.
14793   But, never change orderedness if TARGET_IEEE_FP, returning
14794   UNKNOWN in that case if necessary.  */
14795
14796static enum rtx_code
14797ix86_fp_swap_condition (enum rtx_code code)
14798{
14799  switch (code)
14800    {
14801    case GT:                   /* GTU - CF=0 & ZF=0 */
14802      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
14803    case GE:                   /* GEU - CF=0 */
14804      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
14805    case UNLT:                 /* LTU - CF=1 */
14806      return TARGET_IEEE_FP ? UNKNOWN : GT;
14807    case UNLE:                 /* LEU - CF=1 | ZF=1 */
14808      return TARGET_IEEE_FP ? UNKNOWN : GE;
14809    default:
14810      return swap_condition (code);
14811    }
14812}
14813
/* Return the cost of comparison CODE using the best strategy for performance.
   All the following functions use the number of instructions as a cost metric.
   In the future this should be tweaked to compute bytes for optimize_size and
   to take into account the performance of various instructions on various
   CPUs.  */
14818
14819static int
14820ix86_fp_comparison_cost (enum rtx_code code)
14821{
14822  int arith_cost;
14823
14824  /* The cost of code using bit-twiddling on %ah.  */
14825  switch (code)
14826    {
14827    case UNLE:
14828    case UNLT:
14829    case LTGT:
14830    case GT:
14831    case GE:
14832    case UNORDERED:
14833    case ORDERED:
14834    case UNEQ:
14835      arith_cost = 4;
14836      break;
14837    case LT:
14838    case NE:
14839    case EQ:
14840    case UNGE:
14841      arith_cost = TARGET_IEEE_FP ? 5 : 4;
14842      break;
14843    case LE:
14844    case UNGT:
14845      arith_cost = TARGET_IEEE_FP ? 6 : 4;
14846      break;
14847    default:
14848      gcc_unreachable ();
14849    }
14850
14851  switch (ix86_fp_comparison_strategy (code))
14852    {
14853    case IX86_FPCMP_COMI:
14854      return arith_cost > 4 ? 3 : 2;
14855    case IX86_FPCMP_SAHF:
14856      return arith_cost > 4 ? 4 : 3;
14857    default:
14858      return arith_cost;
14859    }
14860}
14861
/* Return the strategy to use for a floating-point comparison.  We assume
   that fcomi is always preferable where available, since that is also true
   when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for
   fnstsw+test).  */
14865
14866enum ix86_fpcmp_strategy
14867ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
14868{
14869  /* Do fcomi/sahf based test when profitable.  */
14870
14871  if (TARGET_CMOVE)
14872    return IX86_FPCMP_COMI;
14873
14874  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
14875    return IX86_FPCMP_SAHF;
14876
14877  return IX86_FPCMP_ARITH;
14878}
14879
14880/* Swap, force into registers, or otherwise massage the two operands
14881   to a fp comparison.  The operands are updated in place; the new
14882   comparison code is returned.  */
14883
14884static enum rtx_code
14885ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14886{
14887  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14888  rtx op0 = *pop0, op1 = *pop1;
14889  enum machine_mode op_mode = GET_MODE (op0);
14890  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14891
14892  /* All of the unordered compare instructions only work on registers.
14893     The same is true of the fcomi compare instructions.  The XFmode
14894     compare instructions require registers except when comparing
14895     against zero or when converting operand 1 from fixed point to
14896     floating point.  */
14897
14898  if (!is_sse
14899      && (fpcmp_mode == CCFPUmode
14900	  || (op_mode == XFmode
14901	      && ! (standard_80387_constant_p (op0) == 1
14902		    || standard_80387_constant_p (op1) == 1)
14903	      && GET_CODE (op1) != FLOAT)
14904	  || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
14905    {
14906      op0 = force_reg (op_mode, op0);
14907      op1 = force_reg (op_mode, op1);
14908    }
14909  else
14910    {
14911      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
14912	 things around if they appear profitable, otherwise force op0
14913	 into a register.  */
14914
14915      if (standard_80387_constant_p (op0) == 0
14916	  || (MEM_P (op0)
14917	      && ! (standard_80387_constant_p (op1) == 0
14918		    || MEM_P (op1))))
14919	{
14920	  enum rtx_code new_code = ix86_fp_swap_condition (code);
14921	  if (new_code != UNKNOWN)
14922	    {
14923	      rtx tmp;
14924	      tmp = op0, op0 = op1, op1 = tmp;
14925	      code = new_code;
14926	    }
14927	}
14928
14929      if (!REG_P (op0))
14930	op0 = force_reg (op_mode, op0);
14931
14932      if (CONSTANT_P (op1))
14933	{
14934	  int tmp = standard_80387_constant_p (op1);
14935	  if (tmp == 0)
14936	    op1 = validize_mem (force_const_mem (op_mode, op1));
14937	  else if (tmp == 1)
14938	    {
14939	      if (TARGET_CMOVE)
14940		op1 = force_reg (op_mode, op1);
14941	    }
14942	  else
14943	    op1 = force_reg (op_mode, op1);
14944	}
14945    }
14946
14947  /* Try to rearrange the comparison to make it cheaper.  */
14948  if (ix86_fp_comparison_cost (code)
14949      > ix86_fp_comparison_cost (swap_condition (code))
14950      && (REG_P (op1) || can_create_pseudo_p ()))
14951    {
14952      rtx tmp;
14953      tmp = op0, op0 = op1, op1 = tmp;
14954      code = swap_condition (code);
14955      if (!REG_P (op0))
14956	op0 = force_reg (op_mode, op0);
14957    }
14958
14959  *pop0 = op0;
14960  *pop1 = op1;
14961  return code;
14962}
14963
/* Convert the comparison codes we use to represent an FP comparison to the
   integer code that will result in a proper branch.  Return UNKNOWN if no
   such code is available.  */
14967
14968enum rtx_code
14969ix86_fp_compare_code_to_integer (enum rtx_code code)
14970{
14971  switch (code)
14972    {
14973    case GT:
14974      return GTU;
14975    case GE:
14976      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
14993    default:
14994      return UNKNOWN;
14995    }
14996}
14997
14998/* Generate insn patterns to do a floating point compare of OPERANDS.  */
14999
15000static rtx
15001ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
15002{
15003  enum machine_mode fpcmp_mode, intcmp_mode;
15004  rtx tmp, tmp2;
15005
15006  fpcmp_mode = ix86_fp_compare_mode (code);
15007  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
15008
15009  /* Do fcomi/sahf based test when profitable.  */
15010  switch (ix86_fp_comparison_strategy (code))
15011    {
15012    case IX86_FPCMP_COMI:
15013      intcmp_mode = fpcmp_mode;
15014      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15015      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15016			 tmp);
15017      emit_insn (tmp);
15018      break;
15019
15020    case IX86_FPCMP_SAHF:
15021      intcmp_mode = fpcmp_mode;
15022      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15023      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15024			 tmp);
15025
15026      if (!scratch)
15027	scratch = gen_reg_rtx (HImode);
15028      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
15029      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
15030      break;
15031
15032    case IX86_FPCMP_ARITH:
      /* Reg-stack pops clobber the FP status word, so we must fetch it
         with fnstsw first.  */
15034      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15035      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
15036      if (!scratch)
15037	scratch = gen_reg_rtx (HImode);
15038      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
15039
      /* In the unordered case, we have to check C2 for NaNs, which
         unfortunately does not combine into anything directly usable.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */
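      /* After fnstsw, AH holds the x87 condition bits:
           C0 = 0x01, C2 = 0x04, C3 = 0x40.
         fcom sets them to
           op0 > op1:   C3=0 C2=0 C0=0
           op0 < op1:   C3=0 C2=0 C0=1
           op0 == op1:  C3=1 C2=0 C0=0
           unordered:   C3=1 C2=1 C0=1
         which is what the 0x45/0x44/0x40/0x04/0x01 masks below test.  */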
15044
15045      intcmp_mode = CCNOmode;
15046      switch (code)
15047	{
15048	case GT:
15049	case UNGT:
15050	  if (code == GT || !TARGET_IEEE_FP)
15051	    {
15052	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
15053	      code = EQ;
15054	    }
15055	  else
15056	    {
15057	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15058	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
15059	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
15060	      intcmp_mode = CCmode;
15061	      code = GEU;
15062	    }
15063	  break;
15064	case LT:
15065	case UNLT:
15066	  if (code == LT && TARGET_IEEE_FP)
15067	    {
15068	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15069	      emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
15070	      intcmp_mode = CCmode;
15071	      code = EQ;
15072	    }
15073	  else
15074	    {
15075	      emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
15076	      code = NE;
15077	    }
15078	  break;
15079	case GE:
15080	case UNGE:
15081	  if (code == GE || !TARGET_IEEE_FP)
15082	    {
15083	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
15084	      code = EQ;
15085	    }
15086	  else
15087	    {
15088	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15089	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
15090	      code = NE;
15091	    }
15092	  break;
15093	case LE:
15094	case UNLE:
15095	  if (code == LE && TARGET_IEEE_FP)
15096	    {
15097	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15098	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
15099	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
15100	      intcmp_mode = CCmode;
15101	      code = LTU;
15102	    }
15103	  else
15104	    {
15105	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
15106	      code = NE;
15107	    }
15108	  break;
15109	case EQ:
15110	case UNEQ:
15111	  if (code == EQ && TARGET_IEEE_FP)
15112	    {
15113	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15114	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
15115	      intcmp_mode = CCmode;
15116	      code = EQ;
15117	    }
15118	  else
15119	    {
15120	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
15121	      code = NE;
15122	    }
15123	  break;
15124	case NE:
15125	case LTGT:
15126	  if (code == NE && TARGET_IEEE_FP)
15127	    {
15128	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15129	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
15130					     GEN_INT (0x40)));
15131	      code = NE;
15132	    }
15133	  else
15134	    {
15135	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
15136	      code = EQ;
15137	    }
15138	  break;
15139
15140	case UNORDERED:
15141	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
15142	  code = NE;
15143	  break;
15144	case ORDERED:
15145	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
15146	  code = EQ;
15147	  break;
15148
15149	default:
15150	  gcc_unreachable ();
15151	}
15152	break;
15153
15154    default:
      gcc_unreachable ();
15156    }
15157
15158  /* Return the test that should be put into the flags user, i.e.
15159     the bcc, scc, or cmov instruction.  */
15160  return gen_rtx_fmt_ee (code, VOIDmode,
15161			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
15162			 const0_rtx);
15163}
15164
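/* Expand a comparison of ix86_compare_op0 with ix86_compare_op1 using
   comparison code CODE, emitting any compare insns needed, and return an
   rtx suitable for use as the condition of a jump, set, or cmove.  */
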
15165rtx
15166ix86_expand_compare (enum rtx_code code)
15167{
15168  rtx op0, op1, ret;
15169  op0 = ix86_compare_op0;
15170  op1 = ix86_compare_op1;
15171
15172  if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
15173    ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
15174
15175  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
15176    {
15177      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
15178      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15179    }
15180  else
15181    ret = ix86_expand_int_compare (code, op0, op1);
15182
15183  return ret;
15184}
15185
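/* Expand a conditional branch on the comparison of ix86_compare_op0 with
   ix86_compare_op1 under code CODE, jumping to LABEL when it holds.  */
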
15186void
15187ix86_expand_branch (enum rtx_code code, rtx label)
15188{
15189  rtx tmp;
15190
15191  switch (GET_MODE (ix86_compare_op0))
15192    {
15193    case SFmode:
15194    case DFmode:
15195    case XFmode:
15196    case QImode:
15197    case HImode:
15198    case SImode:
15199      simple:
15200      tmp = ix86_expand_compare (code);
15201      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15202				  gen_rtx_LABEL_REF (VOIDmode, label),
15203				  pc_rtx);
15204      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15205      return;
15206
15207    case DImode:
15208      if (TARGET_64BIT)
15209	goto simple;
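      /* FALLTHRU */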
15210    case TImode:
15211      /* Expand DImode branch into multiple compare+branch.  */
15212      {
15213	rtx lo[2], hi[2], label2;
15214	enum rtx_code code1, code2, code3;
15215	enum machine_mode submode;
15216
15217	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
15218	  {
15219	    tmp = ix86_compare_op0;
15220	    ix86_compare_op0 = ix86_compare_op1;
15221	    ix86_compare_op1 = tmp;
15222	    code = swap_condition (code);
15223	  }
15224	if (GET_MODE (ix86_compare_op0) == DImode)
15225	  {
15226	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
15227	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
15228	    submode = SImode;
15229	  }
15230	else
15231	  {
15232	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
15233	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
15234	    submode = DImode;
15235	  }
15236
15237	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
15238	   avoid two branches.  This costs one extra insn, so disable when
15239	   optimizing for size.  */
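
	/* For example, on a 32-bit target an equality test of DImode A and B
	   expands to roughly
	     xorl hi(b), hi(a)
	     xorl lo(b), lo(a)
	     orl  hi(a), lo(a)
	     j[n]e label  */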
15240
15241	if ((code == EQ || code == NE)
15242	    && (!optimize_insn_for_size_p ()
15243	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
15244	  {
15245	    rtx xor0, xor1;
15246
15247	    xor1 = hi[0];
15248	    if (hi[1] != const0_rtx)
15249	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
15250				   NULL_RTX, 0, OPTAB_WIDEN);
15251
15252	    xor0 = lo[0];
15253	    if (lo[1] != const0_rtx)
15254	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
15255				   NULL_RTX, 0, OPTAB_WIDEN);
15256
15257	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
15258				NULL_RTX, 0, OPTAB_WIDEN);
15259
15260	    ix86_compare_op0 = tmp;
15261	    ix86_compare_op1 = const0_rtx;
15262	    ix86_expand_branch (code, label);
15263	    return;
15264	  }
15265
	/* Otherwise, for a less-than or greater-or-equal comparison where
	   op1 is a constant and its low word is zero, we can just examine
	   the high word.  Similarly for a low word of all ones and a
	   less-or-equal or greater-than comparison.  */
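
	/* E.g. for a signed DImode test a < 0, both the high and the low
	   word of the constant are zero, so a single SImode compare of
	   hi(a) against zero suffices.  */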
15270
15271	if (CONST_INT_P (hi[1]))
15272	  switch (code)
15273	    {
15274	    case LT: case LTU: case GE: case GEU:
15275	      if (lo[1] == const0_rtx)
15276		{
15277		  ix86_compare_op0 = hi[0];
15278		  ix86_compare_op1 = hi[1];
15279		  ix86_expand_branch (code, label);
15280		  return;
15281		}
15282	      break;
15283	    case LE: case LEU: case GT: case GTU:
15284	      if (lo[1] == constm1_rtx)
15285		{
15286		  ix86_compare_op0 = hi[0];
15287		  ix86_compare_op1 = hi[1];
15288		  ix86_expand_branch (code, label);
15289		  return;
15290		}
15291	      break;
15292	    default:
15293	      break;
15294	    }
15295
15296	/* Otherwise, we need two or three jumps.  */
15297
15298	label2 = gen_label_rtx ();
15299
15300	code1 = code;
15301	code2 = swap_condition (code);
15302	code3 = unsigned_condition (code);
15303
15304	switch (code)
15305	  {
15306	  case LT: case GT: case LTU: case GTU:
15307	    break;
15308
15309	  case LE:   code1 = LT;  code2 = GT;  break;
15310	  case GE:   code1 = GT;  code2 = LT;  break;
15311	  case LEU:  code1 = LTU; code2 = GTU; break;
15312	  case GEU:  code1 = GTU; code2 = LTU; break;
15313
15314	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
15315	  case NE:   code2 = UNKNOWN; break;
15316
15317	  default:
15318	    gcc_unreachable ();
15319	  }
15320
15321	/*
15322	 * a < b =>
15323	 *    if (hi(a) < hi(b)) goto true;
15324	 *    if (hi(a) > hi(b)) goto false;
15325	 *    if (lo(a) < lo(b)) goto true;
15326	 *  false:
15327	 */
15328
15329	ix86_compare_op0 = hi[0];
15330	ix86_compare_op1 = hi[1];
15331
15332	if (code1 != UNKNOWN)
15333	  ix86_expand_branch (code1, label);
15334	if (code2 != UNKNOWN)
15335	  ix86_expand_branch (code2, label2);
15336
15337	ix86_compare_op0 = lo[0];
15338	ix86_compare_op1 = lo[1];
15339	ix86_expand_branch (code3, label);
15340
15341	if (code2 != UNKNOWN)
15342	  emit_label (label2);
15343	return;
15344      }
15345
15346    default:
      /* If we have already emitted a compare insn, the mode of
         ix86_compare_op0 is a condition-code mode and ix86_expand_compare
         will not emit another compare; go straight to simple.  */
15350      gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
15351      goto simple;
15352    }
15353}
15354
15355/* Split branch based on floating point condition.  */
15356void
15357ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
15358		      rtx target1, rtx target2, rtx tmp, rtx pushed)
15359{
15360  rtx condition;
15361  rtx i;
15362
15363  if (target2 != pc_rtx)
15364    {
15365      rtx tmp = target2;
15366      code = reverse_condition_maybe_unordered (code);
15367      target2 = target1;
15368      target1 = tmp;
15369    }
15370
15371  condition = ix86_expand_fp_compare (code, op1, op2,
15372				      tmp);
15373
15374  /* Remove pushed operand from stack.  */
15375  if (pushed)
15376    ix86_free_from_memory (GET_MODE (pushed));
15377
15378  i = emit_jump_insn (gen_rtx_SET
15379		      (VOIDmode, pc_rtx,
15380		       gen_rtx_IF_THEN_ELSE (VOIDmode,
15381					     condition, target1, target2)));
15382  if (split_branch_probability >= 0)
15383    add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
15384}
15385
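/* Store the result of comparing ix86_compare_op0 with ix86_compare_op1
   under code CODE into the QImode register DEST.  */
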
15386void
15387ix86_expand_setcc (enum rtx_code code, rtx dest)
15388{
15389  rtx ret;
15390
15391  gcc_assert (GET_MODE (dest) == QImode);
15392
15393  ret = ix86_expand_compare (code);
15394  PUT_MODE (ret, QImode);
15395  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
15396}
15397
15398/* Expand comparison setting or clearing carry flag.  Return true when
15399   successful and set pop for the operation.  */
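/* For example, (unsigned) a < b maps directly onto the carry flag set by
   the compare, which the sbb/adc based sequences below can consume without
   a setcc.  */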
15400static bool
15401ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15402{
15403  enum machine_mode mode =
15404    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15405
  /* Do not handle double-word compares (DImode on 32-bit, TImode on 64-bit);
     these go through a special path.  */
15407  if (mode == (TARGET_64BIT ? TImode : DImode))
15408    return false;
15409
15410  if (SCALAR_FLOAT_MODE_P (mode))
15411    {
15412      rtx compare_op, compare_seq;
15413
15414      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15415
      /* Shortcut:  the following common codes never translate
	 into carry-flag compares.  */
15418      if (code == EQ || code == NE || code == UNEQ || code == LTGT
15419	  || code == ORDERED || code == UNORDERED)
15420	return false;
15421
      /* These comparisons require the zero flag; swap operands so they don't.  */
15423      if ((code == GT || code == UNLE || code == LE || code == UNGT)
15424	  && !TARGET_IEEE_FP)
15425	{
15426	  rtx tmp = op0;
15427	  op0 = op1;
15428	  op1 = tmp;
15429	  code = swap_condition (code);
15430	}
15431
      /* Try to expand the comparison and verify that we end up with a
	 carry-flag-based comparison.  This fails only when we decide to
	 expand the comparison using arithmetic, which is not a common
	 scenario.  */
15436      start_sequence ();
15437      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15438      compare_seq = get_insns ();
15439      end_sequence ();
15440
15441      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15442	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15443        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15444      else
15445	code = GET_CODE (compare_op);
15446
15447      if (code != LTU && code != GEU)
15448	return false;
15449
15450      emit_insn (compare_seq);
15451      *pop = compare_op;
15452      return true;
15453    }
15454
15455  if (!INTEGRAL_MODE_P (mode))
15456    return false;
15457
15458  switch (code)
15459    {
15460    case LTU:
15461    case GEU:
15462      break;
15463
15464    /* Convert a==0 into (unsigned)a<1.  */
15465    case EQ:
15466    case NE:
15467      if (op1 != const0_rtx)
15468	return false;
15469      op1 = const1_rtx;
15470      code = (code == EQ ? LTU : GEU);
15471      break;
15472
    /* Convert a>b into b<a or a>=b+1.  */
15474    case GTU:
15475    case LEU:
15476      if (CONST_INT_P (op1))
15477	{
15478	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We could still swap the operands, but
	     that would force loading of the constant into a register.  */
15481	  if (op1 == const0_rtx
15482	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15483	    return false;
15484	  code = (code == GTU ? GEU : LTU);
15485	}
15486      else
15487	{
15488	  rtx tmp = op1;
15489	  op1 = op0;
15490	  op0 = tmp;
15491	  code = (code == GTU ? LTU : GEU);
15492	}
15493      break;
15494
15495    /* Convert a>=0 into (unsigned)a<0x80000000.  */
15496    case LT:
15497    case GE:
15498      if (mode == DImode || op1 != const0_rtx)
15499	return false;
15500      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15501      code = (code == LT ? GEU : LTU);
15502      break;
15503    case LE:
15504    case GT:
15505      if (mode == DImode || op1 != constm1_rtx)
15506	return false;
15507      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15508      code = (code == LE ? GEU : LTU);
15509      break;
15510
15511    default:
15512      return false;
15513    }
15514  /* Swapping operands may cause constant to appear as first operand.  */
15515  if (!nonimmediate_operand (op0, VOIDmode))
15516    {
15517      if (!can_create_pseudo_p ())
15518	return false;
15519      op0 = force_reg (mode, op0);
15520    }
15521  ix86_compare_op0 = op0;
15522  ix86_compare_op1 = op1;
15523  *pop = ix86_expand_compare (code);
15524  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
15525  return true;
15526}
15527
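/* Expand an integer conditional move.  OPERANDS[1] is the comparison,
   OPERANDS[2] and OPERANDS[3] the values to select between, OPERANDS[0]
   the destination.  Return 1 when done, 0 on failure.  */
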
15528int
15529ix86_expand_int_movcc (rtx operands[])
15530{
15531  enum rtx_code code = GET_CODE (operands[1]), compare_code;
15532  rtx compare_seq, compare_op;
15533  enum machine_mode mode = GET_MODE (operands[0]);
15534  bool sign_bit_compare_p = false;
15535
15536  start_sequence ();
15537  ix86_compare_op0 = XEXP (operands[1], 0);
15538  ix86_compare_op1 = XEXP (operands[1], 1);
15539  compare_op = ix86_expand_compare (code);
15540  compare_seq = get_insns ();
15541  end_sequence ();
15542
15543  compare_code = GET_CODE (compare_op);
15544
15545  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15546      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15547    sign_bit_compare_p = true;
15548
15549  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15550     HImode insns, we'd be swallowed in word prefix ops.  */
15551
15552  if ((mode != HImode || TARGET_FAST_PREFIX)
15553      && (mode != (TARGET_64BIT ? TImode : DImode))
15554      && CONST_INT_P (operands[2])
15555      && CONST_INT_P (operands[3]))
15556    {
15557      rtx out = operands[0];
15558      HOST_WIDE_INT ct = INTVAL (operands[2]);
15559      HOST_WIDE_INT cf = INTVAL (operands[3]);
15560      HOST_WIDE_INT diff;
15561
15562      diff = ct - cf;
      /* Sign bit compares are better done using shifts than using sbb.  */
15565      if (sign_bit_compare_p
15566	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15567					     ix86_compare_op1, &compare_op))
15568	{
15569	  /* Detect overlap between destination and compare sources.  */
15570	  rtx tmp = out;
15571
15572          if (!sign_bit_compare_p)
15573	    {
15574	      rtx flags;
15575	      bool fpcmp = false;
15576
15577	      compare_code = GET_CODE (compare_op);
15578
15579	      flags = XEXP (compare_op, 0);
15580
15581	      if (GET_MODE (flags) == CCFPmode
15582		  || GET_MODE (flags) == CCFPUmode)
15583		{
15584		  fpcmp = true;
15585		  compare_code
15586		    = ix86_fp_compare_code_to_integer (compare_code);
15587		}
15588
15589	      /* To simplify rest of code, restrict to the GEU case.  */
15590	      if (compare_code == LTU)
15591		{
15592		  HOST_WIDE_INT tmp = ct;
15593		  ct = cf;
15594		  cf = tmp;
15595		  compare_code = reverse_condition (compare_code);
15596		  code = reverse_condition (code);
15597		}
15598	      else
15599		{
15600		  if (fpcmp)
15601		    PUT_CODE (compare_op,
15602			      reverse_condition_maybe_unordered
15603			        (GET_CODE (compare_op)));
15604		  else
15605		    PUT_CODE (compare_op,
15606			      reverse_condition (GET_CODE (compare_op)));
15607		}
15608	      diff = ct - cf;
15609
15610	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15611		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
15612		tmp = gen_reg_rtx (mode);
15613
15614	      if (mode == DImode)
15615		emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
15616	      else
15617		emit_insn (gen_x86_movsicc_0_m1	(gen_lowpart (SImode, tmp),
15618						 flags, compare_op));
15619	    }
15620	  else
15621	    {
15622	      if (code == GT || code == GE)
15623		code = reverse_condition (code);
15624	      else
15625		{
15626		  HOST_WIDE_INT tmp = ct;
15627		  ct = cf;
15628		  cf = tmp;
15629		  diff = ct - cf;
15630		}
15631	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15632				     ix86_compare_op1, VOIDmode, 0, -1);
15633	    }
15634
15635	  if (diff == 1)
15636	    {
15637	      /*
15638	       * cmpl op0,op1
15639	       * sbbl dest,dest
15640	       * [addl dest, ct]
15641	       *
15642	       * Size 5 - 8.
15643	       */
15644	      if (ct)
15645		tmp = expand_simple_binop (mode, PLUS,
15646					   tmp, GEN_INT (ct),
15647					   copy_rtx (tmp), 1, OPTAB_DIRECT);
15648	    }
15649	  else if (cf == -1)
15650	    {
15651	      /*
15652	       * cmpl op0,op1
15653	       * sbbl dest,dest
15654	       * orl $ct, dest
15655	       *
15656	       * Size 8.
15657	       */
15658	      tmp = expand_simple_binop (mode, IOR,
15659					 tmp, GEN_INT (ct),
15660					 copy_rtx (tmp), 1, OPTAB_DIRECT);
15661	    }
15662	  else if (diff == -1 && ct)
15663	    {
15664	      /*
15665	       * cmpl op0,op1
15666	       * sbbl dest,dest
15667	       * notl dest
15668	       * [addl dest, cf]
15669	       *
15670	       * Size 8 - 11.
15671	       */
15672	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15673	      if (cf)
15674		tmp = expand_simple_binop (mode, PLUS,
15675					   copy_rtx (tmp), GEN_INT (cf),
15676					   copy_rtx (tmp), 1, OPTAB_DIRECT);
15677	    }
15678	  else
15679	    {
15680	      /*
15681	       * cmpl op0,op1
15682	       * sbbl dest,dest
15683	       * [notl dest]
15684	       * andl cf - ct, dest
15685	       * [addl dest, ct]
15686	       *
15687	       * Size 8 - 11.
15688	       */
15689
15690	      if (cf == 0)
15691		{
15692		  cf = ct;
15693		  ct = 0;
15694		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15695		}
15696
15697	      tmp = expand_simple_binop (mode, AND,
15698					 copy_rtx (tmp),
15699					 gen_int_mode (cf - ct, mode),
15700					 copy_rtx (tmp), 1, OPTAB_DIRECT);
15701	      if (ct)
15702		tmp = expand_simple_binop (mode, PLUS,
15703					   copy_rtx (tmp), GEN_INT (ct),
15704					   copy_rtx (tmp), 1, OPTAB_DIRECT);
15705	    }
15706
15707	  if (!rtx_equal_p (tmp, out))
15708	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15709
15710	  return 1; /* DONE */
15711	}
15712
15713      if (diff < 0)
15714	{
15715	  enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15716
15717	  HOST_WIDE_INT tmp;
15718	  tmp = ct, ct = cf, cf = tmp;
15719	  diff = -diff;
15720
15721	  if (SCALAR_FLOAT_MODE_P (cmp_mode))
15722	    {
15723	      gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15724
	      /* We may be reversing an unordered compare to a normal compare,
		 which is not valid in general (we may convert a non-trapping
		 condition into a trapping one); however, on i386 we currently
		 emit all comparisons unordered.  */
15729	      compare_code = reverse_condition_maybe_unordered (compare_code);
15730	      code = reverse_condition_maybe_unordered (code);
15731	    }
15732	  else
15733	    {
15734	      compare_code = reverse_condition (compare_code);
15735	      code = reverse_condition (code);
15736	    }
15737	}
15738
15739      compare_code = UNKNOWN;
15740      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15741	  && CONST_INT_P (ix86_compare_op1))
15742	{
15743	  if (ix86_compare_op1 == const0_rtx
15744	      && (code == LT || code == GE))
15745	    compare_code = code;
15746	  else if (ix86_compare_op1 == constm1_rtx)
15747	    {
15748	      if (code == LE)
15749		compare_code = LT;
15750	      else if (code == GT)
15751		compare_code = GE;
15752	    }
15753	}
15754
15755      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
15756      if (compare_code != UNKNOWN
15757	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15758	  && (cf == -1 || ct == -1))
15759	{
	  /* If the lea code below could be used, only optimize
	     if it results in a 2-insn sequence.  */
15762
15763	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15764		 || diff == 3 || diff == 5 || diff == 9)
15765	      || (compare_code == LT && ct == -1)
15766	      || (compare_code == GE && cf == -1))
15767	    {
15768	      /*
15769	       * notl op1	(if necessary)
15770	       * sarl $31, op1
15771	       * orl cf, op1
15772	       */
15773	      if (ct != -1)
15774		{
15775		  cf = ct;
15776		  ct = -1;
15777		  code = reverse_condition (code);
15778		}
15779
15780	      out = emit_store_flag (out, code, ix86_compare_op0,
15781				     ix86_compare_op1, VOIDmode, 0, -1);
15782
15783	      out = expand_simple_binop (mode, IOR,
15784					 out, GEN_INT (cf),
15785					 out, 1, OPTAB_DIRECT);
15786	      if (out != operands[0])
15787		emit_move_insn (operands[0], out);
15788
15789	      return 1; /* DONE */
15790	    }
15791	}
15792
15793
15794      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15795	   || diff == 3 || diff == 5 || diff == 9)
15796	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15797	  && (mode != DImode
15798	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15799	{
15800	  /*
15801	   * xorl dest,dest
15802	   * cmpl op1,op2
15803	   * setcc dest
15804	   * lea cf(dest*(ct-cf)),dest
15805	   *
15806	   * Size 14.
15807	   *
15808	   * This also catches the degenerate setcc-only case.
15809	   */
15810
15811	  rtx tmp;
15812	  int nops;
15813
15814	  out = emit_store_flag (out, code, ix86_compare_op0,
15815				 ix86_compare_op1, VOIDmode, 0, 1);
15816
15817	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to do the arithmetic in the proper mode to match.  */
15820	  if (diff == 1)
15821	    tmp = copy_rtx (out);
15822	  else
15823	    {
15824	      rtx out1;
15825	      out1 = copy_rtx (out);
15826	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15827	      nops++;
15828	      if (diff & 1)
15829		{
15830		  tmp = gen_rtx_PLUS (mode, tmp, out1);
15831		  nops++;
15832		}
15833	    }
15834	  if (cf != 0)
15835	    {
15836	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15837	      nops++;
15838	    }
15839	  if (!rtx_equal_p (tmp, out))
15840	    {
15841	      if (nops == 1)
15842		out = force_operand (tmp, copy_rtx (out));
15843	      else
15844		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15845	    }
15846	  if (!rtx_equal_p (out, operands[0]))
15847	    emit_move_insn (operands[0], copy_rtx (out));
15848
15849	  return 1; /* DONE */
15850	}
15851
15852      /*
15853       * General case:			Jumpful:
15854       *   xorl dest,dest		cmpl op1, op2
15855       *   cmpl op1, op2		movl ct, dest
15856       *   setcc dest			jcc 1f
15857       *   decl dest			movl cf, dest
15858       *   andl (cf-ct),dest		1:
15859       *   addl ct,dest
15860       *
15861       * Size 20.			Size 14.
15862       *
15863       * This is reasonably steep, but branch mispredict costs are
15864       * high on modern cpus, so consider failing only if optimizing
15865       * for space.
15866       */
15867
15868      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15869	  && BRANCH_COST (optimize_insn_for_speed_p (),
15870		  	  false) >= 2)
15871	{
15872	  if (cf == 0)
15873	    {
15874	      enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15875
15876	      cf = ct;
15877	      ct = 0;
15878
15879	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
15880		{
15881		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15882
		  /* We may be reversing an unordered compare to a normal
		     compare, which is not valid in general (we may convert a
		     non-trapping condition into a trapping one); however, on
		     i386 we currently emit all comparisons unordered.  */
15887		  code = reverse_condition_maybe_unordered (code);
15888		}
15889	      else
15890		{
15891		  code = reverse_condition (code);
15892		  if (compare_code != UNKNOWN)
15893		    compare_code = reverse_condition (compare_code);
15894		}
15895	    }
15896
15897	  if (compare_code != UNKNOWN)
15898	    {
15899	      /* notl op1	(if needed)
15900		 sarl $31, op1
15901		 andl (cf-ct), op1
15902		 addl ct, op1
15903
15904		 For x < 0 (resp. x <= -1) there will be no notl,
15905		 so if possible swap the constants to get rid of the
15906		 complement.
15907		 True/false will be -1/0 while code below (store flag
15908		 followed by decrement) is 0/-1, so the constants need
15909		 to be exchanged once more.  */
15910
15911	      if (compare_code == GE || !cf)
15912		{
15913		  code = reverse_condition (code);
15914		  compare_code = LT;
15915		}
15916	      else
15917		{
15918		  HOST_WIDE_INT tmp = cf;
15919		  cf = ct;
15920		  ct = tmp;
15921		}
15922
15923	      out = emit_store_flag (out, code, ix86_compare_op0,
15924				     ix86_compare_op1, VOIDmode, 0, -1);
15925	    }
15926	  else
15927	    {
15928	      out = emit_store_flag (out, code, ix86_compare_op0,
15929				     ix86_compare_op1, VOIDmode, 0, 1);
15930
15931	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15932					 copy_rtx (out), 1, OPTAB_DIRECT);
15933	    }
15934
15935	  out = expand_simple_binop (mode, AND, copy_rtx (out),
15936				     gen_int_mode (cf - ct, mode),
15937				     copy_rtx (out), 1, OPTAB_DIRECT);
15938	  if (ct)
15939	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15940				       copy_rtx (out), 1, OPTAB_DIRECT);
15941	  if (!rtx_equal_p (out, operands[0]))
15942	    emit_move_insn (operands[0], copy_rtx (out));
15943
15944	  return 1; /* DONE */
15945	}
15946    }
15947
15948  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15949    {
15950      /* Try a few things more with specific constants and a variable.  */
15951
15952      optab op;
15953      rtx var, orig_out, out, tmp;
15954
15955      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15956	return 0; /* FAIL */
15957
      /* If one of the two operands is an interesting constant, load a 0/-1
	 constant via the recursive call below and mask the variable in with
	 a logical operation.  */
15960
15961      if (CONST_INT_P (operands[2]))
15962	{
15963	  var = operands[3];
15964	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15965	    operands[3] = constm1_rtx, op = and_optab;
15966	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15967	    operands[3] = const0_rtx, op = ior_optab;
15968	  else
15969	    return 0; /* FAIL */
15970	}
15971      else if (CONST_INT_P (operands[3]))
15972	{
15973	  var = operands[2];
15974	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15975	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
15977	    operands[2] = const0_rtx, op = ior_optab;
15978	  else
15979	    return 0; /* FAIL */
15980	}
15981      else
15982        return 0; /* FAIL */
15983
15984      orig_out = operands[0];
15985      tmp = gen_reg_rtx (mode);
15986      operands[0] = tmp;
15987
15988      /* Recurse to get the constant loaded.  */
15989      if (ix86_expand_int_movcc (operands) == 0)
15990        return 0; /* FAIL */
15991
15992      /* Mask in the interesting variable.  */
15993      out = expand_binop (mode, op, var, tmp, orig_out, 0,
15994			  OPTAB_WIDEN);
15995      if (!rtx_equal_p (out, orig_out))
15996	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15997
15998      return 1; /* DONE */
15999    }
16000
16001  /*
16002   * For comparison with above,
16003   *
16004   * movl cf,dest
16005   * movl ct,tmp
16006   * cmpl op1,op2
16007   * cmovcc tmp,dest
16008   *
16009   * Size 15.
16010   */
16011
16012  if (! nonimmediate_operand (operands[2], mode))
16013    operands[2] = force_reg (mode, operands[2]);
16014  if (! nonimmediate_operand (operands[3], mode))
16015    operands[3] = force_reg (mode, operands[3]);
16016
16017  if (! register_operand (operands[2], VOIDmode)
16018      && (mode == QImode
16019          || ! register_operand (operands[3], VOIDmode)))
16020    operands[2] = force_reg (mode, operands[2]);
16021
16022  if (mode == QImode
16023      && ! register_operand (operands[3], VOIDmode))
16024    operands[3] = force_reg (mode, operands[3]);
16025
16026  emit_insn (compare_seq);
16027  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16028			  gen_rtx_IF_THEN_ELSE (mode,
16029						compare_op, operands[2],
16030						operands[3])));
16031
16032  return 1; /* DONE */
16033}
16034
16035/* Swap, force into registers, or otherwise massage the two operands
16036   to an sse comparison with a mask result.  Thus we differ a bit from
16037   ix86_prepare_fp_compare_args which expects to produce a flags result.
16038
16039   The DEST operand exists to help determine whether to commute commutative
16040   operators.  The POP0/POP1 operands are updated in place.  The new
16041   comparison code is returned, or UNKNOWN if not implementable.  */
16042
16043static enum rtx_code
16044ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
16045				  rtx *pop0, rtx *pop1)
16046{
16047  rtx tmp;
16048
16049  switch (code)
16050    {
16051    case LTGT:
16052    case UNEQ:
16053      /* We have no LTGT as an operator.  We could implement it with
16054	 NE & ORDERED, but this requires an extra temporary.  It's
16055	 not clear that it's worth it.  */
16056      return UNKNOWN;
16057
16058    case LT:
16059    case LE:
16060    case UNGT:
16061    case UNGE:
16062      /* These are supported directly.  */
16063      break;
16064
16065    case EQ:
16066    case NE:
16067    case UNORDERED:
16068    case ORDERED:
16069      /* For commutative operators, try to canonicalize the destination
16070	 operand to be first in the comparison - this helps reload to
16071	 avoid extra moves.  */
16072      if (!dest || !rtx_equal_p (dest, *pop1))
16073	break;
16074      /* FALLTHRU */
16075
16076    case GE:
16077    case GT:
16078    case UNLE:
16079    case UNLT:
16080      /* These are not supported directly.  Swap the comparison operands
16081	 to transform into something that is supported.  */
16082      tmp = *pop0;
16083      *pop0 = *pop1;
16084      *pop1 = tmp;
16085      code = swap_condition (code);
16086      break;
16087
16088    default:
16089      gcc_unreachable ();
16090    }
16091
16092  return code;
16093}
16094
16095/* Detect conditional moves that exactly match min/max operational
16096   semantics.  Note that this is IEEE safe, as long as we don't
16097   interchange the operands.
16098
16099   Returns FALSE if this conditional move doesn't match a MIN/MAX,
16100   and TRUE if the operation is successful and instructions are emitted.  */
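/* For reference: the SSE min/max instructions return their second operand
   when the operands are unordered (NaN) or compare equal (e.g. -0.0 vs.
   +0.0), so "a < b ? a : b" corresponds to MIN with a first and b second;
   interchanging the operands would change the NaN and signed-zero
   behavior.  */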
16101
16102static bool
16103ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
16104			   rtx cmp_op1, rtx if_true, rtx if_false)
16105{
16106  enum machine_mode mode;
16107  bool is_min;
16108  rtx tmp;
16109
16110  if (code == LT)
16111    ;
16112  else if (code == UNGE)
16113    {
16114      tmp = if_true;
16115      if_true = if_false;
16116      if_false = tmp;
16117    }
16118  else
16119    return false;
16120
16121  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
16122    is_min = true;
16123  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
16124    is_min = false;
16125  else
16126    return false;
16127
16128  mode = GET_MODE (dest);
16129
16130  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
16131     but MODE may be a vector mode and thus not appropriate.  */
16132  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
16133    {
16134      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
16135      rtvec v;
16136
16137      if_true = force_reg (mode, if_true);
16138      v = gen_rtvec (2, if_true, if_false);
16139      tmp = gen_rtx_UNSPEC (mode, v, u);
16140    }
16141  else
16142    {
16143      code = is_min ? SMIN : SMAX;
16144      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
16145    }
16146
16147  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
16148  return true;
16149}
16150
16151/* Expand an sse vector comparison.  Return the register with the result.  */
16152
16153static rtx
16154ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
16155		     rtx op_true, rtx op_false)
16156{
16157  enum machine_mode mode = GET_MODE (dest);
16158  rtx x;
16159
16160  cmp_op0 = force_reg (mode, cmp_op0);
16161  if (!nonimmediate_operand (cmp_op1, mode))
16162    cmp_op1 = force_reg (mode, cmp_op1);
16163
16164  if (optimize
16165      || reg_overlap_mentioned_p (dest, op_true)
16166      || reg_overlap_mentioned_p (dest, op_false))
16167    dest = gen_reg_rtx (mode);
16168
16169  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
16170  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16171
16172  return dest;
16173}
16174
16175/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
16176   operations.  This is used for both scalar and vector conditional moves.  */
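/* When XOP's vector conditional move is not available, the general case
   below computes dest = (cmp & op_true) | (~cmp & op_false), relying on
   CMP being an all-ones or all-zeros mask per element.  */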
16177
16178static void
16179ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
16180{
16181  enum machine_mode mode = GET_MODE (dest);
16182  rtx t2, t3, x;
16183
16184  if (op_false == CONST0_RTX (mode))
16185    {
16186      op_true = force_reg (mode, op_true);
16187      x = gen_rtx_AND (mode, cmp, op_true);
16188      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16189    }
16190  else if (op_true == CONST0_RTX (mode))
16191    {
16192      op_false = force_reg (mode, op_false);
16193      x = gen_rtx_NOT (mode, cmp);
16194      x = gen_rtx_AND (mode, x, op_false);
16195      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16196    }
16197  else if (TARGET_XOP)
16198    {
16199      rtx pcmov = gen_rtx_SET (mode, dest,
16200			       gen_rtx_IF_THEN_ELSE (mode, cmp,
16201						     op_true,
16202						     op_false));
16203      emit_insn (pcmov);
16204    }
16205  else
16206    {
16207      op_true = force_reg (mode, op_true);
16208      op_false = force_reg (mode, op_false);
16209
16210      t2 = gen_reg_rtx (mode);
16211      if (optimize)
16212	t3 = gen_reg_rtx (mode);
16213      else
16214	t3 = dest;
16215
16216      x = gen_rtx_AND (mode, op_true, cmp);
16217      emit_insn (gen_rtx_SET (VOIDmode, t2, x));
16218
16219      x = gen_rtx_NOT (mode, cmp);
16220      x = gen_rtx_AND (mode, x, op_false);
16221      emit_insn (gen_rtx_SET (VOIDmode, t3, x));
16222
16223      x = gen_rtx_IOR (mode, t3, t2);
16224      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16225    }
16226}
16227
16228/* Expand a floating-point conditional move.  Return true if successful.  */
16229
16230int
16231ix86_expand_fp_movcc (rtx operands[])
16232{
16233  enum machine_mode mode = GET_MODE (operands[0]);
16234  enum rtx_code code = GET_CODE (operands[1]);
16235  rtx tmp, compare_op;
16236
16237  ix86_compare_op0 = XEXP (operands[1], 0);
16238  ix86_compare_op1 = XEXP (operands[1], 1);
16239  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16240    {
16241      enum machine_mode cmode;
16242
16243      /* Since we've no cmove for sse registers, don't force bad register
16244	 allocation just to gain access to it.  Deny movcc when the
16245	 comparison mode doesn't match the move mode.  */
16246      cmode = GET_MODE (ix86_compare_op0);
16247      if (cmode == VOIDmode)
16248	cmode = GET_MODE (ix86_compare_op1);
16249      if (cmode != mode)
16250	return 0;
16251
16252      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16253					       &ix86_compare_op0,
16254					       &ix86_compare_op1);
16255      if (code == UNKNOWN)
16256	return 0;
16257
16258      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
16259				     ix86_compare_op1, operands[2],
16260				     operands[3]))
16261	return 1;
16262
16263      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
16264				 ix86_compare_op1, operands[2], operands[3]);
16265      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
16266      return 1;
16267    }
16268
16269  /* The floating point conditional move instructions don't directly
16270     support conditions resulting from a signed integer comparison.  */
16271
16272  compare_op = ix86_expand_compare (code);
16273  if (!fcmov_comparison_operator (compare_op, VOIDmode))
16274    {
16275      tmp = gen_reg_rtx (QImode);
16276      ix86_expand_setcc (code, tmp);
16277      code = NE;
16278      ix86_compare_op0 = tmp;
16279      ix86_compare_op1 = const0_rtx;
16280      compare_op = ix86_expand_compare (code);
16281    }
16282
16283  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16284			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
16285						operands[2], operands[3])));
16286
16287  return 1;
16288}
16289
16290/* Expand a floating-point vector conditional move; a vcond operation
16291   rather than a movcc operation.  */
16292
16293bool
16294ix86_expand_fp_vcond (rtx operands[])
16295{
16296  enum rtx_code code = GET_CODE (operands[3]);
16297  rtx cmp;
16298
16299  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16300					   &operands[4], &operands[5]);
16301  if (code == UNKNOWN)
16302    return false;
16303
16304  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
16305				 operands[5], operands[1], operands[2]))
16306    return true;
16307
16308  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
16309			     operands[1], operands[2]);
16310  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
16311  return true;
16312}
16313
16314/* Expand a signed/unsigned integral vector conditional move.  */
16315
16316bool
16317ix86_expand_int_vcond (rtx operands[])
16318{
16319  enum machine_mode mode = GET_MODE (operands[0]);
16320  enum rtx_code code = GET_CODE (operands[3]);
16321  bool negate = false;
16322  rtx x, cop0, cop1;
16323
16324  cop0 = operands[4];
16325  cop1 = operands[5];
16326
16327  /* XOP supports all of the comparisons on all vector int types.  */
16328  if (!TARGET_XOP)
16329    {
16330      /* Canonicalize the comparison to EQ, GT, GTU.  */
16331      switch (code)
16332	{
16333	case EQ:
16334	case GT:
16335	case GTU:
16336	  break;
16337
16338	case NE:
16339	case LE:
16340	case LEU:
16341	  code = reverse_condition (code);
16342	  negate = true;
16343	  break;
16344
16345	case GE:
16346	case GEU:
16347	  code = reverse_condition (code);
16348	  negate = true;
16349	  /* FALLTHRU */
16350
16351	case LT:
16352	case LTU:
16353	  code = swap_condition (code);
16354	  x = cop0, cop0 = cop1, cop1 = x;
16355	  break;
16356
16357	default:
16358	  gcc_unreachable ();
16359	}
16360
16361      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
16362      if (mode == V2DImode)
16363	{
16364	  switch (code)
16365	    {
16366	    case EQ:
16367	      /* SSE4.1 supports EQ.  */
16368	      if (!TARGET_SSE4_1)
16369		return false;
16370	      break;
16371
16372	    case GT:
16373	    case GTU:
16374	      /* SSE4.2 supports GT/GTU.  */
16375	      if (!TARGET_SSE4_2)
16376		return false;
16377	      break;
16378
16379	    default:
16380	      gcc_unreachable ();
16381	    }
16382	}
16383
      /* Unsigned parallel compare is not supported by the hardware.
	 Play some tricks to turn this into a signed comparison.  */
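      /* E.g. for V4SImode, a >u b holds iff
	 (a - 0x80000000) >s (b - 0x80000000); for V16QImode and V8HImode we
	 instead use unsigned saturating subtraction, since a >u b iff
	 (a -us b) != 0.  */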
16387      if (code == GTU)
16388	{
16389	  cop0 = force_reg (mode, cop0);
16390
16391	  switch (mode)
16392	    {
16393	    case V4SImode:
16394	    case V2DImode:
16395		{
16396		  rtx t1, t2, mask;
16397		  rtx (*gen_sub3) (rtx, rtx, rtx);
16398
		  /* Subtract (-(INT MAX) - 1), i.e. INT_MIN, from both
		     operands so the unsigned comparison becomes a signed
		     one.  */
16401		  mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16402						  true, false);
16403		  gen_sub3 = (mode == V4SImode
16404			      ? gen_subv4si3 : gen_subv2di3);
16405		  t1 = gen_reg_rtx (mode);
16406		  emit_insn (gen_sub3 (t1, cop0, mask));
16407
16408		  t2 = gen_reg_rtx (mode);
16409		  emit_insn (gen_sub3 (t2, cop1, mask));
16410
16411		  cop0 = t1;
16412		  cop1 = t2;
16413		  code = GT;
16414		}
16415	      break;
16416
16417	    case V16QImode:
16418	    case V8HImode:
16419	      /* Perform a parallel unsigned saturating subtraction.  */
16420	      x = gen_reg_rtx (mode);
16421	      emit_insn (gen_rtx_SET (VOIDmode, x,
16422				      gen_rtx_US_MINUS (mode, cop0, cop1)));
16423
16424	      cop0 = x;
16425	      cop1 = CONST0_RTX (mode);
16426	      code = EQ;
16427	      negate = !negate;
16428	      break;
16429
16430	    default:
16431	      gcc_unreachable ();
16432	    }
16433	}
16434    }
16435
16436  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16437			   operands[1+negate], operands[2-negate]);
16438
16439  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16440			 operands[2-negate]);
16441  return true;
16442}
16443
16444/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
16445   true if we should do zero extension, else sign extension.  HIGH_P is
16446   true if we want the N/2 high elements, else the low elements.  */
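/* Without SSE4.1, this works by interleaving the source with either zero
   (for zero extension) or a computed sign mask (for sign extension);
   e.g. interleaving the low half of an unsigned V16QImode value with zero
   yields the zero-extended V8HImode result.  */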
16447
16448void
16449ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16450{
16451  enum machine_mode imode = GET_MODE (operands[1]);
16452  rtx (*unpack)(rtx, rtx, rtx);
16453  rtx se, dest;
16454
16455  switch (imode)
16456    {
16457    case V16QImode:
16458      if (high_p)
16459        unpack = gen_vec_interleave_highv16qi;
16460      else
16461        unpack = gen_vec_interleave_lowv16qi;
16462      break;
16463    case V8HImode:
16464      if (high_p)
16465        unpack = gen_vec_interleave_highv8hi;
16466      else
16467        unpack = gen_vec_interleave_lowv8hi;
16468      break;
16469    case V4SImode:
16470      if (high_p)
16471        unpack = gen_vec_interleave_highv4si;
16472      else
16473        unpack = gen_vec_interleave_lowv4si;
16474      break;
16475    default:
16476      gcc_unreachable ();
16477    }
16478
16479  dest = gen_lowpart (imode, operands[0]);
16480
16481  if (unsigned_p)
16482    se = force_reg (imode, CONST0_RTX (imode));
16483  else
16484    se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16485                              operands[1], pc_rtx, pc_rtx);
16486
16487  emit_insn (unpack (dest, operands[1], se));
16488}
16489
16490/* This function performs the same task as ix86_expand_sse_unpack,
16491   but with SSE4.1 instructions.  */
16492
16493void
16494ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16495{
16496  enum machine_mode imode = GET_MODE (operands[1]);
16497  rtx (*unpack)(rtx, rtx);
16498  rtx src, dest;
16499
16500  switch (imode)
16501    {
16502    case V16QImode:
16503      if (unsigned_p)
16504	unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16505      else
16506	unpack = gen_sse4_1_extendv8qiv8hi2;
16507      break;
16508    case V8HImode:
16509      if (unsigned_p)
16510	unpack = gen_sse4_1_zero_extendv4hiv4si2;
16511      else
16512	unpack = gen_sse4_1_extendv4hiv4si2;
16513      break;
16514    case V4SImode:
16515      if (unsigned_p)
16516	unpack = gen_sse4_1_zero_extendv2siv2di2;
16517      else
16518	unpack = gen_sse4_1_extendv2siv2di2;
16519      break;
16520    default:
16521      gcc_unreachable ();
16522    }
16523
16524  dest = operands[0];
16525  if (high_p)
16526    {
16527      /* Shift higher 8 bytes to lower 8 bytes.  */
16528      src = gen_reg_rtx (imode);
16529      emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
16530				     gen_lowpart (V1TImode, operands[1]),
16531				     GEN_INT (64)));
16532    }
16533  else
16534    src = operands[1];
16535
16536  emit_insn (unpack (dest, src));
16537}
16538
/* Expand conditional increment or decrement using adc/sbb instructions.
16540   The default case using setcc followed by the conditional move can be
16541   done by generic code.  */
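/* For example, with unsigned A in %eax and B in %ebx,
     x += (A < B);
   can be emitted as
     cmpl %ebx, %eax
     adcl $0, x
   since the compare sets the carry flag exactly when A < B.  */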
16542int
16543ix86_expand_int_addcc (rtx operands[])
16544{
16545  enum rtx_code code = GET_CODE (operands[1]);
16546  rtx flags;
16547  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
16548  rtx compare_op;
16549  rtx val = const0_rtx;
16550  bool fpcmp = false;
16551  enum machine_mode mode;
16552
16553  ix86_compare_op0 = XEXP (operands[1], 0);
16554  ix86_compare_op1 = XEXP (operands[1], 1);
16555  if (operands[3] != const1_rtx
16556      && operands[3] != constm1_rtx)
16557    return 0;
16558  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16559				       ix86_compare_op1, &compare_op))
16560     return 0;
16561  code = GET_CODE (compare_op);
16562
16563  flags = XEXP (compare_op, 0);
16564
16565  if (GET_MODE (flags) == CCFPmode
16566      || GET_MODE (flags) == CCFPUmode)
16567    {
16568      fpcmp = true;
16569      code = ix86_fp_compare_code_to_integer (code);
16570    }
16571
16572  if (code != LTU)
16573    {
16574      val = constm1_rtx;
16575      if (fpcmp)
16576	PUT_CODE (compare_op,
16577		  reverse_condition_maybe_unordered
16578		    (GET_CODE (compare_op)));
16579      else
16580	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16581    }
16582
16583  mode = GET_MODE (operands[0]);
16584
16585  /* Construct either adc or sbb insn.  */
16586  if ((code == LTU) == (operands[3] == constm1_rtx))
16587    {
16588      switch (mode)
16589	{
16590	  case QImode:
16591	    insn = gen_subqi3_carry;
16592	    break;
16593	  case HImode:
16594	    insn = gen_subhi3_carry;
16595	    break;
16596	  case SImode:
16597	    insn = gen_subsi3_carry;
16598	    break;
16599	  case DImode:
16600	    insn = gen_subdi3_carry;
16601	    break;
16602	  default:
16603	    gcc_unreachable ();
16604	}
16605    }
16606  else
16607    {
16608      switch (mode)
16609	{
16610	  case QImode:
16611	    insn = gen_addqi3_carry;
16612	    break;
16613	  case HImode:
16614	    insn = gen_addhi3_carry;
16615	    break;
16616	  case SImode:
16617	    insn = gen_addsi3_carry;
16618	    break;
16619	  case DImode:
16620	    insn = gen_adddi3_carry;
16621	    break;
16622	  default:
16623	    gcc_unreachable ();
16624	}
16625    }
16626  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
16627
16628  return 1; /* DONE */
16629}
16630
16631
/* Split OPERAND of mode MODE into word-sized parts stored in PARTS.
   Similar to split_di, but also works for floating point parameters and
   non-offsettable memories.  For pushes, it returns just stack offsets;
   the values will be saved in the right order.  At most four parts are
   generated.  */
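/* E.g. on a 32-bit target DImode and DFmode split into two SImode parts,
   XFmode into three and TFmode into four; on a 64-bit target XFmode and
   TFmode split into two parts.  */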
16636
16637static int
16638ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16639{
16640  int size;
16641
16642  if (!TARGET_64BIT)
16643    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16644  else
16645    size = (GET_MODE_SIZE (mode) + 4) / 8;
16646
16647  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16648  gcc_assert (size >= 2 && size <= 4);
16649
  /* Optimize constant pool references to immediates.  This is used by fp
     moves that force all constants to memory to allow combining.  */
16652  if (MEM_P (operand) && MEM_READONLY_P (operand))
16653    {
16654      rtx tmp = maybe_get_pool_constant (operand);
16655      if (tmp)
16656	operand = tmp;
16657    }
16658
16659  if (MEM_P (operand) && !offsettable_memref_p (operand))
16660    {
      /* The only non-offsettable memories we handle are pushes.  */
16662      int ok = push_operand (operand, VOIDmode);
16663
16664      gcc_assert (ok);
16665
16666      operand = copy_rtx (operand);
16667      PUT_MODE (operand, Pmode);
16668      parts[0] = parts[1] = parts[2] = parts[3] = operand;
16669      return size;
16670    }
16671
16672  if (GET_CODE (operand) == CONST_VECTOR)
16673    {
16674      enum machine_mode imode = int_mode_for_mode (mode);
16675      /* Caution: if we looked through a constant pool memory above,
16676	 the operand may actually have a different mode now.  That's
16677	 ok, since we want to pun this all the way back to an integer.  */
16678      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16679      gcc_assert (operand != NULL);
16680      mode = imode;
16681    }
16682
16683  if (!TARGET_64BIT)
16684    {
16685      if (mode == DImode)
16686	split_di (&operand, 1, &parts[0], &parts[1]);
16687      else
16688	{
16689	  int i;
16690
16691	  if (REG_P (operand))
16692	    {
16693	      gcc_assert (reload_completed);
16694	      for (i = 0; i < size; i++)
16695		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16696	    }
16697	  else if (offsettable_memref_p (operand))
16698	    {
16699	      operand = adjust_address (operand, SImode, 0);
16700	      parts[0] = operand;
16701	      for (i = 1; i < size; i++)
16702		parts[i] = adjust_address (operand, SImode, 4 * i);
16703	    }
16704	  else if (GET_CODE (operand) == CONST_DOUBLE)
16705	    {
16706	      REAL_VALUE_TYPE r;
16707	      long l[4];
16708
16709	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16710	      switch (mode)
16711		{
16712		case TFmode:
16713		  real_to_target (l, &r, mode);
16714		  parts[3] = gen_int_mode (l[3], SImode);
16715		  parts[2] = gen_int_mode (l[2], SImode);
16716		  break;
16717		case XFmode:
16718		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16719		  parts[2] = gen_int_mode (l[2], SImode);
16720		  break;
16721		case DFmode:
16722		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16723		  break;
16724		default:
16725		  gcc_unreachable ();
16726		}
16727	      parts[1] = gen_int_mode (l[1], SImode);
16728	      parts[0] = gen_int_mode (l[0], SImode);
16729	    }
16730	  else
16731	    gcc_unreachable ();
16732	}
16733    }
16734  else
16735    {
16736      if (mode == TImode)
16737	split_ti (&operand, 1, &parts[0], &parts[1]);
16738      if (mode == XFmode || mode == TFmode)
16739	{
16740	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16741	  if (REG_P (operand))
16742	    {
16743	      gcc_assert (reload_completed);
16744	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16745	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16746	    }
16747	  else if (offsettable_memref_p (operand))
16748	    {
16749	      operand = adjust_address (operand, DImode, 0);
16750	      parts[0] = operand;
16751	      parts[1] = adjust_address (operand, upper_mode, 8);
16752	    }
16753	  else if (GET_CODE (operand) == CONST_DOUBLE)
16754	    {
16755	      REAL_VALUE_TYPE r;
16756	      long l[4];
16757
16758	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16759	      real_to_target (l, &r, mode);
16760
16761	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
16762	      if (HOST_BITS_PER_WIDE_INT >= 64)
16763	        parts[0]
16764		  = gen_int_mode
16765		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16766		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16767		       DImode);
16768	      else
16769	        parts[0] = immed_double_const (l[0], l[1], DImode);
16770
16771	      if (upper_mode == SImode)
16772	        parts[1] = gen_int_mode (l[2], SImode);
16773	      else if (HOST_BITS_PER_WIDE_INT >= 64)
16774	        parts[1]
16775		  = gen_int_mode
16776		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16777		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16778		       DImode);
16779	      else
16780	        parts[1] = immed_double_const (l[2], l[3], DImode);
16781	    }
16782	  else
16783	    gcc_unreachable ();
16784	}
16785    }
16786
16787  return size;
16788}
16789
/* Emit insns to perform a move or push of DI, DF, XF, and TF values;
   operands[0] is the destination and operands[1] the source.  */
16794
16795void
16796ix86_split_long_move (rtx operands[])
16797{
16798  rtx part[2][4];
16799  int nparts, i, j;
16800  int push = 0;
16801  int collisions = 0;
16802  enum machine_mode mode = GET_MODE (operands[0]);
16803  bool collisionparts[4];
16804
  /* The DFmode expanders may ask us to move a double.  For a 64-bit
     target this is a single move.  By hiding that fact here we simplify
     the i386.md splitters.  */
16808  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16809    {
      /* Optimize constant pool references to immediates.  This is used by
	 fp moves that force all constants to memory to allow combining.  */
16812
16813      if (MEM_P (operands[1])
16814	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16815	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16816	operands[1] = get_pool_constant (XEXP (operands[1], 0));
16817      if (push_operand (operands[0], VOIDmode))
16818	{
16819	  operands[0] = copy_rtx (operands[0]);
16820	  PUT_MODE (operands[0], Pmode);
16821	}
16822      else
16823        operands[0] = gen_lowpart (DImode, operands[0]);
16824      operands[1] = gen_lowpart (DImode, operands[1]);
16825      emit_move_insn (operands[0], operands[1]);
16826      return;
16827    }
16828
16829  /* The only non-offsettable memory we handle is push.  */
16830  if (push_operand (operands[0], VOIDmode))
16831    push = 1;
16832  else
16833    gcc_assert (!MEM_P (operands[0])
16834		|| offsettable_memref_p (operands[0]));
16835
16836  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16837  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16838
  /* When emitting push, take care of source operands on the stack.  */
16840  if (push && MEM_P (operands[1])
16841      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16842    {
16843      rtx src_base = XEXP (part[1][nparts - 1], 0);
16844
16845      /* Compensate for the stack decrement by 4.  */
16846      if (!TARGET_64BIT && nparts == 3
16847	  && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
16848	src_base = plus_constant (src_base, 4);
16849
      /* src_base refers to the stack pointer and is
	 automatically decreased by each emitted push.  */
16852      for (i = 0; i < nparts; i++)
16853	part[1][i] = change_address (part[1][i],
16854				     GET_MODE (part[1][i]), src_base);
16855    }
16856
  /* We need to do the copy in the right order in case an address register
     of the source overlaps the destination.  */
16859  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16860    {
16861      rtx tmp;
16862
16863      for (i = 0; i < nparts; i++)
16864	{
16865	  collisionparts[i]
16866	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16867	  if (collisionparts[i])
16868	    collisions++;
16869	}
16870
16871      /* Collision in the middle part can be handled by reordering.  */
16872      if (collisions == 1 && nparts == 3 && collisionparts [1])
16873	{
16874	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16875	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16876	}
16877      else if (collisions == 1
16878	       && nparts == 4
16879	       && (collisionparts [1] || collisionparts [2]))
16880	{
16881	  if (collisionparts [1])
16882	    {
16883	      tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16884	      tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16885	    }
16886	  else
16887	    {
16888	      tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16889	      tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16890	    }
16891	}
16892
      /* If there are more collisions, we can't handle them by reordering.
	 Do an lea to the last part and use only one colliding move.  */
16895      else if (collisions > 1)
16896	{
16897	  rtx base;
16898
16899	  collisions = 1;
16900
16901	  base = part[0][nparts - 1];
16902
16903	  /* Handle the case when the last part isn't valid for lea.
16904	     Happens in 64-bit mode storing the 12-byte XFmode.  */
16905	  if (GET_MODE (base) != Pmode)
16906	    base = gen_rtx_REG (Pmode, REGNO (base));
16907
16908	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16909	  part[1][0] = replace_equiv_address (part[1][0], base);
16910	  for (i = 1; i < nparts; i++)
16911	    {
16912	      tmp = plus_constant (base, UNITS_PER_WORD * i);
16913	      part[1][i] = replace_equiv_address (part[1][i], tmp);
16914	    }
16915	}
16916    }
16917
16918  if (push)
16919    {
16920      if (!TARGET_64BIT)
16921	{
16922	  if (nparts == 3)
16923	    {
16924	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16925                emit_insn (gen_addsi3 (stack_pointer_rtx,
16926				       stack_pointer_rtx, GEN_INT (-4)));
16927	      emit_move_insn (part[0][2], part[1][2]);
16928	    }
16929	  else if (nparts == 4)
16930	    {
16931	      emit_move_insn (part[0][3], part[1][3]);
16932	      emit_move_insn (part[0][2], part[1][2]);
16933	    }
16934	}
16935      else
16936	{
	  /* In 64-bit mode we don't have a 32-bit push available.  If this is
	     a register, that is OK - we will just use the larger counterpart.
	     We also retype memory - this comes from an attempt to avoid the
	     REX prefix on moving the second half of a TFmode value.  */
16941	  if (GET_MODE (part[1][1]) == SImode)
16942	    {
16943	      switch (GET_CODE (part[1][1]))
16944		{
16945		case MEM:
16946		  part[1][1] = adjust_address (part[1][1], DImode, 0);
16947		  break;
16948
16949		case REG:
16950		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16951		  break;
16952
16953		default:
16954		  gcc_unreachable ();
16955		}
16956
16957	      if (GET_MODE (part[1][0]) == SImode)
16958		part[1][0] = part[1][1];
16959	    }
16960	}
16961      emit_move_insn (part[0][1], part[1][1]);
16962      emit_move_insn (part[0][0], part[1][0]);
16963      return;
16964    }
16965
  /* Choose the correct order so as not to overwrite the source before it
     is copied.  */
16967  if ((REG_P (part[0][0])
16968       && REG_P (part[1][1])
16969       && (REGNO (part[0][0]) == REGNO (part[1][1])
16970	   || (nparts == 3
16971	       && REGNO (part[0][0]) == REGNO (part[1][2]))
16972	   || (nparts == 4
16973	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
16974      || (collisions > 0
16975	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16976    {
16977      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16978	{
16979	  operands[2 + i] = part[0][j];
16980	  operands[6 + i] = part[1][j];
16981	}
16982    }
16983  else
16984    {
16985      for (i = 0; i < nparts; i++)
16986	{
16987	  operands[2 + i] = part[0][i];
16988	  operands[6 + i] = part[1][i];
16989	}
16990    }
16991
16992  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
16993  if (optimize_insn_for_size_p ())
16994    {
16995      for (j = 0; j < nparts - 1; j++)
16996	if (CONST_INT_P (operands[6 + j])
16997	    && operands[6 + j] != const0_rtx
16998	    && REG_P (operands[2 + j]))
16999	  for (i = j; i < nparts - 1; i++)
17000	    if (CONST_INT_P (operands[7 + i])
17001		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
17002	      operands[7 + i] = operands[2 + j];
17003    }
17004
17005  for (i = 0; i < nparts; i++)
17006    emit_move_insn (operands[2 + i], operands[6 + i]);
17007
17008  return;
17009}
17010
17011/* Helper function of ix86_split_ashl used to generate an SImode/DImode
17012   left shift by a constant, either using a single shift or
17013   a sequence of add instructions.  */
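/* For example, a shift by 1 always becomes a single add (the operand is
   added to itself), and when not optimizing for size a shift by 2 or 3
   may become a short sequence of adds if that is no more costly than a
   constant shift.  */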
17014
17015static void
17016ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
17017{
17018  if (count == 1)
17019    {
17020      emit_insn ((mode == DImode
17021		  ? gen_addsi3
17022		  : gen_adddi3) (operand, operand, operand));
17023    }
17024  else if (!optimize_insn_for_size_p ()
17025	   && count * ix86_cost->add <= ix86_cost->shift_const)
17026    {
17027      int i;
      for (i = 0; i < count; i++)
17029	{
17030	  emit_insn ((mode == DImode
17031		      ? gen_addsi3
17032		      : gen_adddi3) (operand, operand, operand));
17033	}
17034    }
17035  else
17036    emit_insn ((mode == DImode
17037		? gen_ashlsi3
17038		: gen_ashldi3) (operand, operand, GEN_INT (count)));
17039}
17040
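/* Split a double-word left shift into operations on the two word-sized
   halves.  OPERANDS[0] is the destination, OPERANDS[1] the source and
   OPERANDS[2] the shift count; MODE is DImode for the 32-bit case and
   TImode for the 64-bit case.  SCRATCH, if nonnull, may be used as a
   temporary when cmov is available.  */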
17041void
17042ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
17043{
17044  rtx low[2], high[2];
17045  int count;
17046  const int single_width = mode == DImode ? 32 : 64;
17047
17048  if (CONST_INT_P (operands[2]))
17049    {
17050      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17051      count = INTVAL (operands[2]) & (single_width * 2 - 1);
17052
17053      if (count >= single_width)
17054	{
17055	  emit_move_insn (high[0], low[1]);
17056	  emit_move_insn (low[0], const0_rtx);
17057
17058	  if (count > single_width)
17059	    ix86_expand_ashl_const (high[0], count - single_width, mode);
17060	}
17061      else
17062	{
17063	  if (!rtx_equal_p (operands[0], operands[1]))
17064	    emit_move_insn (operands[0], operands[1]);
17065	  emit_insn ((mode == DImode
17066		     ? gen_x86_shld
17067		     : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
17068	  ix86_expand_ashl_const (low[0], count, mode);
17069	}
17070      return;
17071    }
17072
17073  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17074
17075  if (operands[1] == const1_rtx)
17076    {
      /* Assuming we've chosen QImode-capable registers, 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
17079      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
17080	{
17081	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
17082
17083	  ix86_expand_clear (low[0]);
17084	  ix86_expand_clear (high[0]);
17085	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
17086
17087	  d = gen_lowpart (QImode, low[0]);
17088	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17089	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
17090	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
17091
17092	  d = gen_lowpart (QImode, high[0]);
17093	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17094	  s = gen_rtx_NE (QImode, flags, const0_rtx);
17095	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
17096	}
17097
17098      /* Otherwise, we can get the same results by manually performing
17099	 a bit extract operation on bit 5/6, and then performing the two
17100	 shifts.  The two methods of getting 0/1 into low/high are exactly
17101	 the same size.  Avoiding the shift in the bit extract case helps
17102	 pentium4 a bit; no one else seems to care much either way.  */
17103      else
17104	{
17105	  rtx x;
17106
17107	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
17108	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
17109	  else
17110	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
17111	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
17112
17113	  emit_insn ((mode == DImode
17114		      ? gen_lshrsi3
17115		      : gen_lshrdi3) (high[0], high[0],
17116				      GEN_INT (mode == DImode ? 5 : 6)));
17117	  emit_insn ((mode == DImode
17118		      ? gen_andsi3
17119		      : gen_anddi3) (high[0], high[0], const1_rtx));
17120	  emit_move_insn (low[0], high[0]);
17121	  emit_insn ((mode == DImode
17122		      ? gen_xorsi3
17123		      : gen_xordi3) (low[0], low[0], const1_rtx));
17124	}
17125
17126      emit_insn ((mode == DImode
17127		    ? gen_ashlsi3
17128		    : gen_ashldi3) (low[0], low[0], operands[2]));
17129      emit_insn ((mode == DImode
17130		    ? gen_ashlsi3
17131		    : gen_ashldi3) (high[0], high[0], operands[2]));
17132      return;
17133    }
17134
17135  if (operands[1] == constm1_rtx)
17136    {
17137      /* For -1 << N, we can avoid the shld instruction, because we
17138	 know that we're shifting 0...31/63 ones into a -1.  */
17139      emit_move_insn (low[0], constm1_rtx);
17140      if (optimize_insn_for_size_p ())
17141	emit_move_insn (high[0], low[0]);
17142      else
17143	emit_move_insn (high[0], constm1_rtx);
17144    }
17145  else
17146    {
17147      if (!rtx_equal_p (operands[0], operands[1]))
17148	emit_move_insn (operands[0], operands[1]);
17149
17150      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17151      emit_insn ((mode == DImode
17152		  ? gen_x86_shld
17153		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
17154    }
17155
  emit_insn ((mode == DImode
	      ? gen_ashlsi3
	      : gen_ashldi3) (low[0], low[0], operands[2]));
17157
17158  if (TARGET_CMOVE && scratch)
17159    {
17160      ix86_expand_clear (scratch);
17161      emit_insn ((mode == DImode
17162		  ? gen_x86_shift_adj_1
17163		  : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
17164					     scratch));
17165    }
17166  else
17167    emit_insn ((mode == DImode
17168		? gen_x86_shift_adj_2
17169		: gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
17170}
17171
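/* Split a double-word arithmetic right shift into operations on the two
   word-sized halves, analogously to ix86_split_ashl above.  */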
17172void
17173ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17174{
17175  rtx low[2], high[2];
17176  int count;
17177  const int single_width = mode == DImode ? 32 : 64;
17178
17179  if (CONST_INT_P (operands[2]))
17180    {
17181      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17182      count = INTVAL (operands[2]) & (single_width * 2 - 1);
17183
17184      if (count == single_width * 2 - 1)
17185	{
17186	  emit_move_insn (high[0], high[1]);
17187	  emit_insn ((mode == DImode
17188		      ? gen_ashrsi3
17189		      : gen_ashrdi3) (high[0], high[0],
17190				      GEN_INT (single_width - 1)));
17191	  emit_move_insn (low[0], high[0]);
17192
17193	}
17194      else if (count >= single_width)
17195	{
17196	  emit_move_insn (low[0], high[1]);
17197	  emit_move_insn (high[0], low[0]);
17198	  emit_insn ((mode == DImode
17199		      ? gen_ashrsi3
17200		      : gen_ashrdi3) (high[0], high[0],
17201				      GEN_INT (single_width - 1)));
17202	  if (count > single_width)
17203	    emit_insn ((mode == DImode
17204			? gen_ashrsi3
17205			: gen_ashrdi3) (low[0], low[0],
17206					GEN_INT (count - single_width)));
17207	}
17208      else
17209	{
17210	  if (!rtx_equal_p (operands[0], operands[1]))
17211	    emit_move_insn (operands[0], operands[1]);
17212	  emit_insn ((mode == DImode
17213		      ? gen_x86_shrd
17214		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17215	  emit_insn ((mode == DImode
17216		      ? gen_ashrsi3
17217		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
17218	}
17219    }
17220  else
17221    {
17222      if (!rtx_equal_p (operands[0], operands[1]))
17223	emit_move_insn (operands[0], operands[1]);
17224
17225      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17226
17227      emit_insn ((mode == DImode
17228		  ? gen_x86_shrd
17229		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17230      emit_insn ((mode == DImode
17231		  ? gen_ashrsi3
17232		  : gen_ashrdi3)  (high[0], high[0], operands[2]));
17233
17234      if (TARGET_CMOVE && scratch)
17235	{
17236	  emit_move_insn (scratch, high[0]);
17237	  emit_insn ((mode == DImode
17238		      ? gen_ashrsi3
17239		      : gen_ashrdi3) (scratch, scratch,
17240				      GEN_INT (single_width - 1)));
17241	  emit_insn ((mode == DImode
17242		      ? gen_x86_shift_adj_1
17243		      : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17244						 scratch));
17245	}
17246      else
17247	emit_insn ((mode == DImode
17248		    ? gen_x86_shift_adj_3
17249		    : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
17250    }
17251}
17252
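/* Split a double-word logical right shift into operations on the two
   word-sized halves, analogously to ix86_split_ashr above.  */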
17253void
17254ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17255{
17256  rtx low[2], high[2];
17257  int count;
17258  const int single_width = mode == DImode ? 32 : 64;
17259
17260  if (CONST_INT_P (operands[2]))
17261    {
17262      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17263      count = INTVAL (operands[2]) & (single_width * 2 - 1);
17264
17265      if (count >= single_width)
17266	{
17267	  emit_move_insn (low[0], high[1]);
17268	  ix86_expand_clear (high[0]);
17269
17270	  if (count > single_width)
17271	    emit_insn ((mode == DImode
17272			? gen_lshrsi3
17273			: gen_lshrdi3) (low[0], low[0],
17274					GEN_INT (count - single_width)));
17275	}
17276      else
17277	{
17278	  if (!rtx_equal_p (operands[0], operands[1]))
17279	    emit_move_insn (operands[0], operands[1]);
17280	  emit_insn ((mode == DImode
17281		      ? gen_x86_shrd
17282		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17283	  emit_insn ((mode == DImode
17284		      ? gen_lshrsi3
17285		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
17286	}
17287    }
17288  else
17289    {
17290      if (!rtx_equal_p (operands[0], operands[1]))
17291	emit_move_insn (operands[0], operands[1]);
17292
17293      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17294
17295      emit_insn ((mode == DImode
17296		  ? gen_x86_shrd
17297		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17298      emit_insn ((mode == DImode
17299		  ? gen_lshrsi3
17300		  : gen_lshrdi3) (high[0], high[0], operands[2]));
17301
17302      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
17303      if (TARGET_CMOVE && scratch)
17304	{
17305	  ix86_expand_clear (scratch);
17306	  emit_insn ((mode == DImode
17307		      ? gen_x86_shift_adj_1
17308		      : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17309						 scratch));
17310	}
17311      else
17312	emit_insn ((mode == DImode
17313		    ? gen_x86_shift_adj_2
17314		    : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17315    }
17316}
17317
/* Predict the just-emitted jump instruction to be taken with probability
   PROB.  */
17319static void
17320predict_jump (int prob)
17321{
17322  rtx insn = get_last_insn ();
17323  gcc_assert (JUMP_P (insn));
17324  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17325}
17326
/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes (i.e. the VALUE bit is clear); if so, jump to
   the returned label.  */
17329static rtx
17330ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17331{
17332  rtx label = gen_label_rtx ();
17333  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17334  if (GET_MODE (variable) == DImode)
17335    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17336  else
17337    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17338  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17339			   1, label);
17340  if (epilogue)
17341    predict_jump (REG_BR_PROB_BASE * 50 / 100);
17342  else
17343    predict_jump (REG_BR_PROB_BASE * 90 / 100);
17344  return label;
17345}
17346
/* Decrease COUNTREG by VALUE.  */
17348static void
17349ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17350{
17351  if (GET_MODE (countreg) == DImode)
17352    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17353  else
17354    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17355}
17356
17357/* Zero extend possibly SImode EXP to Pmode register.  */
17358rtx
17359ix86_zero_extend_to_Pmode (rtx exp)
17360{
17361  rtx r;
17362  if (GET_MODE (exp) == VOIDmode)
17363    return force_reg (Pmode, exp);
17364  if (GET_MODE (exp) == Pmode)
17365    return copy_to_mode_reg (Pmode, exp);
17366  r = gen_reg_rtx (Pmode);
17367  emit_insn (gen_zero_extendsidi2 (r, exp));
17368  return r;
17369}
17370
17371/* Divide COUNTREG by SCALE.  */
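/* For example, scale_counter (countreg, 4) emits a logical right shift by
   2 (or folds the division when COUNTREG is a constant); SCALE is assumed
   to be a power of two.  */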
17372static rtx
17373scale_counter (rtx countreg, int scale)
17374{
17375  rtx sc;
17376
17377  if (scale == 1)
17378    return countreg;
17379  if (CONST_INT_P (countreg))
17380    return GEN_INT (INTVAL (countreg) / scale);
17381  gcc_assert (REG_P (countreg));
17382
17383  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17384			    GEN_INT (exact_log2 (scale)),
17385			    NULL, 1, OPTAB_DIRECT);
17386  return sc;
17387}
17388
17389/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
17390   DImode for constant loop counts.  */
17391
17392static enum machine_mode
17393counter_mode (rtx count_exp)
17394{
17395  if (GET_MODE (count_exp) != VOIDmode)
17396    return GET_MODE (count_exp);
17397  if (!CONST_INT_P (count_exp))
17398    return Pmode;
17399  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17400    return DImode;
17401  return SImode;
17402}
17403
/* When SRCPTR is non-NULL, output a simple loop to move memory pointed to
   by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
   overall size is COUNT bytes.  When SRCPTR is NULL, output the equivalent
   loop to set the memory to VALUE (assumed to be in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info.  */
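/* For instance, a copy in DImode unrolled 4 times moves 32 bytes per
   iteration; COUNT is masked down to a multiple of 32 and any remaining
   bytes are left to the epilogue code.  */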
17411
17412
17413static void
17414expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17415			       rtx destptr, rtx srcptr, rtx value,
17416			       rtx count, enum machine_mode mode, int unroll,
17417			       int expected_size)
17418{
17419  rtx out_label, top_label, iter, tmp;
17420  enum machine_mode iter_mode = counter_mode (count);
17421  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17422  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17423  rtx size;
17424  rtx x_addr;
17425  rtx y_addr;
17426  int i;
17427
17428  top_label = gen_label_rtx ();
17429  out_label = gen_label_rtx ();
17430  iter = gen_reg_rtx (iter_mode);
17431
17432  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17433			      NULL, 1, OPTAB_DIRECT);
17434  /* Those two should combine.  */
17435  if (piece_size == const1_rtx)
17436    {
17437      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17438			       true, out_label);
17439      predict_jump (REG_BR_PROB_BASE * 10 / 100);
17440    }
17441  emit_move_insn (iter, const0_rtx);
17442
17443  emit_label (top_label);
17444
17445  tmp = convert_modes (Pmode, iter_mode, iter, true);
17446  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17447  destmem = change_address (destmem, mode, x_addr);
17448
17449  if (srcmem)
17450    {
17451      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17452      srcmem = change_address (srcmem, mode, y_addr);
17453
      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using a single temporary.
	 Also, using 4 temporaries is overkill in 32-bit mode.  */
17457      if (!TARGET_64BIT && 0)
17458	{
17459	  for (i = 0; i < unroll; i++)
17460	    {
17461	      if (i)
17462		{
17463		  destmem =
17464		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17465		  srcmem =
17466		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17467		}
17468	      emit_move_insn (destmem, srcmem);
17469	    }
17470	}
17471      else
17472	{
17473	  rtx tmpreg[4];
17474	  gcc_assert (unroll <= 4);
17475	  for (i = 0; i < unroll; i++)
17476	    {
17477	      tmpreg[i] = gen_reg_rtx (mode);
17478	      if (i)
17479		{
17480		  srcmem =
17481		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17482		}
17483	      emit_move_insn (tmpreg[i], srcmem);
17484	    }
17485	  for (i = 0; i < unroll; i++)
17486	    {
17487	      if (i)
17488		{
17489		  destmem =
17490		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17491		}
17492	      emit_move_insn (destmem, tmpreg[i]);
17493	    }
17494	}
17495    }
17496  else
17497    for (i = 0; i < unroll; i++)
17498      {
17499	if (i)
17500	  destmem =
17501	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17502	emit_move_insn (destmem, value);
17503      }
17504
17505  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17506			     true, OPTAB_LIB_WIDEN);
17507  if (tmp != iter)
17508    emit_move_insn (iter, tmp);
17509
17510  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
17511			   true, top_label);
17512  if (expected_size != -1)
17513    {
17514      expected_size /= GET_MODE_SIZE (mode) * unroll;
17515      if (expected_size == 0)
17516	predict_jump (0);
17517      else if (expected_size > REG_BR_PROB_BASE)
17518	predict_jump (REG_BR_PROB_BASE - 1);
17519      else
17520        predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17521    }
17522  else
17523    predict_jump (REG_BR_PROB_BASE * 80 / 100);
17524  iter = ix86_zero_extend_to_Pmode (iter);
17525  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17526			     true, OPTAB_LIB_WIDEN);
17527  if (tmp != destptr)
17528    emit_move_insn (destptr, tmp);
17529  if (srcptr)
17530    {
17531      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17532				 true, OPTAB_LIB_WIDEN);
17533      if (tmp != srcptr)
17534	emit_move_insn (srcptr, tmp);
17535    }
17536  emit_label (out_label);
17537}
17538
/* Output "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
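/* For example, when MODE is SImode the byte COUNT is scaled down by 4 to
   form the iteration count, so a block of 4*N bytes is copied with N
   word-sized string moves.  */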
17541static void
17542expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17543			   rtx destptr, rtx srcptr,
17544			   rtx count,
17545			   enum machine_mode mode)
17546{
17547  rtx destexp;
17548  rtx srcexp;
17549  rtx countreg;
17550
  /* If the size is known and is a multiple of 4, it is shorter to use
     SImode rep movs than to copy byte by byte.  */
17552  if (mode == QImode && CONST_INT_P (count)
17553      && !(INTVAL (count) & 3))
17554    mode = SImode;
17555
17556  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17557    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17558  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17559    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17560  countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17561  if (mode != QImode)
17562    {
17563      destexp = gen_rtx_ASHIFT (Pmode, countreg,
17564				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17565      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17566      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17567			       GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17568      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17569    }
17570  else
17571    {
17572      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17573      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
17574    }
17575  if (CONST_INT_P (count))
17576    {
17577      count = GEN_INT (INTVAL (count)
17578		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17579      destmem = shallow_copy_rtx (destmem);
17580      srcmem = shallow_copy_rtx (srcmem);
17581      set_mem_size (destmem, count);
17582      set_mem_size (srcmem, count);
17583    }
17584  else
17585    {
17586      if (MEM_SIZE (destmem))
17587	set_mem_size (destmem, NULL_RTX);
17588      if (MEM_SIZE (srcmem))
17589	set_mem_size (srcmem, NULL_RTX);
17590    }
17591  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17592			  destexp, srcexp));
17593}
17594
/* Output "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
17597static void
17598expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17599			    rtx count, enum machine_mode mode,
17600			    rtx orig_value)
17601{
17602  rtx destexp;
17603  rtx countreg;
17604
17605  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17606    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17607  value = force_reg (mode, gen_lowpart (mode, value));
17608  countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17609  if (mode != QImode)
17610    {
17611      destexp = gen_rtx_ASHIFT (Pmode, countreg,
17612				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17613      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17614    }
17615  else
17616    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17617  if (orig_value == const0_rtx && CONST_INT_P (count))
17618    {
17619      count = GEN_INT (INTVAL (count)
17620		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17621      destmem = shallow_copy_rtx (destmem);
17622      set_mem_size (destmem, count);
17623    }
17624  else if (MEM_SIZE (destmem))
17625    set_mem_size (destmem, NULL_RTX);
17626  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
17627}
17628
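/* Emit one string-move instruction copying a single MODE-sized piece at
   OFFSET from SRCMEM to DESTMEM; the strmov pattern also advances DESTPTR
   and SRCPTR.  */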
17629static void
17630emit_strmov (rtx destmem, rtx srcmem,
17631	     rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17632{
17633  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17634  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17635  emit_insn (gen_strmov (destptr, dest, srcptr, src));
17636}
17637
17638/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
17639static void
17640expand_movmem_epilogue (rtx destmem, rtx srcmem,
17641			rtx destptr, rtx srcptr, rtx count, int max_size)
17642{
17643  rtx src, dest;
17644  if (CONST_INT_P (count))
17645    {
17646      HOST_WIDE_INT countval = INTVAL (count);
17647      int offset = 0;
17648
17649      if ((countval & 0x10) && max_size > 16)
17650	{
17651	  if (TARGET_64BIT)
17652	    {
17653	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17654	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17655	    }
17656	  else
17657	    gcc_unreachable ();
17658	  offset += 16;
17659	}
17660      if ((countval & 0x08) && max_size > 8)
17661	{
17662	  if (TARGET_64BIT)
17663	    emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17664	  else
17665	    {
17666	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17667	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17668	    }
17669	  offset += 8;
17670	}
17671      if ((countval & 0x04) && max_size > 4)
17672	{
17673          emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17674	  offset += 4;
17675	}
17676      if ((countval & 0x02) && max_size > 2)
17677	{
17678          emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17679	  offset += 2;
17680	}
17681      if ((countval & 0x01) && max_size > 1)
17682	{
17683          emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
17684	  offset += 1;
17685	}
17686      return;
17687    }
17688  if (max_size > 8)
17689    {
17690      count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17691				    count, 1, OPTAB_DIRECT);
17692      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17693				     count, QImode, 1, 4);
17694      return;
17695    }
17696
  /* When single-operation stringops are available, we can cheaply advance
     the dest and src pointers.  Otherwise we save code size by maintaining
     an offset (zero is readily available from the preceding rep operation)
     and using x86 addressing modes.  */
17701  if (TARGET_SINGLE_STRINGOP)
17702    {
17703      if (max_size > 4)
17704	{
17705	  rtx label = ix86_expand_aligntest (count, 4, true);
17706	  src = change_address (srcmem, SImode, srcptr);
17707	  dest = change_address (destmem, SImode, destptr);
17708	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
17709	  emit_label (label);
17710	  LABEL_NUSES (label) = 1;
17711	}
17712      if (max_size > 2)
17713	{
17714	  rtx label = ix86_expand_aligntest (count, 2, true);
17715	  src = change_address (srcmem, HImode, srcptr);
17716	  dest = change_address (destmem, HImode, destptr);
17717	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
17718	  emit_label (label);
17719	  LABEL_NUSES (label) = 1;
17720	}
17721      if (max_size > 1)
17722	{
17723	  rtx label = ix86_expand_aligntest (count, 1, true);
17724	  src = change_address (srcmem, QImode, srcptr);
17725	  dest = change_address (destmem, QImode, destptr);
17726	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
17727	  emit_label (label);
17728	  LABEL_NUSES (label) = 1;
17729	}
17730    }
17731  else
17732    {
17733      rtx offset = force_reg (Pmode, const0_rtx);
17734      rtx tmp;
17735
17736      if (max_size > 4)
17737	{
17738	  rtx label = ix86_expand_aligntest (count, 4, true);
17739	  src = change_address (srcmem, SImode, srcptr);
17740	  dest = change_address (destmem, SImode, destptr);
17741	  emit_move_insn (dest, src);
17742	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17743				     true, OPTAB_LIB_WIDEN);
17744	  if (tmp != offset)
17745	    emit_move_insn (offset, tmp);
17746	  emit_label (label);
17747	  LABEL_NUSES (label) = 1;
17748	}
17749      if (max_size > 2)
17750	{
17751	  rtx label = ix86_expand_aligntest (count, 2, true);
17752	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17753	  src = change_address (srcmem, HImode, tmp);
17754	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17755	  dest = change_address (destmem, HImode, tmp);
17756	  emit_move_insn (dest, src);
17757	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17758				     true, OPTAB_LIB_WIDEN);
17759	  if (tmp != offset)
17760	    emit_move_insn (offset, tmp);
17761	  emit_label (label);
17762	  LABEL_NUSES (label) = 1;
17763	}
17764      if (max_size > 1)
17765	{
17766	  rtx label = ix86_expand_aligntest (count, 1, true);
17767	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17768	  src = change_address (srcmem, QImode, tmp);
17769	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17770	  dest = change_address (destmem, QImode, tmp);
17771	  emit_move_insn (dest, src);
17772	  emit_label (label);
17773	  LABEL_NUSES (label) = 1;
17774	}
17775    }
17776}
17777
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
17779static void
17780expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17781				 rtx count, int max_size)
17782{
17783  count =
17784    expand_simple_binop (counter_mode (count), AND, count,
17785			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17786  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17787				 gen_lowpart (QImode, value), count, QImode,
17788				 1, max_size / 2);
17789}
17790
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
17792static void
17793expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17794{
17795  rtx dest;
17796
17797  if (CONST_INT_P (count))
17798    {
17799      HOST_WIDE_INT countval = INTVAL (count);
17800      int offset = 0;
17801
17802      if ((countval & 0x10) && max_size > 16)
17803	{
17804	  if (TARGET_64BIT)
17805	    {
17806	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17807	      emit_insn (gen_strset (destptr, dest, value));
17808	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17809	      emit_insn (gen_strset (destptr, dest, value));
17810	    }
17811	  else
17812	    gcc_unreachable ();
17813	  offset += 16;
17814	}
17815      if ((countval & 0x08) && max_size > 8)
17816	{
17817	  if (TARGET_64BIT)
17818	    {
17819	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17820	      emit_insn (gen_strset (destptr, dest, value));
17821	    }
17822	  else
17823	    {
17824	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17825	      emit_insn (gen_strset (destptr, dest, value));
17826	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17827	      emit_insn (gen_strset (destptr, dest, value));
17828	    }
17829	  offset += 8;
17830	}
17831      if ((countval & 0x04) && max_size > 4)
17832	{
17833	  dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17834	  emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17835	  offset += 4;
17836	}
17837      if ((countval & 0x02) && max_size > 2)
17838	{
17839	  dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17840	  emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17841	  offset += 2;
17842	}
17843      if ((countval & 0x01) && max_size > 1)
17844	{
17845	  dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17846	  emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17847	  offset += 1;
17848	}
17849      return;
17850    }
17851  if (max_size > 32)
17852    {
17853      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
17854      return;
17855    }
17856  if (max_size > 16)
17857    {
17858      rtx label = ix86_expand_aligntest (count, 16, true);
17859      if (TARGET_64BIT)
17860	{
17861	  dest = change_address (destmem, DImode, destptr);
17862	  emit_insn (gen_strset (destptr, dest, value));
17863	  emit_insn (gen_strset (destptr, dest, value));
17864	}
17865      else
17866	{
17867	  dest = change_address (destmem, SImode, destptr);
17868	  emit_insn (gen_strset (destptr, dest, value));
17869	  emit_insn (gen_strset (destptr, dest, value));
17870	  emit_insn (gen_strset (destptr, dest, value));
17871	  emit_insn (gen_strset (destptr, dest, value));
17872	}
17873      emit_label (label);
17874      LABEL_NUSES (label) = 1;
17875    }
17876  if (max_size > 8)
17877    {
17878      rtx label = ix86_expand_aligntest (count, 8, true);
17879      if (TARGET_64BIT)
17880	{
17881	  dest = change_address (destmem, DImode, destptr);
17882	  emit_insn (gen_strset (destptr, dest, value));
17883	}
17884      else
17885	{
17886	  dest = change_address (destmem, SImode, destptr);
17887	  emit_insn (gen_strset (destptr, dest, value));
17888	  emit_insn (gen_strset (destptr, dest, value));
17889	}
17890      emit_label (label);
17891      LABEL_NUSES (label) = 1;
17892    }
17893  if (max_size > 4)
17894    {
17895      rtx label = ix86_expand_aligntest (count, 4, true);
17896      dest = change_address (destmem, SImode, destptr);
17897      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17898      emit_label (label);
17899      LABEL_NUSES (label) = 1;
17900    }
17901  if (max_size > 2)
17902    {
17903      rtx label = ix86_expand_aligntest (count, 2, true);
17904      dest = change_address (destmem, HImode, destptr);
17905      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17906      emit_label (label);
17907      LABEL_NUSES (label) = 1;
17908    }
17909  if (max_size > 1)
17910    {
17911      rtx label = ix86_expand_aligntest (count, 1, true);
17912      dest = change_address (destmem, QImode, destptr);
17913      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17914      emit_label (label);
17915      LABEL_NUSES (label) = 1;
17916    }
17917}
17918
/* Copy enough bytes from SRC to DEST to align DEST, which is known to be
   aligned to ALIGN, up to DESIRED_ALIGNMENT.  */
17921static void
17922expand_movmem_prologue (rtx destmem, rtx srcmem,
17923			rtx destptr, rtx srcptr, rtx count,
17924			int align, int desired_alignment)
17925{
17926  if (align <= 1 && desired_alignment > 1)
17927    {
17928      rtx label = ix86_expand_aligntest (destptr, 1, false);
17929      srcmem = change_address (srcmem, QImode, srcptr);
17930      destmem = change_address (destmem, QImode, destptr);
17931      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17932      ix86_adjust_counter (count, 1);
17933      emit_label (label);
17934      LABEL_NUSES (label) = 1;
17935    }
17936  if (align <= 2 && desired_alignment > 2)
17937    {
17938      rtx label = ix86_expand_aligntest (destptr, 2, false);
17939      srcmem = change_address (srcmem, HImode, srcptr);
17940      destmem = change_address (destmem, HImode, destptr);
17941      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17942      ix86_adjust_counter (count, 2);
17943      emit_label (label);
17944      LABEL_NUSES (label) = 1;
17945    }
17946  if (align <= 4 && desired_alignment > 4)
17947    {
17948      rtx label = ix86_expand_aligntest (destptr, 4, false);
17949      srcmem = change_address (srcmem, SImode, srcptr);
17950      destmem = change_address (destmem, SImode, destptr);
17951      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17952      ix86_adjust_counter (count, 4);
17953      emit_label (label);
17954      LABEL_NUSES (label) = 1;
17955    }
17956  gcc_assert (desired_alignment <= 8);
17957}
17958
/* Copy enough bytes from *SRCP to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
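/* For instance, with ALIGN_BYTES == 3 this emits one QImode and one HImode
   string move before the caller falls through to the main copy.  */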
17961static rtx
17962expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17963				 int desired_align, int align_bytes)
17964{
17965  rtx src = *srcp;
17966  rtx src_size, dst_size;
17967  int off = 0;
17968  int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17969  if (src_align_bytes >= 0)
17970    src_align_bytes = desired_align - src_align_bytes;
17971  src_size = MEM_SIZE (src);
17972  dst_size = MEM_SIZE (dst);
17973  if (align_bytes & 1)
17974    {
17975      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17976      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17977      off = 1;
17978      emit_insn (gen_strmov (destreg, dst, srcreg, src));
17979    }
17980  if (align_bytes & 2)
17981    {
17982      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17983      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17984      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17985	set_mem_align (dst, 2 * BITS_PER_UNIT);
17986      if (src_align_bytes >= 0
17987	  && (src_align_bytes & 1) == (align_bytes & 1)
17988	  && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17989	set_mem_align (src, 2 * BITS_PER_UNIT);
17990      off = 2;
17991      emit_insn (gen_strmov (destreg, dst, srcreg, src));
17992    }
17993  if (align_bytes & 4)
17994    {
17995      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17996      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17997      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17998	set_mem_align (dst, 4 * BITS_PER_UNIT);
17999      if (src_align_bytes >= 0)
18000	{
18001	  unsigned int src_align = 0;
18002	  if ((src_align_bytes & 3) == (align_bytes & 3))
18003	    src_align = 4;
18004	  else if ((src_align_bytes & 1) == (align_bytes & 1))
18005	    src_align = 2;
18006	  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18007	    set_mem_align (src, src_align * BITS_PER_UNIT);
18008	}
18009      off = 4;
18010      emit_insn (gen_strmov (destreg, dst, srcreg, src));
18011    }
18012  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18013  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
18014  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18015    set_mem_align (dst, desired_align * BITS_PER_UNIT);
18016  if (src_align_bytes >= 0)
18017    {
18018      unsigned int src_align = 0;
18019      if ((src_align_bytes & 7) == (align_bytes & 7))
18020	src_align = 8;
18021      else if ((src_align_bytes & 3) == (align_bytes & 3))
18022	src_align = 4;
18023      else if ((src_align_bytes & 1) == (align_bytes & 1))
18024	src_align = 2;
18025      if (src_align > (unsigned int) desired_align)
18026	src_align = desired_align;
18027      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18028	set_mem_align (src, src_align * BITS_PER_UNIT);
18029    }
18030  if (dst_size)
18031    set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18032  if (src_size)
    set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
18034  *srcp = src;
18035  return dst;
18036}
18037
/* Store enough bytes to DEST to align it, known to be aligned to ALIGN,
   up to DESIRED_ALIGNMENT.  */
18040static void
18041expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
18042			int align, int desired_alignment)
18043{
18044  if (align <= 1 && desired_alignment > 1)
18045    {
18046      rtx label = ix86_expand_aligntest (destptr, 1, false);
18047      destmem = change_address (destmem, QImode, destptr);
18048      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
18049      ix86_adjust_counter (count, 1);
18050      emit_label (label);
18051      LABEL_NUSES (label) = 1;
18052    }
18053  if (align <= 2 && desired_alignment > 2)
18054    {
18055      rtx label = ix86_expand_aligntest (destptr, 2, false);
18056      destmem = change_address (destmem, HImode, destptr);
18057      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
18058      ix86_adjust_counter (count, 2);
18059      emit_label (label);
18060      LABEL_NUSES (label) = 1;
18061    }
18062  if (align <= 4 && desired_alignment > 4)
18063    {
18064      rtx label = ix86_expand_aligntest (destptr, 4, false);
18065      destmem = change_address (destmem, SImode, destptr);
18066      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
18067      ix86_adjust_counter (count, 4);
18068      emit_label (label);
18069      LABEL_NUSES (label) = 1;
18070    }
18071  gcc_assert (desired_alignment <= 8);
18072}
18073
/* Store enough bytes to DST to align it to DESIRED_ALIGN.  ALIGN_BYTES
   is how many bytes need to be stored.  */
18076static rtx
18077expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
18078				 int desired_align, int align_bytes)
18079{
18080  int off = 0;
18081  rtx dst_size = MEM_SIZE (dst);
18082  if (align_bytes & 1)
18083    {
18084      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18085      off = 1;
18086      emit_insn (gen_strset (destreg, dst,
18087			     gen_lowpart (QImode, value)));
18088    }
18089  if (align_bytes & 2)
18090    {
18091      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18092      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18093	set_mem_align (dst, 2 * BITS_PER_UNIT);
18094      off = 2;
18095      emit_insn (gen_strset (destreg, dst,
18096			     gen_lowpart (HImode, value)));
18097    }
18098  if (align_bytes & 4)
18099    {
18100      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18101      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18102	set_mem_align (dst, 4 * BITS_PER_UNIT);
18103      off = 4;
18104      emit_insn (gen_strset (destreg, dst,
18105			     gen_lowpart (SImode, value)));
18106    }
18107  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18108  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18109    set_mem_align (dst, desired_align * BITS_PER_UNIT);
18110  if (dst_size)
18111    set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18112  return dst;
18113}
18114
/* Given COUNT and EXPECTED_SIZE, decide on the codegen of a string
   operation; MEMSET is true for memset and false for memcpy.  On return,
   *DYNAMIC_CHECK is the block-size threshold at or above which a runtime
   library call should be used, or -1 when no such check is needed.  */
18116static enum stringop_alg
18117decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
18118	    int *dynamic_check)
18119{
18120  const struct stringop_algs * algs;
18121  bool optimize_for_speed;
18122  /* Algorithms using the rep prefix want at least edi and ecx;
18123     additionally, memset wants eax and memcpy wants esi.  Don't
18124     consider such algorithms if the user has appropriated those
18125     registers for their own purposes.	*/
18126  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
18127                             || (memset
18128				 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
18129
18130#define ALG_USABLE_P(alg) (rep_prefix_usable			\
18131			   || (alg != rep_prefix_1_byte		\
18132			       && alg != rep_prefix_4_byte      \
18133			       && alg != rep_prefix_8_byte))
18134  const struct processor_costs *cost;
18135
18136  /* Even if the string operation call is cold, we still might spend a lot
18137     of time processing large blocks.  */
18138  if (optimize_function_for_size_p (cfun)
18139      || (optimize_insn_for_size_p ()
18140          && expected_size != -1 && expected_size < 256))
18141    optimize_for_speed = false;
18142  else
18143    optimize_for_speed = true;
18144
18145  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
18146
18147  *dynamic_check = -1;
18148  if (memset)
18149    algs = &cost->memset[TARGET_64BIT != 0];
18150  else
18151    algs = &cost->memcpy[TARGET_64BIT != 0];
18152  if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
18153    return stringop_alg;
18154  /* rep; movq or rep; movl is the smallest variant.  */
18155  else if (!optimize_for_speed)
18156    {
18157      if (!count || (count & 3))
18158	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18159      else
18160	return rep_prefix_usable ? rep_prefix_4_byte : loop;
18161    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
18164  else if (expected_size != -1 && expected_size < 4)
18165    return loop_1_byte;
18166  else if (expected_size != -1)
18167    {
18168      unsigned int i;
18169      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18171	{
18172	  /* We get here if the algorithms that were not libcall-based
18173	     were rep-prefix based and we are unable to use rep prefixes
18174	     based on global register usage.  Break out of the loop and
18175	     use the heuristic below.  */
18176	  if (algs->size[i].max == 0)
18177	    break;
18178	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18179	    {
18180	      enum stringop_alg candidate = algs->size[i].alg;
18181
18182	      if (candidate != libcall && ALG_USABLE_P (candidate))
18183		alg = candidate;
18184	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18185		 last non-libcall inline algorithm.  */
18186	      if (TARGET_INLINE_ALL_STRINGOPS)
18187		{
		  /* When the current size is best copied by a libcall,
		     but we are still forced to inline, run the heuristic below
		     that will pick code for medium-sized blocks.  */
18191		  if (alg != libcall)
18192		    return alg;
18193		  break;
18194		}
18195	      else if (ALG_USABLE_P (candidate))
18196		return candidate;
18197	    }
18198	}
18199      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18200    }
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of a block that is faster to copy by hand
     and take blocks of at most that size, guessing that the average size
     will be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
18208  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18209      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18210    {
18211      int max = -1;
18212      enum stringop_alg alg;
18213      int i;
18214      bool any_alg_usable_p = true;
18215
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18217        {
18218          enum stringop_alg candidate = algs->size[i].alg;
18219          any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18220
18221          if (candidate != libcall && candidate
18222              && ALG_USABLE_P (candidate))
18223              max = algs->size[i].max;
18224        }
18225      /* If there aren't any usable algorithms, then recursing on
18226         smaller sizes isn't going to find anything.  Just return the
18227         simple byte-at-a-time copy loop.  */
18228      if (!any_alg_usable_p)
18229        {
18230          /* Pick something reasonable.  */
18231          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18232            *dynamic_check = 128;
18233          return loop_1_byte;
18234        }
18235      if (max == -1)
18236	max = 4096;
18237      alg = decide_alg (count, max / 2, memset, dynamic_check);
18238      gcc_assert (*dynamic_check == -1);
18239      gcc_assert (alg != libcall);
18240      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18241	*dynamic_check = max;
18242      return alg;
18243    }
18244  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18245#undef ALG_USABLE_P
18246}
18247
18248/* Decide on alignment.  We know that the operand is already aligned to ALIGN
18249   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
18250static int
18251decide_alignment (int align,
18252		  enum stringop_alg alg,
18253		  int expected_size)
18254{
18255  int desired_align = 0;
18256  switch (alg)
18257    {
18258      case no_stringop:
18259	gcc_unreachable ();
18260      case loop:
18261      case unrolled_loop:
18262	desired_align = GET_MODE_SIZE (Pmode);
18263	break;
18264      case rep_prefix_8_byte:
18265	desired_align = 8;
18266	break;
18267      case rep_prefix_4_byte:
	/* PentiumPro has special logic triggering for 8-byte aligned blocks,
	   copying a whole cache line at once.  */
18270	if (TARGET_PENTIUMPRO)
18271	  desired_align = 8;
18272	else
18273	  desired_align = 4;
18274	break;
18275      case rep_prefix_1_byte:
	/* PentiumPro has special logic triggering for 8-byte aligned blocks,
	   copying a whole cache line at once.  */
18278	if (TARGET_PENTIUMPRO)
18279	  desired_align = 8;
18280	else
18281	  desired_align = 1;
18282	break;
18283      case loop_1_byte:
18284	desired_align = 1;
18285	break;
18286      case libcall:
18287	return 0;
18288    }
18289
18290  if (optimize_size)
18291    desired_align = 1;
18292  if (desired_align < align)
18293    desired_align = align;
18294  if (expected_size != -1 && expected_size < 4)
18295    desired_align = align;
18296  return desired_align;
18297}
18298
18299/* Return the smallest power of 2 greater than VAL.  */
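/* E.g. smallest_pow2_greater_than (4) == 8 and
   smallest_pow2_greater_than (0) == 1.  */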
18300static int
18301smallest_pow2_greater_than (int val)
18302{
18303  int ret = 1;
18304  while (ret <= val)
18305    ret <<= 1;
18306  return ret;
18307}
18308
18309/* Expand string move (memcpy) operation.  Use i386 string operations when
18310   profitable.  expand_setmem contains similar code.  The code depends upon
18311   architecture, block size and alignment, but always has the same
18312   overall structure:
18313
   1) Prologue guard: Conditional that jumps up to the epilogue for small
      blocks that can be handled by the epilogue alone.  This is faster, but
      also needed for correctness, since the prologue assumes the block is
      larger than the desired alignment.
18318
18319      Optional dynamic check for size and libcall for large
18320      blocks is emitted here too, with -minline-stringops-dynamically.
18321
   2) Prologue: copy the first few bytes in order to get the destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
      DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
      We emit either a jump tree on power-of-two sized blocks, or a byte loop.
18326
18327   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18328      with specified algorithm.
18329
18330   4) Epilogue: code copying tail of the block that is too small to be
18331      handled by main body (or up to size guarded by prologue guard).  */
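/* ix86_expand_movmem returns 0 when the expansion should be left to a
   library call (the chosen algorithm is a libcall, or COUNT is too large
   to handle safely); otherwise the copy is expanded inline.  */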
18332
18333int
18334ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18335		    rtx expected_align_exp, rtx expected_size_exp)
18336{
18337  rtx destreg;
18338  rtx srcreg;
18339  rtx label = NULL;
18340  rtx tmp;
18341  rtx jump_around_label = NULL;
18342  HOST_WIDE_INT align = 1;
18343  unsigned HOST_WIDE_INT count = 0;
18344  HOST_WIDE_INT expected_size = -1;
18345  int size_needed = 0, epilogue_size_needed;
18346  int desired_align = 0, align_bytes = 0;
18347  enum stringop_alg alg;
18348  int dynamic_check;
18349  bool need_zero_guard = false;
18350
18351  if (CONST_INT_P (align_exp))
18352    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
18354  if (CONST_INT_P (expected_align_exp)
18355      && INTVAL (expected_align_exp) > align)
18356    align = INTVAL (expected_align_exp);
18357  /* ALIGN is the minimum of destination and source alignment, but we care here
18358     just about destination alignment.  */
18359  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18360    align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18361
18362  if (CONST_INT_P (count_exp))
18363    count = expected_size = INTVAL (count_exp);
18364  if (CONST_INT_P (expected_size_exp) && count == 0)
18365    expected_size = INTVAL (expected_size_exp);
18366
18367  /* Make sure we don't need to care about overflow later on.  */
18368  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18369    return 0;
18370
18371  /* Step 0: Decide on preferred algorithm, desired alignment and
18372     size of chunks to be copied by main loop.  */
18373
18374  alg = decide_alg (count, expected_size, false, &dynamic_check);
18375  desired_align = decide_alignment (align, alg, expected_size);
18376
18377  if (!TARGET_ALIGN_STRINGOPS)
18378    align = desired_align;
18379
18380  if (alg == libcall)
18381    return 0;
18382  gcc_assert (alg != no_stringop);
18383  if (!count)
18384    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18385  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18386  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
18387  switch (alg)
18388    {
18389    case libcall:
18390    case no_stringop:
18391      gcc_unreachable ();
18392    case loop:
18393      need_zero_guard = true;
18394      size_needed = GET_MODE_SIZE (Pmode);
18395      break;
18396    case unrolled_loop:
18397      need_zero_guard = true;
18398      size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18399      break;
18400    case rep_prefix_8_byte:
18401      size_needed = 8;
18402      break;
18403    case rep_prefix_4_byte:
18404      size_needed = 4;
18405      break;
18406    case rep_prefix_1_byte:
18407      size_needed = 1;
18408      break;
18409    case loop_1_byte:
18410      need_zero_guard = true;
18411      size_needed = 1;
18412      break;
18413    }
18414
18415  epilogue_size_needed = size_needed;
18416
18417  /* Step 1: Prologue guard.  */
18418
18419  /* Alignment code needs count to be in register.  */
18420  if (CONST_INT_P (count_exp) && desired_align > align)
18421    {
18422      if (INTVAL (count_exp) > desired_align
18423	  && INTVAL (count_exp) > size_needed)
18424	{
18425	  align_bytes
18426	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18427	  if (align_bytes <= 0)
18428	    align_bytes = 0;
18429	  else
18430	    align_bytes = desired_align - align_bytes;
18431	}
18432      if (align_bytes == 0)
18433	count_exp = force_reg (counter_mode (count_exp), count_exp);
18434    }
18435  gcc_assert (desired_align >= 1 && align >= 1);
18436
18437  /* Ensure that alignment prologue won't copy past end of block.  */
18438  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18439    {
18440      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is a power of 2.  */
18443      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
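      /* For instance, with an 8-byte main-loop chunk, ALIGN == 1 and
	 DESIRED_ALIGN == 8, the two statements above yield MAX (7, 7) == 7,
	 rounded up to 8.  */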
18444
18445      if (count)
18446	{
18447	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18448	    {
18449	      /* If main algorithm works on QImode, no epilogue is needed.
18450		 For small sizes just don't align anything.  */
18451	      if (size_needed == 1)
18452		desired_align = align;
18453	      else
18454		goto epilogue;
18455	    }
18456	}
18457      else
18458	{
18459	  label = gen_label_rtx ();
18460	  emit_cmp_and_jump_insns (count_exp,
18461				   GEN_INT (epilogue_size_needed),
18462				   LTU, 0, counter_mode (count_exp), 1, label);
18463	  if (expected_size == -1 || expected_size < epilogue_size_needed)
18464	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
18465	  else
18466	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
18467	}
18468    }
18469
  /* Emit code to decide at runtime whether a library call or inline code
     should be used.  */
18472  if (dynamic_check != -1)
18473    {
18474      if (CONST_INT_P (count_exp))
18475	{
18476	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18477	    {
18478	      emit_block_move_via_libcall (dst, src, count_exp, false);
18479	      count_exp = const0_rtx;
18480	      goto epilogue;
18481	    }
18482	}
18483      else
18484	{
18485	  rtx hot_label = gen_label_rtx ();
18486	  jump_around_label = gen_label_rtx ();
18487	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18488				   LEU, 0, GET_MODE (count_exp), 1, hot_label);
18489	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
18490	  emit_block_move_via_libcall (dst, src, count_exp, false);
18491	  emit_jump (jump_around_label);
18492	  emit_label (hot_label);
18493	}
18494    }
18495
18496  /* Step 2: Alignment prologue.  */
18497
18498  if (desired_align > align)
18499    {
18500      if (align_bytes == 0)
18501	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It does not seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
18506	  src = change_address (src, BLKmode, srcreg);
18507	  dst = change_address (dst, BLKmode, destreg);
18508	  expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18509				  desired_align);
18510	}
18511      else
18512	{
18513	  /* If we know how many bytes need to be stored before dst is
18514	     sufficiently aligned, maintain aliasing info accurately.  */
18515	  dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18516						 desired_align, align_bytes);
18517	  count_exp = plus_constant (count_exp, -align_bytes);
18518	  count -= align_bytes;
18519	}
18520      if (need_zero_guard
18521	  && (count < (unsigned HOST_WIDE_INT) size_needed
18522	      || (align_bytes == 0
18523		  && count < ((unsigned HOST_WIDE_INT) size_needed
18524			      + desired_align - align))))
18525	{
18526	  /* It is possible that we copied enough so the main loop will not
18527	     execute.  */
18528	  gcc_assert (size_needed > 1);
18529	  if (label == NULL_RTX)
18530	    label = gen_label_rtx ();
18531	  emit_cmp_and_jump_insns (count_exp,
18532				   GEN_INT (size_needed),
18533				   LTU, 0, counter_mode (count_exp), 1, label);
18534	  if (expected_size == -1
18535	      || expected_size < (desired_align - align) / 2 + size_needed)
18536	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
18537	  else
18538	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
18539	}
18540    }
18541  if (label && size_needed == 1)
18542    {
18543      emit_label (label);
18544      LABEL_NUSES (label) = 1;
18545      label = NULL;
18546      epilogue_size_needed = 1;
18547    }
18548  else if (label == NULL_RTX)
18549    epilogue_size_needed = size_needed;
18550
18551  /* Step 3: Main loop.  */
18552
18553  switch (alg)
18554    {
18555    case libcall:
18556    case no_stringop:
18557      gcc_unreachable ();
18558    case loop_1_byte:
18559      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18560				     count_exp, QImode, 1, expected_size);
18561      break;
18562    case loop:
18563      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18564				     count_exp, Pmode, 1, expected_size);
18565      break;
18566    case unrolled_loop:
      /* Unroll only by a factor of 2 in 32-bit mode, since we don't have
	 enough registers for 4 temporaries anyway.  */
18569      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18570				     count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18571				     expected_size);
18572      break;
18573    case rep_prefix_8_byte:
18574      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18575				 DImode);
18576      break;
18577    case rep_prefix_4_byte:
18578      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18579				 SImode);
18580      break;
18581    case rep_prefix_1_byte:
18582      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18583				 QImode);
18584      break;
18585    }
  /* Properly adjust the offset of src and dest memory for aliasing.  */
18587  if (CONST_INT_P (count_exp))
18588    {
18589      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18590					  (count / size_needed) * size_needed);
18591      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18592					  (count / size_needed) * size_needed);
18593    }
18594  else
18595    {
18596      src = change_address (src, BLKmode, srcreg);
18597      dst = change_address (dst, BLKmode, destreg);
18598    }
18599
18600  /* Step 4: Epilogue to copy the remaining bytes.  */
18601 epilogue:
18602  if (label)
18603    {
      /* When the main loop is done, COUNT_EXP might hold the original count,
	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
	 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
	 bytes.  Compensate if needed.  */
18608
18609      if (size_needed < epilogue_size_needed)
18610	{
18611	  tmp =
18612	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18613				 GEN_INT (size_needed - 1), count_exp, 1,
18614				 OPTAB_DIRECT);
18615	  if (tmp != count_exp)
18616	    emit_move_insn (count_exp, tmp);
18617	}
18618      emit_label (label);
18619      LABEL_NUSES (label) = 1;
18620    }
18621
18622  if (count_exp != const0_rtx && epilogue_size_needed > 1)
18623    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18624			    epilogue_size_needed);
18625  if (jump_around_label)
18626    emit_label (jump_around_label);
18627  return 1;
18628}
18629
/* Helper function for memcpy.  For a QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
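/* For example, duplicating 0xXY into SImode by shifts expands to roughly

     v |= v << 8;		v is now 0x0000XYXY
     v |= v << 16;		v is now 0xXYXYXYXY

   with one extra "v |= v << 32" step for DImode; the constant path below
   computes the same value directly.  */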
18635static rtx
18636promote_duplicated_reg (enum machine_mode mode, rtx val)
18637{
18638  enum machine_mode valmode = GET_MODE (val);
18639  rtx tmp;
18640  int nops = mode == DImode ? 3 : 2;
18641
18642  gcc_assert (mode == SImode || mode == DImode);
18643  if (val == const0_rtx)
18644    return copy_to_mode_reg (mode, const0_rtx);
18645  if (CONST_INT_P (val))
18646    {
18647      HOST_WIDE_INT v = INTVAL (val) & 255;
18648
18649      v |= v << 8;
18650      v |= v << 16;
18651      if (mode == DImode)
18652        v |= (v << 16) << 16;
18653      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18654    }
18655
18656  if (valmode == VOIDmode)
18657    valmode = QImode;
18658  if (valmode != QImode)
18659    val = gen_lowpart (QImode, val);
18660  if (mode == QImode)
18661    return val;
18662  if (!TARGET_PARTIAL_REG_STALL)
18663    nops--;
18664  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18665      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18666      <= (ix86_cost->shift_const + ix86_cost->add) * nops
18667          + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18668    {
18669      rtx reg = convert_modes (mode, QImode, val, true);
18670      tmp = promote_duplicated_reg (mode, const1_rtx);
18671      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18672				  OPTAB_DIRECT);
18673    }
18674  else
18675    {
18676      rtx reg = convert_modes (mode, QImode, val, true);
18677
18678      if (!TARGET_PARTIAL_REG_STALL)
18679	if (mode == SImode)
18680	  emit_insn (gen_movsi_insv_1 (reg, reg));
18681	else
18682	  emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18683      else
18684	{
18685	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18686				     NULL, 1, OPTAB_DIRECT);
18687	  reg =
18688	    expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18689	}
18690      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18691			         NULL, 1, OPTAB_DIRECT);
18692      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18693      if (mode == SImode)
18694	return reg;
18695      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18696				 NULL, 1, OPTAB_DIRECT);
18697      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18698      return reg;
18699    }
18700}
18701
18702/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18703   be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18704   alignment from ALIGN to DESIRED_ALIGN.  */
18705static rtx
18706promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18707{
18708  rtx promoted_val;
18709
18710  if (TARGET_64BIT
18711      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18712    promoted_val = promote_duplicated_reg (DImode, val);
18713  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18714    promoted_val = promote_duplicated_reg (SImode, val);
18715  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18716    promoted_val = promote_duplicated_reg (HImode, val);
18717  else
18718    promoted_val = val;
18719
18720  return promoted_val;
18721}
18722
/* Expand a string set operation (memset/bzero).  Use i386 string operations
   when profitable.  See the comment above ix86_expand_movmem for an
   explanation of the individual steps performed.  */
18726int
18727ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18728		    rtx expected_align_exp, rtx expected_size_exp)
18729{
18730  rtx destreg;
18731  rtx label = NULL;
18732  rtx tmp;
18733  rtx jump_around_label = NULL;
18734  HOST_WIDE_INT align = 1;
18735  unsigned HOST_WIDE_INT count = 0;
18736  HOST_WIDE_INT expected_size = -1;
18737  int size_needed = 0, epilogue_size_needed;
18738  int desired_align = 0, align_bytes = 0;
18739  enum stringop_alg alg;
18740  rtx promoted_val = NULL;
18741  bool force_loopy_epilogue = false;
18742  int dynamic_check;
18743  bool need_zero_guard = false;
18744
18745  if (CONST_INT_P (align_exp))
18746    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
18748  if (CONST_INT_P (expected_align_exp)
18749      && INTVAL (expected_align_exp) > align)
18750    align = INTVAL (expected_align_exp);
18751  if (CONST_INT_P (count_exp))
18752    count = expected_size = INTVAL (count_exp);
18753  if (CONST_INT_P (expected_size_exp) && count == 0)
18754    expected_size = INTVAL (expected_size_exp);
18755
18756  /* Make sure we don't need to care about overflow later on.  */
18757  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18758    return 0;
18759
18760  /* Step 0: Decide on preferred algorithm, desired alignment and
18761     size of chunks to be copied by main loop.  */
18762
18763  alg = decide_alg (count, expected_size, true, &dynamic_check);
18764  desired_align = decide_alignment (align, alg, expected_size);
18765
18766  if (!TARGET_ALIGN_STRINGOPS)
18767    align = desired_align;
18768
18769  if (alg == libcall)
18770    return 0;
18771  gcc_assert (alg != no_stringop);
18772  if (!count)
18773    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18774  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18775  switch (alg)
18776    {
18777    case libcall:
18778    case no_stringop:
18779      gcc_unreachable ();
18780    case loop:
18781      need_zero_guard = true;
18782      size_needed = GET_MODE_SIZE (Pmode);
18783      break;
18784    case unrolled_loop:
18785      need_zero_guard = true;
18786      size_needed = GET_MODE_SIZE (Pmode) * 4;
18787      break;
18788    case rep_prefix_8_byte:
18789      size_needed = 8;
18790      break;
18791    case rep_prefix_4_byte:
18792      size_needed = 4;
18793      break;
18794    case rep_prefix_1_byte:
18795      size_needed = 1;
18796      break;
18797    case loop_1_byte:
18798      need_zero_guard = true;
18799      size_needed = 1;
18800      break;
18801    }
18802  epilogue_size_needed = size_needed;
18803
18804  /* Step 1: Prologue guard.  */
18805
18806  /* Alignment code needs count to be in register.  */
18807  if (CONST_INT_P (count_exp) && desired_align > align)
18808    {
18809      if (INTVAL (count_exp) > desired_align
18810	  && INTVAL (count_exp) > size_needed)
18811	{
18812	  align_bytes
18813	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18814	  if (align_bytes <= 0)
18815	    align_bytes = 0;
18816	  else
18817	    align_bytes = desired_align - align_bytes;
18818	}
18819      if (align_bytes == 0)
18820	{
18821	  enum machine_mode mode = SImode;
18822	  if (TARGET_64BIT && (count & ~0xffffffff))
18823	    mode = DImode;
18824	  count_exp = force_reg (mode, count_exp);
18825	}
18826    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all the code).  */
18830  if (CONST_INT_P (val_exp))
18831    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18832						   desired_align, align);
18833  /* Ensure that alignment prologue won't copy past end of block.  */
18834  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18835    {
18836      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18837      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18838	 Make sure it is power of 2.  */
18839      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18840
      /* To improve performance of small blocks, we jump around the VAL
	 promoting code.  This means that if the promoted VAL is not constant,
	 we might not use it in the epilogue and have to use the byte
	 loop variant.  */
18845      if (epilogue_size_needed > 2 && !promoted_val)
18846        force_loopy_epilogue = true;
18847      if (count)
18848	{
18849	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18850	    {
18851	      /* If main algorithm works on QImode, no epilogue is needed.
18852		 For small sizes just don't align anything.  */
18853	      if (size_needed == 1)
18854		desired_align = align;
18855	      else
18856		goto epilogue;
18857	    }
18858	}
18859      else
18860	{
18861	  label = gen_label_rtx ();
18862	  emit_cmp_and_jump_insns (count_exp,
18863				   GEN_INT (epilogue_size_needed),
18864				   LTU, 0, counter_mode (count_exp), 1, label);
18865	  if (expected_size == -1 || expected_size <= epilogue_size_needed)
18866	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
18867	  else
18868	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
18869	}
18870    }
18871  if (dynamic_check != -1)
18872    {
18873      rtx hot_label = gen_label_rtx ();
18874      jump_around_label = gen_label_rtx ();
18875      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18876			       LEU, 0, counter_mode (count_exp), 1, hot_label);
18877      predict_jump (REG_BR_PROB_BASE * 90 / 100);
18878      set_storage_via_libcall (dst, count_exp, val_exp, false);
18879      emit_jump (jump_around_label);
18880      emit_label (hot_label);
18881    }
18882
18883  /* Step 2: Alignment prologue.  */
18884
  /* Do the expensive promotion once we have branched off the small blocks.  */
18886  if (!promoted_val)
18887    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18888						   desired_align, align);
18889  gcc_assert (desired_align >= 1 && align >= 1);
18890
18891  if (desired_align > align)
18892    {
18893      if (align_bytes == 0)
18894	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It does not seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
18899	  dst = change_address (dst, BLKmode, destreg);
18900	  expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18901				  desired_align);
18902	}
18903      else
18904	{
18905	  /* If we know how many bytes need to be stored before dst is
18906	     sufficiently aligned, maintain aliasing info accurately.  */
18907	  dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18908						 desired_align, align_bytes);
18909	  count_exp = plus_constant (count_exp, -align_bytes);
18910	  count -= align_bytes;
18911	}
18912      if (need_zero_guard
18913	  && (count < (unsigned HOST_WIDE_INT) size_needed
18914	      || (align_bytes == 0
18915		  && count < ((unsigned HOST_WIDE_INT) size_needed
18916			      + desired_align - align))))
18917	{
18918	  /* It is possible that we copied enough so the main loop will not
18919	     execute.  */
18920	  gcc_assert (size_needed > 1);
18921	  if (label == NULL_RTX)
18922	    label = gen_label_rtx ();
18923	  emit_cmp_and_jump_insns (count_exp,
18924				   GEN_INT (size_needed),
18925				   LTU, 0, counter_mode (count_exp), 1, label);
18926	  if (expected_size == -1
18927	      || expected_size < (desired_align - align) / 2 + size_needed)
18928	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
18929	  else
18930	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
18931	}
18932    }
18933  if (label && size_needed == 1)
18934    {
18935      emit_label (label);
18936      LABEL_NUSES (label) = 1;
18937      label = NULL;
18938      promoted_val = val_exp;
18939      epilogue_size_needed = 1;
18940    }
18941  else if (label == NULL_RTX)
18942    epilogue_size_needed = size_needed;
18943
18944  /* Step 3: Main loop.  */
18945
18946  switch (alg)
18947    {
18948    case libcall:
18949    case no_stringop:
18950      gcc_unreachable ();
18951    case loop_1_byte:
18952      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18953				     count_exp, QImode, 1, expected_size);
18954      break;
18955    case loop:
18956      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18957				     count_exp, Pmode, 1, expected_size);
18958      break;
18959    case unrolled_loop:
18960      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18961				     count_exp, Pmode, 4, expected_size);
18962      break;
18963    case rep_prefix_8_byte:
18964      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18965				  DImode, val_exp);
18966      break;
18967    case rep_prefix_4_byte:
18968      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18969				  SImode, val_exp);
18970      break;
18971    case rep_prefix_1_byte:
18972      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18973				  QImode, val_exp);
18974      break;
18975    }
  /* Properly adjust the offset of the dest memory for aliasing.  */
18977  if (CONST_INT_P (count_exp))
18978    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18979					(count / size_needed) * size_needed);
18980  else
18981    dst = change_address (dst, BLKmode, destreg);
18982
18983  /* Step 4: Epilogue to copy the remaining bytes.  */
18984
18985  if (label)
18986    {
      /* When the main loop is done, COUNT_EXP might hold the original count,
	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
	 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
	 bytes.  Compensate if needed.  */
18991
18992      if (size_needed < epilogue_size_needed)
18993	{
18994	  tmp =
18995	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18996				 GEN_INT (size_needed - 1), count_exp, 1,
18997				 OPTAB_DIRECT);
18998	  if (tmp != count_exp)
18999	    emit_move_insn (count_exp, tmp);
19000	}
19001      emit_label (label);
19002      LABEL_NUSES (label) = 1;
19003    }
19004 epilogue:
19005  if (count_exp != const0_rtx && epilogue_size_needed > 1)
19006    {
19007      if (force_loopy_epilogue)
19008	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
19009					 epilogue_size_needed);
19010      else
19011	expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
19012				epilogue_size_needed);
19013    }
19014  if (jump_around_label)
19015    emit_label (jump_around_label);
19016  return 1;
19017}
19018
19019/* Expand the appropriate insns for doing strlen if not just doing
19020   repnz; scasb
19021
19022   out = result, initialized with the start address
19023   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined
19026
19027   This is just the body. It needs the initializations mentioned above and
19028   some address computing at the end.  These things are done in i386.md.  */
19029
19030static void
19031ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
19032{
19033  int align;
19034  rtx tmp;
19035  rtx align_2_label = NULL_RTX;
19036  rtx align_3_label = NULL_RTX;
19037  rtx align_4_label = gen_label_rtx ();
19038  rtx end_0_label = gen_label_rtx ();
19039  rtx mem;
19040  rtx tmpreg = gen_reg_rtx (SImode);
19041  rtx scratch = gen_reg_rtx (SImode);
19042  rtx cmp;
19043
19044  align = 0;
19045  if (CONST_INT_P (align_rtx))
19046    align = INTVAL (align_rtx);
19047
19048  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
19049
19050  /* Is there a known alignment and is it less than 4?  */
19051  if (align < 4)
19052    {
19053      rtx scratch1 = gen_reg_rtx (Pmode);
19054      emit_move_insn (scratch1, out);
19055      /* Is there a known alignment and is it not 2? */
19056      if (align != 2)
19057	{
19058	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
19059	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
19060
19061	  /* Leave just the 3 lower bits.  */
19062	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
19063				    NULL_RTX, 0, OPTAB_WIDEN);
19064
19065	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19066				   Pmode, 1, align_4_label);
19067	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
19068				   Pmode, 1, align_2_label);
19069	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
19070				   Pmode, 1, align_3_label);
19071	}
19072      else
19073        {
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if it is aligned to a 4-byte boundary.  */
19076
19077	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
19078				    NULL_RTX, 0, OPTAB_WIDEN);
19079
19080	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19081				   Pmode, 1, align_4_label);
19082        }
19083
19084      mem = change_address (src, QImode, out);
19085
19086      /* Now compare the bytes.  */
19087
      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
19089      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
19090			       QImode, 1, end_0_label);
19091
19092      /* Increment the address.  */
19093      emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19094
19095      /* Not needed with an alignment of 2 */
19096      if (align != 2)
19097	{
19098	  emit_label (align_2_label);
19099
19100	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19101				   end_0_label);
19102
19103	  emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19104
19105	  emit_label (align_3_label);
19106	}
19107
19108      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19109			       end_0_label);
19110
19111      emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19112    }
19113
  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
     align this loop; doing so only makes the program bigger and does not
     speed it up.  */
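  /* In C terms, the loop below is roughly

       do
	 {
	   scratch = *(unsigned int *) out;
	   out += 4;
	 }
       while (no byte of scratch is zero);

     with the zero-byte test done on all four bytes at once, as described
     below.  */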
19117  emit_label (align_4_label);
19118
19119  mem = change_address (src, SImode, out);
19120  emit_move_insn (scratch, mem);
19121  emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
19122
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
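  /* Concretely, with X in SCRATCH the sequence below computes
       (X - 0x01010101) & ~X & 0x80808080
     which is nonzero exactly when some byte of X is zero; e.g.
     X = 0x12005678 gives 0x00800000, while X = 0x12345678 gives 0.  */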
19125
19126  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
19127  emit_insn (gen_one_cmplsi2 (scratch, scratch));
19128  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
19129  emit_insn (gen_andsi3 (tmpreg, tmpreg,
19130			 gen_int_mode (0x80808080, SImode)));
19131  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
19132			   align_4_label);
19133
19134  if (TARGET_CMOVE)
19135    {
19136       rtx reg = gen_reg_rtx (SImode);
19137       rtx reg2 = gen_reg_rtx (Pmode);
19138       emit_move_insn (reg, tmpreg);
19139       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
19140
19141       /* If zero is not in the first two bytes, move two bytes forward.  */
19142       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19143       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19144       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19145       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
19146			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
19147						     reg,
19148						     tmpreg)));
19149       /* Emit lea manually to avoid clobbering of flags.  */
19150       emit_insn (gen_rtx_SET (SImode, reg2,
19151			       gen_rtx_PLUS (Pmode, out, const2_rtx)));
19152
19153       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19154       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19155       emit_insn (gen_rtx_SET (VOIDmode, out,
19156			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
19157						     reg2,
19158						     out)));
19159    }
19160  else
19161    {
19162       rtx end_2_label = gen_label_rtx ();
19163       /* Is zero in the first two bytes? */
19164
19165       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19166       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19167       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19168       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19169                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19170                            pc_rtx);
19171       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19172       JUMP_LABEL (tmp) = end_2_label;
19173
19174       /* Not in the first two.  Move two bytes forward.  */
19175       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19176       emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
19177
19178       emit_label (end_2_label);
19179
19180    }
19181
19182  /* Avoid branch in fixing the byte.  */
19183  tmpreg = gen_lowpart (QImode, tmpreg);
19184  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19185  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
19186  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
19187  emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), tmp, cmp));
19188
19189  emit_label (end_0_label);
19190}
19191
19192/* Expand strlen.  */
19193
19194int
19195ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19196{
19197  rtx addr, scratch1, scratch2, scratch3, scratch4;
19198
  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */
19201
19202  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19203      && !TARGET_INLINE_ALL_STRINGOPS
19204      && !optimize_insn_for_size_p ()
19205      && (!CONST_INT_P (align) || INTVAL (align) < 4))
19206    return 0;
19207
19208  addr = force_reg (Pmode, XEXP (src, 0));
19209  scratch1 = gen_reg_rtx (Pmode);
19210
19211  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19212      && !optimize_insn_for_size_p ())
19213    {
      /* It seems that some optimizers do not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction are done here.  They do calculate
         the length just once when these instructions are done inside
         output_strlen_unroll().  But since &bar[strlen(bar)] is often used,
         and this uses one fewer register for the lifetime of
         output_strlen_unroll(), this is better.  */
19221
19222      emit_move_insn (out, addr);
19223
19224      ix86_expand_strlensi_unroll_1 (out, src, align);
19225
19226      /* strlensi_unroll_1 returns the address of the zero at the end of
19227         the string, like memchr(), so compute the length by subtracting
19228         the start address.  */
19229      emit_insn ((*ix86_gen_sub3) (out, out, addr));
19230    }
19231  else
19232    {
19233      rtx unspec;
19234
19235      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
19236      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19237        return false;
19238
19239      scratch2 = gen_reg_rtx (Pmode);
19240      scratch3 = gen_reg_rtx (Pmode);
19241      scratch4 = force_reg (Pmode, constm1_rtx);
19242
19243      emit_move_insn (scratch3, addr);
19244      eoschar = force_reg (QImode, eoschar);
19245
19246      src = replace_equiv_address_nv (src, scratch3);
19247
19248      /* If .md starts supporting :P, this can be done in .md.  */
19249      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19250						 scratch4), UNSPEC_SCAS);
19251      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19252      emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19253      emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19254    }
19255  return 1;
19256}
19257
/* For a given symbol (function), construct code to compute the address of its
   PLT entry in the large x86-64 PIC model.  */
19260rtx
19261construct_plt_address (rtx symbol)
19262{
19263  rtx tmp = gen_reg_rtx (Pmode);
19264  rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19265
19266  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19267  gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19268
19269  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19270  emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
19271  return tmp;
19272}
19273
19274void
19275ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19276		  rtx callarg2,
19277		  rtx pop, int sibcall)
19278{
19279  rtx use = NULL, call;
19280
19281  if (pop == const0_rtx)
19282    pop = NULL;
19283  gcc_assert (!TARGET_64BIT || !pop);
19284
19285  if (TARGET_MACHO && !TARGET_64BIT)
19286    {
19287#if TARGET_MACHO
19288      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19289	fnaddr = machopic_indirect_call_target (fnaddr);
19290#endif
19291    }
19292  else
19293    {
19294      /* Static functions and indirect calls don't need the pic register.  */
19295      if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19296	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19297	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19298	use_reg (&use, pic_offset_table_rtx);
19299    }
19300
19301  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19302    {
19303      rtx al = gen_rtx_REG (QImode, AX_REG);
19304      emit_move_insn (al, callarg2);
19305      use_reg (&use, al);
19306    }
19307
19308  if (ix86_cmodel == CM_LARGE_PIC
19309      && MEM_P (fnaddr)
19310      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19311      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19312    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19313  else if (sibcall
19314	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
19315	   : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
19316    {
19317      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19318      fnaddr = gen_rtx_MEM (QImode, fnaddr);
19319    }
19320
19321  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19322  if (retval)
19323    call = gen_rtx_SET (VOIDmode, retval, call);
19324  if (pop)
19325    {
19326      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19327      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19328      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19329    }
19330  if (TARGET_64BIT
19331      && ix86_cfun_abi () == MS_ABI
19332      && (!callarg2 || INTVAL (callarg2) != -2))
19333    {
19334      /* We need to represent that SI and DI registers are clobbered
19335	 by SYSV calls.  */
19336      static int clobbered_registers[] = {
19337	XMM6_REG, XMM7_REG, XMM8_REG,
19338	XMM9_REG, XMM10_REG, XMM11_REG,
19339	XMM12_REG, XMM13_REG, XMM14_REG,
19340	XMM15_REG, SI_REG, DI_REG
19341      };
19342      unsigned int i;
19343      rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19344      rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19345      				   UNSPEC_MS_TO_SYSV_CALL);
19346
19347      vec[0] = call;
19348      vec[1] = unspec;
19349      for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19350        vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19351				      ? TImode : DImode,
19352				      gen_rtx_REG
19353				        (SSE_REGNO_P (clobbered_registers[i])
19354						      ? TImode : DImode,
19355					 clobbered_registers[i]));
19356
19357      call = gen_rtx_PARALLEL (VOIDmode,
19358      			       gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19359			       + 2, vec));
19360    }
19361
19362  call = emit_call_insn (call);
19363  if (use)
19364    CALL_INSN_FUNCTION_USAGE (call) = use;
19365}
19366
19367
19368/* Clear stack slot assignments remembered from previous functions.
19369   This is called from INIT_EXPANDERS once before RTL is emitted for each
19370   function.  */
19371
19372static struct machine_function *
19373ix86_init_machine_status (void)
19374{
19375  struct machine_function *f;
19376
19377  f = GGC_CNEW (struct machine_function);
19378  f->use_fast_prologue_epilogue_nregs = -1;
19379  f->tls_descriptor_call_expanded_p = 0;
19380  f->call_abi = ix86_abi;
19381
19382  return f;
19383}
19384
19385/* Return a MEM corresponding to a stack slot with mode MODE.
19386   Allocate a new slot if necessary.
19387
19388   The RTL for a function can have several slots available: N is
19389   which slot to use.  */
19390
19391rtx
19392assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19393{
19394  struct stack_local_entry *s;
19395
19396  gcc_assert (n < MAX_386_STACK_LOCALS);
19397
19398  /* Virtual slot is valid only before vregs are instantiated.  */
19399  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
19400
19401  for (s = ix86_stack_locals; s; s = s->next)
19402    if (s->mode == mode && s->n == n)
19403      return copy_rtx (s->rtl);
19404
19405  s = (struct stack_local_entry *)
19406    ggc_alloc (sizeof (struct stack_local_entry));
19407  s->n = n;
19408  s->mode = mode;
19409  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19410
19411  s->next = ix86_stack_locals;
19412  ix86_stack_locals = s;
19413  return s->rtl;
19414}
19415
19416/* Construct the SYMBOL_REF for the tls_get_addr function.  */
19417
19418static GTY(()) rtx ix86_tls_symbol;
19419rtx
19420ix86_tls_get_addr (void)
19421{
19422
19423  if (!ix86_tls_symbol)
19424    {
19425      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19426					    (TARGET_ANY_GNU_TLS
19427					     && !TARGET_64BIT)
19428					    ? "___tls_get_addr"
19429					    : "__tls_get_addr");
19430    }
19431
19432  return ix86_tls_symbol;
19433}
19434
19435/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
19436
19437static GTY(()) rtx ix86_tls_module_base_symbol;
19438rtx
19439ix86_tls_module_base (void)
19440{
19441
19442  if (!ix86_tls_module_base_symbol)
19443    {
19444      ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19445							"_TLS_MODULE_BASE_");
19446      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19447	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19448    }
19449
19450  return ix86_tls_module_base_symbol;
19451}
19452
19453/* Calculate the length of the memory address in the instruction
19454   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
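/* For example, the lengths returned for a few address forms are
     (%eax)          -> 0
     (%esp)          -> 1  (SIB byte)
     8(%ebp)         -> 1  (disp8)
     8(%eax,%ebx,4)  -> 2  (SIB byte + disp8)
     symbol/disp32   -> 4  (an absolute constant address in 64-bit code also
			    needs a SIB byte, hence 5)
   plus 1 for an %fs/%gs segment override.  */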
19455
19456int
19457memory_address_length (rtx addr)
19458{
19459  struct ix86_address parts;
19460  rtx base, index, disp;
19461  int len;
19462  int ok;
19463
19464  if (GET_CODE (addr) == PRE_DEC
19465      || GET_CODE (addr) == POST_INC
19466      || GET_CODE (addr) == PRE_MODIFY
19467      || GET_CODE (addr) == POST_MODIFY)
19468    return 0;
19469
19470  ok = ix86_decompose_address (addr, &parts);
19471  gcc_assert (ok);
19472
19473  if (parts.base && GET_CODE (parts.base) == SUBREG)
19474    parts.base = SUBREG_REG (parts.base);
19475  if (parts.index && GET_CODE (parts.index) == SUBREG)
19476    parts.index = SUBREG_REG (parts.index);
19477
19478  base = parts.base;
19479  index = parts.index;
19480  disp = parts.disp;
19481  len = 0;
19482
19483  /* Rule of thumb:
19484       - esp as the base always wants an index,
19485       - ebp as the base always wants a displacement,
19486       - r12 as the base always wants an index,
19487       - r13 as the base always wants a displacement.  */
19488
19489  /* Register Indirect.  */
19490  if (base && !index && !disp)
19491    {
19492      /* esp (for its index) and ebp (for its displacement) need
19493	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
19494	 code.  */
19495      if (REG_P (addr)
19496	  && (addr == arg_pointer_rtx
19497	      || addr == frame_pointer_rtx
19498	      || REGNO (addr) == SP_REG
19499	      || REGNO (addr) == BP_REG
19500	      || REGNO (addr) == R12_REG
19501	      || REGNO (addr) == R13_REG))
19502	len = 1;
19503    }
19504
  /* Direct Addressing.  In 64-bit mode, mod 00 r/m 5
     is not disp32 but disp32(%rip), so for disp32 a
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by the UNSPEC.  */
19510  else if (disp && !base && !index)
19511    {
19512      len = 4;
19513      if (TARGET_64BIT)
19514	{
19515	  rtx symbol = disp;
19516
19517	  if (GET_CODE (disp) == CONST)
19518	    symbol = XEXP (disp, 0);
19519	  if (GET_CODE (symbol) == PLUS
19520	      && CONST_INT_P (XEXP (symbol, 1)))
19521	    symbol = XEXP (symbol, 0);
19522
19523	  if (GET_CODE (symbol) != LABEL_REF
19524	      && (GET_CODE (symbol) != SYMBOL_REF
19525		  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
19526	      && (GET_CODE (symbol) != UNSPEC
19527		  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
19528		      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
19529	    len += 1;
19530	}
19531    }
19532
19533  else
19534    {
19535      /* Find the length of the displacement constant.  */
19536      if (disp)
19537	{
19538	  if (base && satisfies_constraint_K (disp))
19539	    len = 1;
19540	  else
19541	    len = 4;
19542	}
19543      /* ebp always wants a displacement.  Similarly r13.  */
19544      else if (base && REG_P (base)
19545	       && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
19546	len = 1;
19547
19548      /* An index requires the two-byte modrm form....  */
19549      if (index
19550	  /* ...like esp (or r12), which always wants an index.  */
19551	  || base == arg_pointer_rtx
19552	  || base == frame_pointer_rtx
19553	  || (base && REG_P (base)
19554	      && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
19555	len += 1;
19556    }
19557
19558  switch (parts.seg)
19559    {
19560    case SEG_FS:
19561    case SEG_GS:
19562      len += 1;
19563      break;
19564    default:
19565      break;
19566    }
19567
19568  return len;
19569}
19570
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
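/* For example, with SHORTFORM set an SImode instruction with immediate 100
   fits the imm8 alternative and yields 1, while immediate 1000 yields 4.  */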
19573int
19574ix86_attr_length_immediate_default (rtx insn, int shortform)
19575{
19576  int len = 0;
19577  int i;
19578  extract_insn_cached (insn);
19579  for (i = recog_data.n_operands - 1; i >= 0; --i)
19580    if (CONSTANT_P (recog_data.operand[i]))
19581      {
19582        enum attr_mode mode = get_attr_mode (insn);
19583
19584	gcc_assert (!len);
19585	if (shortform && CONST_INT_P (recog_data.operand[i]))
19586	  {
19587	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
19588	    switch (mode)
19589	      {
19590	      case MODE_QI:
19591		len = 1;
19592		continue;
19593	      case MODE_HI:
19594		ival = trunc_int_for_mode (ival, HImode);
19595		break;
19596	      case MODE_SI:
19597		ival = trunc_int_for_mode (ival, SImode);
19598		break;
19599	      default:
19600		break;
19601	      }
19602	    if (IN_RANGE (ival, -128, 127))
19603	      {
19604		len = 1;
19605		continue;
19606	      }
19607	  }
19608	switch (mode)
19609	  {
19610	  case MODE_QI:
19611	    len = 1;
19612	    break;
19613	  case MODE_HI:
19614	    len = 2;
19615	    break;
19616	  case MODE_SI:
19617	    len = 4;
19618	    break;
	  /* Immediates for DImode instructions are encoded as 32-bit
	     sign-extended values.  */
19620	  case MODE_DI:
19621	    len = 4;
19622	    break;
19623	  default:
19624	    fatal_insn ("unknown insn mode", insn);
19625	}
19626      }
19627  return len;
19628}
19629/* Compute default value for "length_address" attribute.  */
19630int
19631ix86_attr_length_address_default (rtx insn)
19632{
19633  int i;
19634
19635  if (get_attr_type (insn) == TYPE_LEA)
19636    {
19637      rtx set = PATTERN (insn), addr;
19638
19639      if (GET_CODE (set) == PARALLEL)
19640	set = XVECEXP (set, 0, 0);
19641
19642      gcc_assert (GET_CODE (set) == SET);
19643
19644      addr = SET_SRC (set);
19645      if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
19646	{
19647	  if (GET_CODE (addr) == ZERO_EXTEND)
19648	    addr = XEXP (addr, 0);
19649	  if (GET_CODE (addr) == SUBREG)
19650	    addr = SUBREG_REG (addr);
19651	}
19652
19653      return memory_address_length (addr);
19654    }
19655
19656  extract_insn_cached (insn);
19657  for (i = recog_data.n_operands - 1; i >= 0; --i)
19658    if (MEM_P (recog_data.operand[i]))
19659      {
19660        constrain_operands_cached (reload_completed);
19661        if (which_alternative != -1)
19662	  {
19663	    const char *constraints = recog_data.constraints[i];
19664	    int alt = which_alternative;
19665
19666	    while (*constraints == '=' || *constraints == '+')
19667	      constraints++;
19668	    while (alt-- > 0)
19669	      while (*constraints++ != ',')
19670		;
19671	    /* Skip ignored operands.  */
19672	    if (*constraints == 'X')
19673	      continue;
19674	  }
19675	return memory_address_length (XEXP (recog_data.operand[i], 0));
19676      }
19677  return 0;
19678}
19679
/* Compute the default value for the "length_vex" attribute.  It includes the
   2- or 3-byte VEX prefix and 1 opcode byte.  */
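/* In other words, this returns 3 when the 2-byte VEX form suffices and 4 when
   the 3-byte form is needed: a non-0f opcode map, VEX.W, or, in 64-bit code,
   an operand that requires the REX.W, REX.X or REX.B equivalent bits.  */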
19682
19683int
19684ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19685			      int has_vex_w)
19686{
19687  int i;
19688
  /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit requires
     the 3-byte VEX prefix.  */
19691  if (!has_0f_opcode || has_vex_w)
19692    return 3 + 1;
19693
  /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
19695  if (!TARGET_64BIT)
19696    return 2 + 1;
19697
19698  extract_insn_cached (insn);
19699
19700  for (i = recog_data.n_operands - 1; i >= 0; --i)
19701    if (REG_P (recog_data.operand[i]))
19702      {
19703	/* REX.W bit uses 3 byte VEX prefix.  */
19704	if (GET_MODE (recog_data.operand[i]) == DImode
19705	    && GENERAL_REG_P (recog_data.operand[i]))
19706	  return 3 + 1;
19707      }
19708    else
19709      {
19710	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
19711	if (MEM_P (recog_data.operand[i])
19712	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19713	  return 3 + 1;
19714      }
19715
19716  return 2 + 1;
19717}
19718
19719/* Return the maximum number of instructions a cpu can issue.  */
19720
19721static int
19722ix86_issue_rate (void)
19723{
19724  switch (ix86_tune)
19725    {
19726    case PROCESSOR_PENTIUM:
19727    case PROCESSOR_ATOM:
19728    case PROCESSOR_K6:
19729      return 2;
19730
19731    case PROCESSOR_PENTIUMPRO:
19732    case PROCESSOR_PENTIUM4:
19733    case PROCESSOR_ATHLON:
19734    case PROCESSOR_K8:
19735    case PROCESSOR_AMDFAM10:
19736    case PROCESSOR_NOCONA:
19737    case PROCESSOR_GENERIC32:
19738    case PROCESSOR_GENERIC64:
19739      return 3;
19740
19741    case PROCESSOR_CORE2:
19742      return 4;
19743
19744    default:
19745      return 1;
19746    }
19747}
19748
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
   by DEP_INSN and nothing else set by DEP_INSN.  */
19751
19752static int
19753ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19754{
19755  rtx set, set2;
19756
19757  /* Simplify the test for uninteresting insns.  */
19758  if (insn_type != TYPE_SETCC
19759      && insn_type != TYPE_ICMOV
19760      && insn_type != TYPE_FCMOV
19761      && insn_type != TYPE_IBR)
19762    return 0;
19763
19764  if ((set = single_set (dep_insn)) != 0)
19765    {
19766      set = SET_DEST (set);
19767      set2 = NULL_RTX;
19768    }
19769  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19770	   && XVECLEN (PATTERN (dep_insn), 0) == 2
19771	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19772	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19773    {
19774      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
19776    }
19777  else
19778    return 0;
19779
19780  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19781    return 0;
19782
19783  /* This test is true if the dependent insn reads the flags but
19784     not any other potentially set register.  */
19785  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19786    return 0;
19787
19788  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19789    return 0;
19790
19791  return 1;
19792}
19793
19794/* Return true iff USE_INSN has a memory address with operands set by
19795   SET_INSN.  */
19796
19797bool
19798ix86_agi_dependent (rtx set_insn, rtx use_insn)
19799{
19800  int i;
19801  extract_insn_cached (use_insn);
19802  for (i = recog_data.n_operands - 1; i >= 0; --i)
19803    if (MEM_P (recog_data.operand[i]))
19804      {
19805	rtx addr = XEXP (recog_data.operand[i], 0);
19806	return modified_in_p (addr, set_insn) != 0;
19807      }
19808  return false;
19809}
19810
19811static int
19812ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19813{
19814  enum attr_type insn_type, dep_insn_type;
19815  enum attr_memory memory;
19816  rtx set, set2;
19817  int dep_insn_code_number;
19818
19819  /* Anti and output dependencies have zero cost on all CPUs.  */
19820  if (REG_NOTE_KIND (link) != 0)
19821    return 0;
19822
19823  dep_insn_code_number = recog_memoized (dep_insn);
19824
19825  /* If we can't recognize the insns, we can't really do anything.  */
19826  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19827    return cost;
19828
19829  insn_type = get_attr_type (insn);
19830  dep_insn_type = get_attr_type (dep_insn);
19831
19832  switch (ix86_tune)
19833    {
19834    case PROCESSOR_PENTIUM:
19835      /* Address Generation Interlock adds a cycle of latency.  */
19836      if (insn_type == TYPE_LEA)
19837	{
19838	  rtx addr = PATTERN (insn);
19839
19840	  if (GET_CODE (addr) == PARALLEL)
19841	    addr = XVECEXP (addr, 0, 0);
19842
19843	  gcc_assert (GET_CODE (addr) == SET);
19844
19845	  addr = SET_SRC (addr);
19846	  if (modified_in_p (addr, dep_insn))
19847	    cost += 1;
19848	}
19849      else if (ix86_agi_dependent (dep_insn, insn))
19850	cost += 1;
19851
19852      /* ??? Compares pair with jump/setcc.  */
19853      if (ix86_flags_dependent (insn, dep_insn, insn_type))
19854	cost = 0;
19855
19856      /* Floating point stores require value to be ready one cycle earlier.  */
19857      if (insn_type == TYPE_FMOV
19858	  && get_attr_memory (insn) == MEMORY_STORE
19859	  && !ix86_agi_dependent (dep_insn, insn))
19860	cost += 1;
19861      break;
19862
19863    case PROCESSOR_PENTIUMPRO:
19864      memory = get_attr_memory (insn);
19865
19866      /* INT->FP conversion is expensive.  */
19867      if (get_attr_fp_int_src (dep_insn))
19868	cost += 5;
19869
19870      /* There is one cycle extra latency between an FP op and a store.  */
19871      if (insn_type == TYPE_FMOV
19872	  && (set = single_set (dep_insn)) != NULL_RTX
19873	  && (set2 = single_set (insn)) != NULL_RTX
19874	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19875	  && MEM_P (SET_DEST (set2)))
19876	cost += 1;
19877
      /* Show the ability of the reorder buffer to hide the latency of a load
	 by executing it in parallel with the previous instruction, in case the
	 previous instruction is not needed to compute the address.  */
19881      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19882	  && !ix86_agi_dependent (dep_insn, insn))
19883	{
	  /* Claim moves take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
19886	  if (dep_insn_type == TYPE_IMOV
19887	      || dep_insn_type == TYPE_FMOV)
19888	    cost = 1;
19889	  else if (cost > 1)
19890	    cost--;
19891	}
19892      break;
19893
19894    case PROCESSOR_K6:
19895      memory = get_attr_memory (insn);
19896
19897      /* The esp dependency is resolved before the instruction is really
19898         finished.  */
19899      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19900	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19901	return 1;
19902
19903      /* INT->FP conversion is expensive.  */
19904      if (get_attr_fp_int_src (dep_insn))
19905	cost += 5;
19906
      /* Show the ability of the reorder buffer to hide the latency of a load
	 by executing it in parallel with the previous instruction, in case the
	 previous instruction is not needed to compute the address.  */
19910      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19911	  && !ix86_agi_dependent (dep_insn, insn))
19912	{
	  /* Claim moves take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
19915	  if (dep_insn_type == TYPE_IMOV
19916	      || dep_insn_type == TYPE_FMOV)
19917	    cost = 1;
19918	  else if (cost > 2)
19919	    cost -= 2;
19920	  else
19921	    cost = 1;
19922	}
19923      break;
19924
19925    case PROCESSOR_ATHLON:
19926    case PROCESSOR_K8:
19927    case PROCESSOR_AMDFAM10:
19928    case PROCESSOR_ATOM:
19929    case PROCESSOR_GENERIC32:
19930    case PROCESSOR_GENERIC64:
19931      memory = get_attr_memory (insn);
19932
      /* Show the ability of the reorder buffer to hide the latency of a load
	 by executing it in parallel with the previous instruction, in case the
	 previous instruction is not needed to compute the address.  */
19936      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19937	  && !ix86_agi_dependent (dep_insn, insn))
19938	{
19939	  enum attr_unit unit = get_attr_unit (insn);
19940	  int loadcost = 3;
19941
19942	  /* Because of the difference between the length of integer and
19943	     floating unit pipeline preparation stages, the memory operands
19944	     for floating point are cheaper.
19945
	     ??? For Athlon the difference is most probably 2.  */
19947	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19948	    loadcost = 3;
19949	  else
19950	    loadcost = TARGET_ATHLON ? 2 : 0;
19951
19952	  if (cost >= loadcost)
19953	    cost -= loadcost;
19954	  else
19955	    cost = 0;
19956	}
19957
19958    default:
19959      break;
19960    }
19961
19962  return cost;
19963}
19964
19965/* How many alternative schedules to try.  This should be as wide as the
19966   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
19968
19969static int
19970ia32_multipass_dfa_lookahead (void)
19971{
19972  switch (ix86_tune)
19973    {
19974    case PROCESSOR_PENTIUM:
19975      return 2;
19976
19977    case PROCESSOR_PENTIUMPRO:
19978    case PROCESSOR_K6:
19979      return 1;
19980
19981    default:
19982      return 0;
19983    }
19984}
19985
19986
19987/* Compute the alignment given to a constant that is being placed in memory.
19988   EXP is the constant and ALIGN is the alignment that the object would
19989   ordinarily have.
19990   The value of this function is used instead of that alignment to align
19991   the object.  */
19992
19993int
19994ix86_constant_alignment (tree exp, int align)
19995{
19996  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19997      || TREE_CODE (exp) == INTEGER_CST)
19998    {
19999      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
20000	return 64;
20001      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
20002	return 128;
20003    }
20004  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
20005	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
20006    return BITS_PER_WORD;
20007
20008  return align;
20009}
20010
20011/* Compute the alignment for a static variable.
20012   TYPE is the data type, and ALIGN is the alignment that
20013   the object would ordinarily have.  The value of this function is used
20014   instead of that alignment to align the object.  */
20015
20016int
20017ix86_data_alignment (tree type, int align)
20018{
20019  int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
20020
20021  if (AGGREGATE_TYPE_P (type)
20022      && TYPE_SIZE (type)
20023      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20024      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
20025	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
20026      && align < max_align)
20027    align = max_align;
20028
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
20031  if (TARGET_64BIT)
20032    {
20033      if (AGGREGATE_TYPE_P (type)
20034	   && TYPE_SIZE (type)
20035	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20036	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
20037	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20038	return 128;
20039    }
20040
20041  if (TREE_CODE (type) == ARRAY_TYPE)
20042    {
20043      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20044	return 64;
20045      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20046	return 128;
20047    }
20048  else if (TREE_CODE (type) == COMPLEX_TYPE)
20049    {
20050
20051      if (TYPE_MODE (type) == DCmode && align < 64)
20052	return 64;
20053      if ((TYPE_MODE (type) == XCmode
20054	   || TYPE_MODE (type) == TCmode) && align < 128)
20055	return 128;
20056    }
20057  else if ((TREE_CODE (type) == RECORD_TYPE
20058	    || TREE_CODE (type) == UNION_TYPE
20059	    || TREE_CODE (type) == QUAL_UNION_TYPE)
20060	   && TYPE_FIELDS (type))
20061    {
20062      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20063	return 64;
20064      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20065	return 128;
20066    }
20067  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20068	   || TREE_CODE (type) == INTEGER_TYPE)
20069    {
20070      if (TYPE_MODE (type) == DFmode && align < 64)
20071	return 64;
20072      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20073	return 128;
20074    }
20075
20076  return align;
20077}
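
/* Two illustrative cases (hypothetical, for exposition only): on x86-64 a
   file-scope "static double trio[3]" is 192 bits, which satisfies the
   16-byte array rule above, so it is given 128-bit alignment; something
   larger such as "static char big[4096]" is first bumped to max_align
   (256 bits unless optimizing for size), letting block moves and
   vectorized accesses assume a 32-byte boundary.  */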
20078
20079/* Compute the alignment for a local variable or a stack slot.  EXP is
20080   the data type or decl itself, MODE is the widest mode available and
20081   ALIGN is the alignment that the object would ordinarily have.  The
20082   value of this macro is used instead of that alignment to align the
20083   object.  */
20084
20085unsigned int
20086ix86_local_alignment (tree exp, enum machine_mode mode,
20087		      unsigned int align)
20088{
20089  tree type, decl;
20090
20091  if (exp && DECL_P (exp))
20092    {
20093      type = TREE_TYPE (exp);
20094      decl = exp;
20095    }
20096  else
20097    {
20098      type = exp;
20099      decl = NULL;
20100    }
20101
  /* Don't do dynamic stack realignment for long long, double or
     _Complex double objects with -mpreferred-stack-boundary=2.  */
20104  if (!TARGET_64BIT
20105      && align == 64
20106      && ix86_preferred_stack_boundary < 64
20107      && (mode == DImode || (type && TYPE_MODE (type) == DImode)
20108          || mode == DFmode || (type && TYPE_MODE (type) == DFmode)
20109          || mode == DCmode || (type && TYPE_MODE (type) == DCmode))
20110      && (!type || !TYPE_USER_ALIGN (type))
20111      && (!decl || !DECL_USER_ALIGN (decl)))
20112    align = 32;
20113
  /* If TYPE is NULL, we are allocating a stack slot for a caller-save
     register in MODE.  Make sure an XFmode slot is aligned at least as
     much as DFmode requires.  */
20117  if (!type)
20118    {
20119      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
20120	align = GET_MODE_ALIGNMENT (DFmode);
20121      return align;
20122    }
20123
  /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
     to a 16-byte boundary.  */
20126  if (TARGET_64BIT)
20127    {
20128      if (AGGREGATE_TYPE_P (type)
20129	   && TYPE_SIZE (type)
20130	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	   /* TYPE_SIZE is in bits; 16 bytes is 128 bits.  */
	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20133	return 128;
20134    }
20135  if (TREE_CODE (type) == ARRAY_TYPE)
20136    {
20137      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20138	return 64;
20139      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20140	return 128;
20141    }
20142  else if (TREE_CODE (type) == COMPLEX_TYPE)
20143    {
20144      if (TYPE_MODE (type) == DCmode && align < 64)
20145	return 64;
20146      if ((TYPE_MODE (type) == XCmode
20147	   || TYPE_MODE (type) == TCmode) && align < 128)
20148	return 128;
20149    }
20150  else if ((TREE_CODE (type) == RECORD_TYPE
20151	    || TREE_CODE (type) == UNION_TYPE
20152	    || TREE_CODE (type) == QUAL_UNION_TYPE)
20153	   && TYPE_FIELDS (type))
20154    {
20155      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64
20156          && (TARGET_64BIT || ix86_preferred_stack_boundary >= 64))
20157	return 64;
20158      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20159	return 128;
20160    }
20161  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20162	   || TREE_CODE (type) == INTEGER_TYPE)
20163    {
20165      if (TYPE_MODE (type) == DFmode && align < 64
20166          && (TARGET_64BIT || ix86_preferred_stack_boundary >= 64))
20167	return 64;
20168      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20169	return 128;
20170    }
20171  return align;
20172}
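
/* Illustrative cases (hypothetical): with -m32 -mpreferred-stack-boundary=2
   a local "long long l;" would ordinarily ask for 64-bit alignment, but the
   code above caps it at 32 bits so the frame need not be dynamically
   realigned just for it.  With -m64, a local "char buf[32]" (256 bits) is
   given 128-bit alignment per the 16-byte array rule.  */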
20173
20174/* Compute the minimum required alignment for dynamic stack realignment
20175   purposes for a local variable, parameter or a stack slot.  EXP is
20176   the data type or decl itself, MODE is its mode and ALIGN is the
20177   alignment that the object would ordinarily have.  */
20178
20179unsigned int
20180ix86_minimum_alignment (tree exp, enum machine_mode mode,
20181			unsigned int align)
20182{
20183  tree type, decl;
20184
20185  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
20186    return align;
20187
20188  if (exp && DECL_P (exp))
20189    {
20190      type = TREE_TYPE (exp);
20191      decl = exp;
20192    }
20193  else
20194    {
20195      type = exp;
20196      decl = NULL;
20197    }
20198
20199  /* Don't do dynamic stack realignment for long long objects with
20200     -mpreferred-stack-boundary=2.  */
20201  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
20202      && (!type || !TYPE_USER_ALIGN (type))
20203      && (!decl || !DECL_USER_ALIGN (decl)))
20204    return 32;
20205
20206  return align;
20207}
20208
20209/* Find a location for the static chain incoming to a nested function.
20210   This is a register, unless all free registers are used by arguments.  */
20211
20212static rtx
20213ix86_static_chain (const_tree fndecl, bool incoming_p)
20214{
20215  unsigned regno;
20216
20217  if (!DECL_STATIC_CHAIN (fndecl))
20218    return NULL;
20219
20220  if (TARGET_64BIT)
20221    {
20222      /* We always use R10 in 64-bit mode.  */
20223      regno = R10_REG;
20224    }
20225  else
20226    {
20227      tree fntype;
20228      /* By default in 32-bit mode we use ECX to pass the static chain.  */
20229      regno = CX_REG;
20230
20231      fntype = TREE_TYPE (fndecl);
20232      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
20233	{
20234	  /* Fastcall functions use ecx/edx for arguments, which leaves
20235	     us with EAX for the static chain.  */
20236	  regno = AX_REG;
20237	}
20238      else if (ix86_function_regparm (fntype, fndecl) == 3)
20239	{
20240	  /* For regparm 3, we have no free call-clobbered registers in
20241	     which to store the static chain.  In order to implement this,
20242	     we have the trampoline push the static chain to the stack.
20243	     However, we can't push a value below the return address when
20244	     we call the nested function directly, so we have to use an
20245	     alternate entry point.  For this we use ESI, and have the
20246	     alternate entry point push ESI, so that things appear the
20247	     same once we're executing the nested function.  */
20248	  if (incoming_p)
20249	    {
20250	      if (fndecl == current_function_decl)
20251		ix86_static_chain_on_stack = true;
20252	      return gen_frame_mem (SImode,
20253				    plus_constant (arg_pointer_rtx, -8));
20254	    }
20255	  regno = SI_REG;
20256	}
20257    }
20258
20259  return gen_rtx_REG (Pmode, regno);
20260}
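
/* As an illustration (hypothetical nested-function example):

     int outer (int x)
     {
       int inner (int y) { return x + y; }	// uses OUTER's frame
       return inner (1);
     }

   the address of OUTER's frame is INNER's static chain.  With the plain
   32-bit ABI it is passed in %ecx; a fastcall INNER would use %eax, and
   with regparm(3) the trampoline pushes it on the stack and INNER reloads
   it at the alternate entry point described above.  */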
20261
20262/* Emit RTL insns to initialize the variable parts of a trampoline.
20263   FNDECL is the decl of the target address; M_TRAMP is a MEM for
20264   the trampoline, and CHAIN_VALUE is an RTX for the static chain
20265   to be passed to the target function.  */
20266
20267static void
20268ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
20269{
20270  rtx mem, fnaddr;
20271
20272  fnaddr = XEXP (DECL_RTL (fndecl), 0);
20273
20274  if (!TARGET_64BIT)
20275    {
20276      rtx disp, chain;
20277      int opcode;
20278
20279      /* Depending on the static chain location, either load a register
20280	 with a constant, or push the constant to the stack.  All of the
20281	 instructions are the same size.  */
20282      chain = ix86_static_chain (fndecl, true);
20283      if (REG_P (chain))
20284	{
20285	  if (REGNO (chain) == CX_REG)
20286	    opcode = 0xb9;
20287	  else if (REGNO (chain) == AX_REG)
20288	    opcode = 0xb8;
20289	  else
20290	    gcc_unreachable ();
20291	}
20292      else
20293	opcode = 0x68;
20294
20295      mem = adjust_address (m_tramp, QImode, 0);
20296      emit_move_insn (mem, gen_int_mode (opcode, QImode));
20297
20298      mem = adjust_address (m_tramp, SImode, 1);
20299      emit_move_insn (mem, chain_value);
20300
      /* Compute the offset from the end of the jmp to the target function.
	 When the trampoline stores the static chain on the stack, the
	 jump must also skip the target's first insn, the one-byte push
	 of the (call-saved) register that normally holds the chain.  */
20305      disp = expand_binop (SImode, sub_optab, fnaddr,
20306			   plus_constant (XEXP (m_tramp, 0),
20307					  MEM_P (chain) ? 9 : 10),
20308			   NULL_RTX, 1, OPTAB_DIRECT);
20309
20310      mem = adjust_address (m_tramp, QImode, 5);
20311      emit_move_insn (mem, gen_int_mode (0xe9, QImode));
20312
20313      mem = adjust_address (m_tramp, SImode, 6);
20314      emit_move_insn (mem, disp);
20315    }
20316  else
20317    {
20318      int offset = 0;
20319
      /* Load the function address into r11.  Try to load the address
	 using the shorter movl instead of movabs.  We may want to
	 support movq for kernel mode, but the kernel does not use
	 trampolines at the moment.  */
20324      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
20325	{
20326	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
20327
20328	  mem = adjust_address (m_tramp, HImode, offset);
20329	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
20330
20331	  mem = adjust_address (m_tramp, SImode, offset + 2);
20332	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
20333	  offset += 6;
20334	}
20335      else
20336	{
20337	  mem = adjust_address (m_tramp, HImode, offset);
20338	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
20339
20340	  mem = adjust_address (m_tramp, DImode, offset + 2);
20341	  emit_move_insn (mem, fnaddr);
20342	  offset += 10;
20343	}
20344
      /* Load the static chain into r10 using movabs.  */
20346      mem = adjust_address (m_tramp, HImode, offset);
20347      emit_move_insn (mem, gen_int_mode (0xba49, HImode));
20348
20349      mem = adjust_address (m_tramp, DImode, offset + 2);
20350      emit_move_insn (mem, chain_value);
20351      offset += 10;
20352
20353      /* Jump to r11; the last (unused) byte is a nop, only there to
20354	 pad the write out to a single 32-bit store.  */
20355      mem = adjust_address (m_tramp, SImode, offset);
20356      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
20357      offset += 4;
20358
20359      gcc_assert (offset <= TRAMPOLINE_SIZE);
20360    }
20361
20362#ifdef ENABLE_EXECUTE_STACK
20363#ifdef CHECK_EXECUTE_STACK_ENABLED
20364  if (CHECK_EXECUTE_STACK_ENABLED)
20365#endif
20366  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
20367		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
20368#endif
20369}
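
/* For reference, an illustrative decoding of the bytes written above
   (not emitted verbatim anywhere; shown only to make the opcodes
   readable).  The 32-bit trampoline is

     b9 <imm32>		movl  $chain_value, %ecx   (b8 for %eax, 68 for push)
     e9 <disp32>	jmp   <nested function>

   and the 64-bit trampoline is

     41 bb <imm32>	movl  $fnaddr, %r11d	(or 49 bb <imm64>, movabs)
     49 ba <imm64>	movabs $chain_value, %r10
     49 ff e3		jmpq  *%r11
     90			nop   (pads the final 32-bit store)  */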
20370
20371/* The following file contains several enumerations and data structures
20372   built from the definitions in i386-builtin-types.def.  */
20373
20374#include "i386-builtin-types.inc"
20375
20376/* Table for the ix86 builtin non-function types.  */
20377static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
20378
20379/* Retrieve an element from the above table, building some of
20380   the types lazily.  */
20381
20382static tree
20383ix86_get_builtin_type (enum ix86_builtin_type tcode)
20384{
20385  unsigned int index;
20386  tree type, itype;
20387
20388  gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
20389
20390  type = ix86_builtin_type_tab[(int) tcode];
20391  if (type != NULL)
20392    return type;
20393
20394  gcc_assert (tcode > IX86_BT_LAST_PRIM);
20395  if (tcode <= IX86_BT_LAST_VECT)
20396    {
20397      enum machine_mode mode;
20398
20399      index = tcode - IX86_BT_LAST_PRIM - 1;
20400      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
20401      mode = ix86_builtin_type_vect_mode[index];
20402
20403      type = build_vector_type_for_mode (itype, mode);
20404    }
20405  else
20406    {
20407      int quals;
20408
20409      index = tcode - IX86_BT_LAST_VECT - 1;
20410      if (tcode <= IX86_BT_LAST_PTR)
20411	quals = TYPE_UNQUALIFIED;
20412      else
20413	quals = TYPE_QUAL_CONST;
20414
20415      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
20416      if (quals != TYPE_UNQUALIFIED)
20417	itype = build_qualified_type (itype, quals);
20418
20419      type = build_pointer_type (itype);
20420    }
20421
20422  ix86_builtin_type_tab[(int) tcode] = type;
20423  return type;
20424}
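
/* As an example (type-code names are taken from the generated
   i386-builtin-types.inc and shown here only for exposition): the first
   request for IX86_BT_V4SF falls into the vector branch above and caches
   the result of

     build_vector_type_for_mode (float_type_node, V4SFmode);

   while a pointer code such as IX86_BT_PCFLOAT takes the second branch
   and yields a "const float *" type.  */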
20425
20426/* Table for the ix86 builtin function types.  */
20427static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
20428
20429/* Retrieve an element from the above table, building some of
20430   the types lazily.  */
20431
20432static tree
20433ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
20434{
20435  tree type;
20436
20437  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
20438
20439  type = ix86_builtin_func_type_tab[(int) tcode];
20440  if (type != NULL)
20441    return type;
20442
20443  if (tcode <= IX86_BT_LAST_FUNC)
20444    {
20445      unsigned start = ix86_builtin_func_start[(int) tcode];
20446      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
20447      tree rtype, atype, args = void_list_node;
20448      unsigned i;
20449
20450      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
20451      for (i = after - 1; i > start; --i)
20452	{
20453	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
20454	  args = tree_cons (NULL, atype, args);
20455	}
20456
20457      type = build_function_type (rtype, args);
20458    }
20459  else
20460    {
20461      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
20462      enum ix86_builtin_func_type icode;
20463
20464      icode = ix86_builtin_func_alias_base[index];
20465      type = ix86_get_builtin_func_type (icode);
20466    }
20467
20468  ix86_builtin_func_type_tab[(int) tcode] = type;
20469  return type;
20470}
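
/* As an illustration (hypothetical walk-through): for a code such as
   V4SF_FTYPE_PCFLOAT the generated tables store the return type first
   and the argument types after it, so the backwards loop above, consing
   onto void_list_node, rebuilds the argument list in source order.  The
   result is equivalent to

     build_function_type_list (v4sf_type, pcfloat_type, NULL_TREE);

   (the type names here are placeholders), i.e. a function taking
   "const float *" and returning __v4sf.  */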
20471
20472
20473/* Codes for all the SSE/MMX builtins.  */
20474enum ix86_builtins
20475{
20476  IX86_BUILTIN_ADDPS,
20477  IX86_BUILTIN_ADDSS,
20478  IX86_BUILTIN_DIVPS,
20479  IX86_BUILTIN_DIVSS,
20480  IX86_BUILTIN_MULPS,
20481  IX86_BUILTIN_MULSS,
20482  IX86_BUILTIN_SUBPS,
20483  IX86_BUILTIN_SUBSS,
20484
20485  IX86_BUILTIN_CMPEQPS,
20486  IX86_BUILTIN_CMPLTPS,
20487  IX86_BUILTIN_CMPLEPS,
20488  IX86_BUILTIN_CMPGTPS,
20489  IX86_BUILTIN_CMPGEPS,
20490  IX86_BUILTIN_CMPNEQPS,
20491  IX86_BUILTIN_CMPNLTPS,
20492  IX86_BUILTIN_CMPNLEPS,
20493  IX86_BUILTIN_CMPNGTPS,
20494  IX86_BUILTIN_CMPNGEPS,
20495  IX86_BUILTIN_CMPORDPS,
20496  IX86_BUILTIN_CMPUNORDPS,
20497  IX86_BUILTIN_CMPEQSS,
20498  IX86_BUILTIN_CMPLTSS,
20499  IX86_BUILTIN_CMPLESS,
20500  IX86_BUILTIN_CMPNEQSS,
20501  IX86_BUILTIN_CMPNLTSS,
20502  IX86_BUILTIN_CMPNLESS,
20503  IX86_BUILTIN_CMPNGTSS,
20504  IX86_BUILTIN_CMPNGESS,
20505  IX86_BUILTIN_CMPORDSS,
20506  IX86_BUILTIN_CMPUNORDSS,
20507
20508  IX86_BUILTIN_COMIEQSS,
20509  IX86_BUILTIN_COMILTSS,
20510  IX86_BUILTIN_COMILESS,
20511  IX86_BUILTIN_COMIGTSS,
20512  IX86_BUILTIN_COMIGESS,
20513  IX86_BUILTIN_COMINEQSS,
20514  IX86_BUILTIN_UCOMIEQSS,
20515  IX86_BUILTIN_UCOMILTSS,
20516  IX86_BUILTIN_UCOMILESS,
20517  IX86_BUILTIN_UCOMIGTSS,
20518  IX86_BUILTIN_UCOMIGESS,
20519  IX86_BUILTIN_UCOMINEQSS,
20520
20521  IX86_BUILTIN_CVTPI2PS,
20522  IX86_BUILTIN_CVTPS2PI,
20523  IX86_BUILTIN_CVTSI2SS,
20524  IX86_BUILTIN_CVTSI642SS,
20525  IX86_BUILTIN_CVTSS2SI,
20526  IX86_BUILTIN_CVTSS2SI64,
20527  IX86_BUILTIN_CVTTPS2PI,
20528  IX86_BUILTIN_CVTTSS2SI,
20529  IX86_BUILTIN_CVTTSS2SI64,
20530
20531  IX86_BUILTIN_MAXPS,
20532  IX86_BUILTIN_MAXSS,
20533  IX86_BUILTIN_MINPS,
20534  IX86_BUILTIN_MINSS,
20535
20536  IX86_BUILTIN_LOADUPS,
20537  IX86_BUILTIN_STOREUPS,
20538  IX86_BUILTIN_MOVSS,
20539
20540  IX86_BUILTIN_MOVHLPS,
20541  IX86_BUILTIN_MOVLHPS,
20542  IX86_BUILTIN_LOADHPS,
20543  IX86_BUILTIN_LOADLPS,
20544  IX86_BUILTIN_STOREHPS,
20545  IX86_BUILTIN_STORELPS,
20546
20547  IX86_BUILTIN_MASKMOVQ,
20548  IX86_BUILTIN_MOVMSKPS,
20549  IX86_BUILTIN_PMOVMSKB,
20550
20551  IX86_BUILTIN_MOVNTPS,
20552  IX86_BUILTIN_MOVNTQ,
20553
20554  IX86_BUILTIN_LOADDQU,
20555  IX86_BUILTIN_STOREDQU,
20556
20557  IX86_BUILTIN_PACKSSWB,
20558  IX86_BUILTIN_PACKSSDW,
20559  IX86_BUILTIN_PACKUSWB,
20560
20561  IX86_BUILTIN_PADDB,
20562  IX86_BUILTIN_PADDW,
20563  IX86_BUILTIN_PADDD,
20564  IX86_BUILTIN_PADDQ,
20565  IX86_BUILTIN_PADDSB,
20566  IX86_BUILTIN_PADDSW,
20567  IX86_BUILTIN_PADDUSB,
20568  IX86_BUILTIN_PADDUSW,
20569  IX86_BUILTIN_PSUBB,
20570  IX86_BUILTIN_PSUBW,
20571  IX86_BUILTIN_PSUBD,
20572  IX86_BUILTIN_PSUBQ,
20573  IX86_BUILTIN_PSUBSB,
20574  IX86_BUILTIN_PSUBSW,
20575  IX86_BUILTIN_PSUBUSB,
20576  IX86_BUILTIN_PSUBUSW,
20577
20578  IX86_BUILTIN_PAND,
20579  IX86_BUILTIN_PANDN,
20580  IX86_BUILTIN_POR,
20581  IX86_BUILTIN_PXOR,
20582
20583  IX86_BUILTIN_PAVGB,
20584  IX86_BUILTIN_PAVGW,
20585
20586  IX86_BUILTIN_PCMPEQB,
20587  IX86_BUILTIN_PCMPEQW,
20588  IX86_BUILTIN_PCMPEQD,
20589  IX86_BUILTIN_PCMPGTB,
20590  IX86_BUILTIN_PCMPGTW,
20591  IX86_BUILTIN_PCMPGTD,
20592
20593  IX86_BUILTIN_PMADDWD,
20594
20595  IX86_BUILTIN_PMAXSW,
20596  IX86_BUILTIN_PMAXUB,
20597  IX86_BUILTIN_PMINSW,
20598  IX86_BUILTIN_PMINUB,
20599
20600  IX86_BUILTIN_PMULHUW,
20601  IX86_BUILTIN_PMULHW,
20602  IX86_BUILTIN_PMULLW,
20603
20604  IX86_BUILTIN_PSADBW,
20605  IX86_BUILTIN_PSHUFW,
20606
20607  IX86_BUILTIN_PSLLW,
20608  IX86_BUILTIN_PSLLD,
20609  IX86_BUILTIN_PSLLQ,
20610  IX86_BUILTIN_PSRAW,
20611  IX86_BUILTIN_PSRAD,
20612  IX86_BUILTIN_PSRLW,
20613  IX86_BUILTIN_PSRLD,
20614  IX86_BUILTIN_PSRLQ,
20615  IX86_BUILTIN_PSLLWI,
20616  IX86_BUILTIN_PSLLDI,
20617  IX86_BUILTIN_PSLLQI,
20618  IX86_BUILTIN_PSRAWI,
20619  IX86_BUILTIN_PSRADI,
20620  IX86_BUILTIN_PSRLWI,
20621  IX86_BUILTIN_PSRLDI,
20622  IX86_BUILTIN_PSRLQI,
20623
20624  IX86_BUILTIN_PUNPCKHBW,
20625  IX86_BUILTIN_PUNPCKHWD,
20626  IX86_BUILTIN_PUNPCKHDQ,
20627  IX86_BUILTIN_PUNPCKLBW,
20628  IX86_BUILTIN_PUNPCKLWD,
20629  IX86_BUILTIN_PUNPCKLDQ,
20630
20631  IX86_BUILTIN_SHUFPS,
20632
20633  IX86_BUILTIN_RCPPS,
20634  IX86_BUILTIN_RCPSS,
20635  IX86_BUILTIN_RSQRTPS,
20636  IX86_BUILTIN_RSQRTPS_NR,
20637  IX86_BUILTIN_RSQRTSS,
20638  IX86_BUILTIN_RSQRTF,
20639  IX86_BUILTIN_SQRTPS,
20640  IX86_BUILTIN_SQRTPS_NR,
20641  IX86_BUILTIN_SQRTSS,
20642
20643  IX86_BUILTIN_UNPCKHPS,
20644  IX86_BUILTIN_UNPCKLPS,
20645
20646  IX86_BUILTIN_ANDPS,
20647  IX86_BUILTIN_ANDNPS,
20648  IX86_BUILTIN_ORPS,
20649  IX86_BUILTIN_XORPS,
20650
20651  IX86_BUILTIN_EMMS,
20652  IX86_BUILTIN_LDMXCSR,
20653  IX86_BUILTIN_STMXCSR,
20654  IX86_BUILTIN_SFENCE,
20655
20656  /* 3DNow! Original */
20657  IX86_BUILTIN_FEMMS,
20658  IX86_BUILTIN_PAVGUSB,
20659  IX86_BUILTIN_PF2ID,
20660  IX86_BUILTIN_PFACC,
20661  IX86_BUILTIN_PFADD,
20662  IX86_BUILTIN_PFCMPEQ,
20663  IX86_BUILTIN_PFCMPGE,
20664  IX86_BUILTIN_PFCMPGT,
20665  IX86_BUILTIN_PFMAX,
20666  IX86_BUILTIN_PFMIN,
20667  IX86_BUILTIN_PFMUL,
20668  IX86_BUILTIN_PFRCP,
20669  IX86_BUILTIN_PFRCPIT1,
20670  IX86_BUILTIN_PFRCPIT2,
20671  IX86_BUILTIN_PFRSQIT1,
20672  IX86_BUILTIN_PFRSQRT,
20673  IX86_BUILTIN_PFSUB,
20674  IX86_BUILTIN_PFSUBR,
20675  IX86_BUILTIN_PI2FD,
20676  IX86_BUILTIN_PMULHRW,
20677
20678  /* 3DNow! Athlon Extensions */
20679  IX86_BUILTIN_PF2IW,
20680  IX86_BUILTIN_PFNACC,
20681  IX86_BUILTIN_PFPNACC,
20682  IX86_BUILTIN_PI2FW,
20683  IX86_BUILTIN_PSWAPDSI,
20684  IX86_BUILTIN_PSWAPDSF,
20685
20686  /* SSE2 */
20687  IX86_BUILTIN_ADDPD,
20688  IX86_BUILTIN_ADDSD,
20689  IX86_BUILTIN_DIVPD,
20690  IX86_BUILTIN_DIVSD,
20691  IX86_BUILTIN_MULPD,
20692  IX86_BUILTIN_MULSD,
20693  IX86_BUILTIN_SUBPD,
20694  IX86_BUILTIN_SUBSD,
20695
20696  IX86_BUILTIN_CMPEQPD,
20697  IX86_BUILTIN_CMPLTPD,
20698  IX86_BUILTIN_CMPLEPD,
20699  IX86_BUILTIN_CMPGTPD,
20700  IX86_BUILTIN_CMPGEPD,
20701  IX86_BUILTIN_CMPNEQPD,
20702  IX86_BUILTIN_CMPNLTPD,
20703  IX86_BUILTIN_CMPNLEPD,
20704  IX86_BUILTIN_CMPNGTPD,
20705  IX86_BUILTIN_CMPNGEPD,
20706  IX86_BUILTIN_CMPORDPD,
20707  IX86_BUILTIN_CMPUNORDPD,
20708  IX86_BUILTIN_CMPEQSD,
20709  IX86_BUILTIN_CMPLTSD,
20710  IX86_BUILTIN_CMPLESD,
20711  IX86_BUILTIN_CMPNEQSD,
20712  IX86_BUILTIN_CMPNLTSD,
20713  IX86_BUILTIN_CMPNLESD,
20714  IX86_BUILTIN_CMPORDSD,
20715  IX86_BUILTIN_CMPUNORDSD,
20716
20717  IX86_BUILTIN_COMIEQSD,
20718  IX86_BUILTIN_COMILTSD,
20719  IX86_BUILTIN_COMILESD,
20720  IX86_BUILTIN_COMIGTSD,
20721  IX86_BUILTIN_COMIGESD,
20722  IX86_BUILTIN_COMINEQSD,
20723  IX86_BUILTIN_UCOMIEQSD,
20724  IX86_BUILTIN_UCOMILTSD,
20725  IX86_BUILTIN_UCOMILESD,
20726  IX86_BUILTIN_UCOMIGTSD,
20727  IX86_BUILTIN_UCOMIGESD,
20728  IX86_BUILTIN_UCOMINEQSD,
20729
20730  IX86_BUILTIN_MAXPD,
20731  IX86_BUILTIN_MAXSD,
20732  IX86_BUILTIN_MINPD,
20733  IX86_BUILTIN_MINSD,
20734
20735  IX86_BUILTIN_ANDPD,
20736  IX86_BUILTIN_ANDNPD,
20737  IX86_BUILTIN_ORPD,
20738  IX86_BUILTIN_XORPD,
20739
20740  IX86_BUILTIN_SQRTPD,
20741  IX86_BUILTIN_SQRTSD,
20742
20743  IX86_BUILTIN_UNPCKHPD,
20744  IX86_BUILTIN_UNPCKLPD,
20745
20746  IX86_BUILTIN_SHUFPD,
20747
20748  IX86_BUILTIN_LOADUPD,
20749  IX86_BUILTIN_STOREUPD,
20750  IX86_BUILTIN_MOVSD,
20751
20752  IX86_BUILTIN_LOADHPD,
20753  IX86_BUILTIN_LOADLPD,
20754
20755  IX86_BUILTIN_CVTDQ2PD,
20756  IX86_BUILTIN_CVTDQ2PS,
20757
20758  IX86_BUILTIN_CVTPD2DQ,
20759  IX86_BUILTIN_CVTPD2PI,
20760  IX86_BUILTIN_CVTPD2PS,
20761  IX86_BUILTIN_CVTTPD2DQ,
20762  IX86_BUILTIN_CVTTPD2PI,
20763
20764  IX86_BUILTIN_CVTPI2PD,
20765  IX86_BUILTIN_CVTSI2SD,
20766  IX86_BUILTIN_CVTSI642SD,
20767
20768  IX86_BUILTIN_CVTSD2SI,
20769  IX86_BUILTIN_CVTSD2SI64,
20770  IX86_BUILTIN_CVTSD2SS,
20771  IX86_BUILTIN_CVTSS2SD,
20772  IX86_BUILTIN_CVTTSD2SI,
20773  IX86_BUILTIN_CVTTSD2SI64,
20774
20775  IX86_BUILTIN_CVTPS2DQ,
20776  IX86_BUILTIN_CVTPS2PD,
20777  IX86_BUILTIN_CVTTPS2DQ,
20778
20779  IX86_BUILTIN_MOVNTI,
20780  IX86_BUILTIN_MOVNTPD,
20781  IX86_BUILTIN_MOVNTDQ,
20782
20783  IX86_BUILTIN_MOVQ128,
20784
20785  /* SSE2 MMX */
20786  IX86_BUILTIN_MASKMOVDQU,
20787  IX86_BUILTIN_MOVMSKPD,
20788  IX86_BUILTIN_PMOVMSKB128,
20789
20790  IX86_BUILTIN_PACKSSWB128,
20791  IX86_BUILTIN_PACKSSDW128,
20792  IX86_BUILTIN_PACKUSWB128,
20793
20794  IX86_BUILTIN_PADDB128,
20795  IX86_BUILTIN_PADDW128,
20796  IX86_BUILTIN_PADDD128,
20797  IX86_BUILTIN_PADDQ128,
20798  IX86_BUILTIN_PADDSB128,
20799  IX86_BUILTIN_PADDSW128,
20800  IX86_BUILTIN_PADDUSB128,
20801  IX86_BUILTIN_PADDUSW128,
20802  IX86_BUILTIN_PSUBB128,
20803  IX86_BUILTIN_PSUBW128,
20804  IX86_BUILTIN_PSUBD128,
20805  IX86_BUILTIN_PSUBQ128,
20806  IX86_BUILTIN_PSUBSB128,
20807  IX86_BUILTIN_PSUBSW128,
20808  IX86_BUILTIN_PSUBUSB128,
20809  IX86_BUILTIN_PSUBUSW128,
20810
20811  IX86_BUILTIN_PAND128,
20812  IX86_BUILTIN_PANDN128,
20813  IX86_BUILTIN_POR128,
20814  IX86_BUILTIN_PXOR128,
20815
20816  IX86_BUILTIN_PAVGB128,
20817  IX86_BUILTIN_PAVGW128,
20818
20819  IX86_BUILTIN_PCMPEQB128,
20820  IX86_BUILTIN_PCMPEQW128,
20821  IX86_BUILTIN_PCMPEQD128,
20822  IX86_BUILTIN_PCMPGTB128,
20823  IX86_BUILTIN_PCMPGTW128,
20824  IX86_BUILTIN_PCMPGTD128,
20825
20826  IX86_BUILTIN_PMADDWD128,
20827
20828  IX86_BUILTIN_PMAXSW128,
20829  IX86_BUILTIN_PMAXUB128,
20830  IX86_BUILTIN_PMINSW128,
20831  IX86_BUILTIN_PMINUB128,
20832
20833  IX86_BUILTIN_PMULUDQ,
20834  IX86_BUILTIN_PMULUDQ128,
20835  IX86_BUILTIN_PMULHUW128,
20836  IX86_BUILTIN_PMULHW128,
20837  IX86_BUILTIN_PMULLW128,
20838
20839  IX86_BUILTIN_PSADBW128,
20840  IX86_BUILTIN_PSHUFHW,
20841  IX86_BUILTIN_PSHUFLW,
20842  IX86_BUILTIN_PSHUFD,
20843
20844  IX86_BUILTIN_PSLLDQI128,
20845  IX86_BUILTIN_PSLLWI128,
20846  IX86_BUILTIN_PSLLDI128,
20847  IX86_BUILTIN_PSLLQI128,
20848  IX86_BUILTIN_PSRAWI128,
20849  IX86_BUILTIN_PSRADI128,
20850  IX86_BUILTIN_PSRLDQI128,
20851  IX86_BUILTIN_PSRLWI128,
20852  IX86_BUILTIN_PSRLDI128,
20853  IX86_BUILTIN_PSRLQI128,
20854
20855  IX86_BUILTIN_PSLLDQ128,
20856  IX86_BUILTIN_PSLLW128,
20857  IX86_BUILTIN_PSLLD128,
20858  IX86_BUILTIN_PSLLQ128,
20859  IX86_BUILTIN_PSRAW128,
20860  IX86_BUILTIN_PSRAD128,
20861  IX86_BUILTIN_PSRLW128,
20862  IX86_BUILTIN_PSRLD128,
20863  IX86_BUILTIN_PSRLQ128,
20864
20865  IX86_BUILTIN_PUNPCKHBW128,
20866  IX86_BUILTIN_PUNPCKHWD128,
20867  IX86_BUILTIN_PUNPCKHDQ128,
20868  IX86_BUILTIN_PUNPCKHQDQ128,
20869  IX86_BUILTIN_PUNPCKLBW128,
20870  IX86_BUILTIN_PUNPCKLWD128,
20871  IX86_BUILTIN_PUNPCKLDQ128,
20872  IX86_BUILTIN_PUNPCKLQDQ128,
20873
20874  IX86_BUILTIN_CLFLUSH,
20875  IX86_BUILTIN_MFENCE,
20876  IX86_BUILTIN_LFENCE,
20877
20878  IX86_BUILTIN_BSRSI,
20879  IX86_BUILTIN_BSRDI,
20880  IX86_BUILTIN_RDPMC,
20881  IX86_BUILTIN_RDTSC,
20882  IX86_BUILTIN_RDTSCP,
20883  IX86_BUILTIN_ROLQI,
20884  IX86_BUILTIN_ROLHI,
20885  IX86_BUILTIN_RORQI,
20886  IX86_BUILTIN_RORHI,
20887
20888  /* SSE3.  */
20889  IX86_BUILTIN_ADDSUBPS,
20890  IX86_BUILTIN_HADDPS,
20891  IX86_BUILTIN_HSUBPS,
20892  IX86_BUILTIN_MOVSHDUP,
20893  IX86_BUILTIN_MOVSLDUP,
20894  IX86_BUILTIN_ADDSUBPD,
20895  IX86_BUILTIN_HADDPD,
20896  IX86_BUILTIN_HSUBPD,
20897  IX86_BUILTIN_LDDQU,
20898
20899  IX86_BUILTIN_MONITOR,
20900  IX86_BUILTIN_MWAIT,
20901
20902  /* SSSE3.  */
20903  IX86_BUILTIN_PHADDW,
20904  IX86_BUILTIN_PHADDD,
20905  IX86_BUILTIN_PHADDSW,
20906  IX86_BUILTIN_PHSUBW,
20907  IX86_BUILTIN_PHSUBD,
20908  IX86_BUILTIN_PHSUBSW,
20909  IX86_BUILTIN_PMADDUBSW,
20910  IX86_BUILTIN_PMULHRSW,
20911  IX86_BUILTIN_PSHUFB,
20912  IX86_BUILTIN_PSIGNB,
20913  IX86_BUILTIN_PSIGNW,
20914  IX86_BUILTIN_PSIGND,
20915  IX86_BUILTIN_PALIGNR,
20916  IX86_BUILTIN_PABSB,
20917  IX86_BUILTIN_PABSW,
20918  IX86_BUILTIN_PABSD,
20919
20920  IX86_BUILTIN_PHADDW128,
20921  IX86_BUILTIN_PHADDD128,
20922  IX86_BUILTIN_PHADDSW128,
20923  IX86_BUILTIN_PHSUBW128,
20924  IX86_BUILTIN_PHSUBD128,
20925  IX86_BUILTIN_PHSUBSW128,
20926  IX86_BUILTIN_PMADDUBSW128,
20927  IX86_BUILTIN_PMULHRSW128,
20928  IX86_BUILTIN_PSHUFB128,
20929  IX86_BUILTIN_PSIGNB128,
20930  IX86_BUILTIN_PSIGNW128,
20931  IX86_BUILTIN_PSIGND128,
20932  IX86_BUILTIN_PALIGNR128,
20933  IX86_BUILTIN_PABSB128,
20934  IX86_BUILTIN_PABSW128,
20935  IX86_BUILTIN_PABSD128,
20936
20937  /* AMDFAM10 - SSE4A New Instructions.  */
20938  IX86_BUILTIN_MOVNTSD,
20939  IX86_BUILTIN_MOVNTSS,
20940  IX86_BUILTIN_EXTRQI,
20941  IX86_BUILTIN_EXTRQ,
20942  IX86_BUILTIN_INSERTQI,
20943  IX86_BUILTIN_INSERTQ,
20944
20945  /* SSE4.1.  */
20946  IX86_BUILTIN_BLENDPD,
20947  IX86_BUILTIN_BLENDPS,
20948  IX86_BUILTIN_BLENDVPD,
20949  IX86_BUILTIN_BLENDVPS,
20950  IX86_BUILTIN_PBLENDVB128,
20951  IX86_BUILTIN_PBLENDW128,
20952
20953  IX86_BUILTIN_DPPD,
20954  IX86_BUILTIN_DPPS,
20955
20956  IX86_BUILTIN_INSERTPS128,
20957
20958  IX86_BUILTIN_MOVNTDQA,
20959  IX86_BUILTIN_MPSADBW128,
20960  IX86_BUILTIN_PACKUSDW128,
20961  IX86_BUILTIN_PCMPEQQ,
20962  IX86_BUILTIN_PHMINPOSUW128,
20963
20964  IX86_BUILTIN_PMAXSB128,
20965  IX86_BUILTIN_PMAXSD128,
20966  IX86_BUILTIN_PMAXUD128,
20967  IX86_BUILTIN_PMAXUW128,
20968
20969  IX86_BUILTIN_PMINSB128,
20970  IX86_BUILTIN_PMINSD128,
20971  IX86_BUILTIN_PMINUD128,
20972  IX86_BUILTIN_PMINUW128,
20973
20974  IX86_BUILTIN_PMOVSXBW128,
20975  IX86_BUILTIN_PMOVSXBD128,
20976  IX86_BUILTIN_PMOVSXBQ128,
20977  IX86_BUILTIN_PMOVSXWD128,
20978  IX86_BUILTIN_PMOVSXWQ128,
20979  IX86_BUILTIN_PMOVSXDQ128,
20980
20981  IX86_BUILTIN_PMOVZXBW128,
20982  IX86_BUILTIN_PMOVZXBD128,
20983  IX86_BUILTIN_PMOVZXBQ128,
20984  IX86_BUILTIN_PMOVZXWD128,
20985  IX86_BUILTIN_PMOVZXWQ128,
20986  IX86_BUILTIN_PMOVZXDQ128,
20987
20988  IX86_BUILTIN_PMULDQ128,
20989  IX86_BUILTIN_PMULLD128,
20990
20991  IX86_BUILTIN_ROUNDPD,
20992  IX86_BUILTIN_ROUNDPS,
20993  IX86_BUILTIN_ROUNDSD,
20994  IX86_BUILTIN_ROUNDSS,
20995
20996  IX86_BUILTIN_PTESTZ,
20997  IX86_BUILTIN_PTESTC,
20998  IX86_BUILTIN_PTESTNZC,
20999
21000  IX86_BUILTIN_VEC_INIT_V2SI,
21001  IX86_BUILTIN_VEC_INIT_V4HI,
21002  IX86_BUILTIN_VEC_INIT_V8QI,
21003  IX86_BUILTIN_VEC_EXT_V2DF,
21004  IX86_BUILTIN_VEC_EXT_V2DI,
21005  IX86_BUILTIN_VEC_EXT_V4SF,
21006  IX86_BUILTIN_VEC_EXT_V4SI,
21007  IX86_BUILTIN_VEC_EXT_V8HI,
21008  IX86_BUILTIN_VEC_EXT_V2SI,
21009  IX86_BUILTIN_VEC_EXT_V4HI,
21010  IX86_BUILTIN_VEC_EXT_V16QI,
21011  IX86_BUILTIN_VEC_SET_V2DI,
21012  IX86_BUILTIN_VEC_SET_V4SF,
21013  IX86_BUILTIN_VEC_SET_V4SI,
21014  IX86_BUILTIN_VEC_SET_V8HI,
21015  IX86_BUILTIN_VEC_SET_V4HI,
21016  IX86_BUILTIN_VEC_SET_V16QI,
21017
21018  IX86_BUILTIN_VEC_PACK_SFIX,
21019
21020  /* SSE4.2.  */
21021  IX86_BUILTIN_CRC32QI,
21022  IX86_BUILTIN_CRC32HI,
21023  IX86_BUILTIN_CRC32SI,
21024  IX86_BUILTIN_CRC32DI,
21025
21026  IX86_BUILTIN_PCMPESTRI128,
21027  IX86_BUILTIN_PCMPESTRM128,
21028  IX86_BUILTIN_PCMPESTRA128,
21029  IX86_BUILTIN_PCMPESTRC128,
21030  IX86_BUILTIN_PCMPESTRO128,
21031  IX86_BUILTIN_PCMPESTRS128,
21032  IX86_BUILTIN_PCMPESTRZ128,
21033  IX86_BUILTIN_PCMPISTRI128,
21034  IX86_BUILTIN_PCMPISTRM128,
21035  IX86_BUILTIN_PCMPISTRA128,
21036  IX86_BUILTIN_PCMPISTRC128,
21037  IX86_BUILTIN_PCMPISTRO128,
21038  IX86_BUILTIN_PCMPISTRS128,
21039  IX86_BUILTIN_PCMPISTRZ128,
21040
21041  IX86_BUILTIN_PCMPGTQ,
21042
21043  /* AES instructions */
21044  IX86_BUILTIN_AESENC128,
21045  IX86_BUILTIN_AESENCLAST128,
21046  IX86_BUILTIN_AESDEC128,
21047  IX86_BUILTIN_AESDECLAST128,
21048  IX86_BUILTIN_AESIMC128,
21049  IX86_BUILTIN_AESKEYGENASSIST128,
21050
21051  /* PCLMUL instruction */
21052  IX86_BUILTIN_PCLMULQDQ128,
21053
21054  /* AVX */
21055  IX86_BUILTIN_ADDPD256,
21056  IX86_BUILTIN_ADDPS256,
21057  IX86_BUILTIN_ADDSUBPD256,
21058  IX86_BUILTIN_ADDSUBPS256,
21059  IX86_BUILTIN_ANDPD256,
21060  IX86_BUILTIN_ANDPS256,
21061  IX86_BUILTIN_ANDNPD256,
21062  IX86_BUILTIN_ANDNPS256,
21063  IX86_BUILTIN_BLENDPD256,
21064  IX86_BUILTIN_BLENDPS256,
21065  IX86_BUILTIN_BLENDVPD256,
21066  IX86_BUILTIN_BLENDVPS256,
21067  IX86_BUILTIN_DIVPD256,
21068  IX86_BUILTIN_DIVPS256,
21069  IX86_BUILTIN_DPPS256,
21070  IX86_BUILTIN_HADDPD256,
21071  IX86_BUILTIN_HADDPS256,
21072  IX86_BUILTIN_HSUBPD256,
21073  IX86_BUILTIN_HSUBPS256,
21074  IX86_BUILTIN_MAXPD256,
21075  IX86_BUILTIN_MAXPS256,
21076  IX86_BUILTIN_MINPD256,
21077  IX86_BUILTIN_MINPS256,
21078  IX86_BUILTIN_MULPD256,
21079  IX86_BUILTIN_MULPS256,
21080  IX86_BUILTIN_ORPD256,
21081  IX86_BUILTIN_ORPS256,
21082  IX86_BUILTIN_SHUFPD256,
21083  IX86_BUILTIN_SHUFPS256,
21084  IX86_BUILTIN_SUBPD256,
21085  IX86_BUILTIN_SUBPS256,
21086  IX86_BUILTIN_XORPD256,
21087  IX86_BUILTIN_XORPS256,
21088  IX86_BUILTIN_CMPSD,
21089  IX86_BUILTIN_CMPSS,
21090  IX86_BUILTIN_CMPPD,
21091  IX86_BUILTIN_CMPPS,
21092  IX86_BUILTIN_CMPPD256,
21093  IX86_BUILTIN_CMPPS256,
21094  IX86_BUILTIN_CVTDQ2PD256,
21095  IX86_BUILTIN_CVTDQ2PS256,
21096  IX86_BUILTIN_CVTPD2PS256,
21097  IX86_BUILTIN_CVTPS2DQ256,
21098  IX86_BUILTIN_CVTPS2PD256,
21099  IX86_BUILTIN_CVTTPD2DQ256,
21100  IX86_BUILTIN_CVTPD2DQ256,
21101  IX86_BUILTIN_CVTTPS2DQ256,
21102  IX86_BUILTIN_EXTRACTF128PD256,
21103  IX86_BUILTIN_EXTRACTF128PS256,
21104  IX86_BUILTIN_EXTRACTF128SI256,
21105  IX86_BUILTIN_VZEROALL,
21106  IX86_BUILTIN_VZEROUPPER,
21107  IX86_BUILTIN_VPERMILVARPD,
21108  IX86_BUILTIN_VPERMILVARPS,
21109  IX86_BUILTIN_VPERMILVARPD256,
21110  IX86_BUILTIN_VPERMILVARPS256,
21111  IX86_BUILTIN_VPERMILPD,
21112  IX86_BUILTIN_VPERMILPS,
21113  IX86_BUILTIN_VPERMILPD256,
21114  IX86_BUILTIN_VPERMILPS256,
21115  IX86_BUILTIN_VPERMIL2PD,
21116  IX86_BUILTIN_VPERMIL2PS,
21117  IX86_BUILTIN_VPERMIL2PD256,
21118  IX86_BUILTIN_VPERMIL2PS256,
21119  IX86_BUILTIN_VPERM2F128PD256,
21120  IX86_BUILTIN_VPERM2F128PS256,
21121  IX86_BUILTIN_VPERM2F128SI256,
21122  IX86_BUILTIN_VBROADCASTSS,
21123  IX86_BUILTIN_VBROADCASTSD256,
21124  IX86_BUILTIN_VBROADCASTSS256,
21125  IX86_BUILTIN_VBROADCASTPD256,
21126  IX86_BUILTIN_VBROADCASTPS256,
21127  IX86_BUILTIN_VINSERTF128PD256,
21128  IX86_BUILTIN_VINSERTF128PS256,
21129  IX86_BUILTIN_VINSERTF128SI256,
21130  IX86_BUILTIN_LOADUPD256,
21131  IX86_BUILTIN_LOADUPS256,
21132  IX86_BUILTIN_STOREUPD256,
21133  IX86_BUILTIN_STOREUPS256,
21134  IX86_BUILTIN_LDDQU256,
21135  IX86_BUILTIN_MOVNTDQ256,
21136  IX86_BUILTIN_MOVNTPD256,
21137  IX86_BUILTIN_MOVNTPS256,
21138  IX86_BUILTIN_LOADDQU256,
21139  IX86_BUILTIN_STOREDQU256,
21140  IX86_BUILTIN_MASKLOADPD,
21141  IX86_BUILTIN_MASKLOADPS,
21142  IX86_BUILTIN_MASKSTOREPD,
21143  IX86_BUILTIN_MASKSTOREPS,
21144  IX86_BUILTIN_MASKLOADPD256,
21145  IX86_BUILTIN_MASKLOADPS256,
21146  IX86_BUILTIN_MASKSTOREPD256,
21147  IX86_BUILTIN_MASKSTOREPS256,
21148  IX86_BUILTIN_MOVSHDUP256,
21149  IX86_BUILTIN_MOVSLDUP256,
21150  IX86_BUILTIN_MOVDDUP256,
21151
21152  IX86_BUILTIN_SQRTPD256,
21153  IX86_BUILTIN_SQRTPS256,
21154  IX86_BUILTIN_SQRTPS_NR256,
21155  IX86_BUILTIN_RSQRTPS256,
21156  IX86_BUILTIN_RSQRTPS_NR256,
21157
21158  IX86_BUILTIN_RCPPS256,
21159
21160  IX86_BUILTIN_ROUNDPD256,
21161  IX86_BUILTIN_ROUNDPS256,
21162
21163  IX86_BUILTIN_UNPCKHPD256,
21164  IX86_BUILTIN_UNPCKLPD256,
21165  IX86_BUILTIN_UNPCKHPS256,
21166  IX86_BUILTIN_UNPCKLPS256,
21167
21168  IX86_BUILTIN_SI256_SI,
21169  IX86_BUILTIN_PS256_PS,
21170  IX86_BUILTIN_PD256_PD,
21171  IX86_BUILTIN_SI_SI256,
21172  IX86_BUILTIN_PS_PS256,
21173  IX86_BUILTIN_PD_PD256,
21174
21175  IX86_BUILTIN_VTESTZPD,
21176  IX86_BUILTIN_VTESTCPD,
21177  IX86_BUILTIN_VTESTNZCPD,
21178  IX86_BUILTIN_VTESTZPS,
21179  IX86_BUILTIN_VTESTCPS,
21180  IX86_BUILTIN_VTESTNZCPS,
21181  IX86_BUILTIN_VTESTZPD256,
21182  IX86_BUILTIN_VTESTCPD256,
21183  IX86_BUILTIN_VTESTNZCPD256,
21184  IX86_BUILTIN_VTESTZPS256,
21185  IX86_BUILTIN_VTESTCPS256,
21186  IX86_BUILTIN_VTESTNZCPS256,
21187  IX86_BUILTIN_PTESTZ256,
21188  IX86_BUILTIN_PTESTC256,
21189  IX86_BUILTIN_PTESTNZC256,
21190
21191  IX86_BUILTIN_MOVMSKPD256,
21192  IX86_BUILTIN_MOVMSKPS256,
21193
21194  /* TFmode support builtins.  */
21195  IX86_BUILTIN_INFQ,
21196  IX86_BUILTIN_HUGE_VALQ,
21197  IX86_BUILTIN_FABSQ,
21198  IX86_BUILTIN_COPYSIGNQ,
21199
21200  /* Vectorizer support builtins.  */
21201  IX86_BUILTIN_CPYSGNPS,
21202  IX86_BUILTIN_CPYSGNPD,
21203
21204  IX86_BUILTIN_CVTUDQ2PS,
21205
21206  IX86_BUILTIN_VEC_PERM_V2DF,
21207  IX86_BUILTIN_VEC_PERM_V4SF,
21208  IX86_BUILTIN_VEC_PERM_V2DI,
21209  IX86_BUILTIN_VEC_PERM_V4SI,
21210  IX86_BUILTIN_VEC_PERM_V8HI,
21211  IX86_BUILTIN_VEC_PERM_V16QI,
21212  IX86_BUILTIN_VEC_PERM_V2DI_U,
21213  IX86_BUILTIN_VEC_PERM_V4SI_U,
21214  IX86_BUILTIN_VEC_PERM_V8HI_U,
21215  IX86_BUILTIN_VEC_PERM_V16QI_U,
21216  IX86_BUILTIN_VEC_PERM_V4DF,
21217  IX86_BUILTIN_VEC_PERM_V8SF,
21218
21219  /* FMA4 and XOP instructions.  */
21220  IX86_BUILTIN_VFMADDSS,
21221  IX86_BUILTIN_VFMADDSD,
21222  IX86_BUILTIN_VFMADDPS,
21223  IX86_BUILTIN_VFMADDPD,
21224  IX86_BUILTIN_VFMSUBSS,
21225  IX86_BUILTIN_VFMSUBSD,
21226  IX86_BUILTIN_VFMSUBPS,
21227  IX86_BUILTIN_VFMSUBPD,
21228  IX86_BUILTIN_VFMADDSUBPS,
21229  IX86_BUILTIN_VFMADDSUBPD,
21230  IX86_BUILTIN_VFMSUBADDPS,
21231  IX86_BUILTIN_VFMSUBADDPD,
21232  IX86_BUILTIN_VFNMADDSS,
21233  IX86_BUILTIN_VFNMADDSD,
21234  IX86_BUILTIN_VFNMADDPS,
21235  IX86_BUILTIN_VFNMADDPD,
21236  IX86_BUILTIN_VFNMSUBSS,
21237  IX86_BUILTIN_VFNMSUBSD,
21238  IX86_BUILTIN_VFNMSUBPS,
21239  IX86_BUILTIN_VFNMSUBPD,
21240  IX86_BUILTIN_VFMADDPS256,
21241  IX86_BUILTIN_VFMADDPD256,
21242  IX86_BUILTIN_VFMSUBPS256,
21243  IX86_BUILTIN_VFMSUBPD256,
21244  IX86_BUILTIN_VFMADDSUBPS256,
21245  IX86_BUILTIN_VFMADDSUBPD256,
21246  IX86_BUILTIN_VFMSUBADDPS256,
21247  IX86_BUILTIN_VFMSUBADDPD256,
21248  IX86_BUILTIN_VFNMADDPS256,
21249  IX86_BUILTIN_VFNMADDPD256,
21250  IX86_BUILTIN_VFNMSUBPS256,
21251  IX86_BUILTIN_VFNMSUBPD256,
21252
21253  IX86_BUILTIN_VPCMOV,
21254  IX86_BUILTIN_VPCMOV_V2DI,
21255  IX86_BUILTIN_VPCMOV_V4SI,
21256  IX86_BUILTIN_VPCMOV_V8HI,
21257  IX86_BUILTIN_VPCMOV_V16QI,
21258  IX86_BUILTIN_VPCMOV_V4SF,
21259  IX86_BUILTIN_VPCMOV_V2DF,
21260  IX86_BUILTIN_VPCMOV256,
21261  IX86_BUILTIN_VPCMOV_V4DI256,
21262  IX86_BUILTIN_VPCMOV_V8SI256,
21263  IX86_BUILTIN_VPCMOV_V16HI256,
21264  IX86_BUILTIN_VPCMOV_V32QI256,
21265  IX86_BUILTIN_VPCMOV_V8SF256,
21266  IX86_BUILTIN_VPCMOV_V4DF256,
21267
21268  IX86_BUILTIN_VPPERM,
21269
21270  IX86_BUILTIN_VPMACSSWW,
21271  IX86_BUILTIN_VPMACSWW,
21272  IX86_BUILTIN_VPMACSSWD,
21273  IX86_BUILTIN_VPMACSWD,
21274  IX86_BUILTIN_VPMACSSDD,
21275  IX86_BUILTIN_VPMACSDD,
21276  IX86_BUILTIN_VPMACSSDQL,
21277  IX86_BUILTIN_VPMACSSDQH,
21278  IX86_BUILTIN_VPMACSDQL,
21279  IX86_BUILTIN_VPMACSDQH,
21280  IX86_BUILTIN_VPMADCSSWD,
21281  IX86_BUILTIN_VPMADCSWD,
21282
21283  IX86_BUILTIN_VPHADDBW,
21284  IX86_BUILTIN_VPHADDBD,
21285  IX86_BUILTIN_VPHADDBQ,
21286  IX86_BUILTIN_VPHADDWD,
21287  IX86_BUILTIN_VPHADDWQ,
21288  IX86_BUILTIN_VPHADDDQ,
21289  IX86_BUILTIN_VPHADDUBW,
21290  IX86_BUILTIN_VPHADDUBD,
21291  IX86_BUILTIN_VPHADDUBQ,
21292  IX86_BUILTIN_VPHADDUWD,
21293  IX86_BUILTIN_VPHADDUWQ,
21294  IX86_BUILTIN_VPHADDUDQ,
21295  IX86_BUILTIN_VPHSUBBW,
21296  IX86_BUILTIN_VPHSUBWD,
21297  IX86_BUILTIN_VPHSUBDQ,
21298
21299  IX86_BUILTIN_VPROTB,
21300  IX86_BUILTIN_VPROTW,
21301  IX86_BUILTIN_VPROTD,
21302  IX86_BUILTIN_VPROTQ,
21303  IX86_BUILTIN_VPROTB_IMM,
21304  IX86_BUILTIN_VPROTW_IMM,
21305  IX86_BUILTIN_VPROTD_IMM,
21306  IX86_BUILTIN_VPROTQ_IMM,
21307
21308  IX86_BUILTIN_VPSHLB,
21309  IX86_BUILTIN_VPSHLW,
21310  IX86_BUILTIN_VPSHLD,
21311  IX86_BUILTIN_VPSHLQ,
21312  IX86_BUILTIN_VPSHAB,
21313  IX86_BUILTIN_VPSHAW,
21314  IX86_BUILTIN_VPSHAD,
21315  IX86_BUILTIN_VPSHAQ,
21316
21317  IX86_BUILTIN_VFRCZSS,
21318  IX86_BUILTIN_VFRCZSD,
21319  IX86_BUILTIN_VFRCZPS,
21320  IX86_BUILTIN_VFRCZPD,
21321  IX86_BUILTIN_VFRCZPS256,
21322  IX86_BUILTIN_VFRCZPD256,
21323
21324  IX86_BUILTIN_VPCOMEQUB,
21325  IX86_BUILTIN_VPCOMNEUB,
21326  IX86_BUILTIN_VPCOMLTUB,
21327  IX86_BUILTIN_VPCOMLEUB,
21328  IX86_BUILTIN_VPCOMGTUB,
21329  IX86_BUILTIN_VPCOMGEUB,
21330  IX86_BUILTIN_VPCOMFALSEUB,
21331  IX86_BUILTIN_VPCOMTRUEUB,
21332
21333  IX86_BUILTIN_VPCOMEQUW,
21334  IX86_BUILTIN_VPCOMNEUW,
21335  IX86_BUILTIN_VPCOMLTUW,
21336  IX86_BUILTIN_VPCOMLEUW,
21337  IX86_BUILTIN_VPCOMGTUW,
21338  IX86_BUILTIN_VPCOMGEUW,
21339  IX86_BUILTIN_VPCOMFALSEUW,
21340  IX86_BUILTIN_VPCOMTRUEUW,
21341
21342  IX86_BUILTIN_VPCOMEQUD,
21343  IX86_BUILTIN_VPCOMNEUD,
21344  IX86_BUILTIN_VPCOMLTUD,
21345  IX86_BUILTIN_VPCOMLEUD,
21346  IX86_BUILTIN_VPCOMGTUD,
21347  IX86_BUILTIN_VPCOMGEUD,
21348  IX86_BUILTIN_VPCOMFALSEUD,
21349  IX86_BUILTIN_VPCOMTRUEUD,
21350
21351  IX86_BUILTIN_VPCOMEQUQ,
21352  IX86_BUILTIN_VPCOMNEUQ,
21353  IX86_BUILTIN_VPCOMLTUQ,
21354  IX86_BUILTIN_VPCOMLEUQ,
21355  IX86_BUILTIN_VPCOMGTUQ,
21356  IX86_BUILTIN_VPCOMGEUQ,
21357  IX86_BUILTIN_VPCOMFALSEUQ,
21358  IX86_BUILTIN_VPCOMTRUEUQ,
21359
21360  IX86_BUILTIN_VPCOMEQB,
21361  IX86_BUILTIN_VPCOMNEB,
21362  IX86_BUILTIN_VPCOMLTB,
21363  IX86_BUILTIN_VPCOMLEB,
21364  IX86_BUILTIN_VPCOMGTB,
21365  IX86_BUILTIN_VPCOMGEB,
21366  IX86_BUILTIN_VPCOMFALSEB,
21367  IX86_BUILTIN_VPCOMTRUEB,
21368
21369  IX86_BUILTIN_VPCOMEQW,
21370  IX86_BUILTIN_VPCOMNEW,
21371  IX86_BUILTIN_VPCOMLTW,
21372  IX86_BUILTIN_VPCOMLEW,
21373  IX86_BUILTIN_VPCOMGTW,
21374  IX86_BUILTIN_VPCOMGEW,
21375  IX86_BUILTIN_VPCOMFALSEW,
21376  IX86_BUILTIN_VPCOMTRUEW,
21377
21378  IX86_BUILTIN_VPCOMEQD,
21379  IX86_BUILTIN_VPCOMNED,
21380  IX86_BUILTIN_VPCOMLTD,
21381  IX86_BUILTIN_VPCOMLED,
21382  IX86_BUILTIN_VPCOMGTD,
21383  IX86_BUILTIN_VPCOMGED,
21384  IX86_BUILTIN_VPCOMFALSED,
21385  IX86_BUILTIN_VPCOMTRUED,
21386
21387  IX86_BUILTIN_VPCOMEQQ,
21388  IX86_BUILTIN_VPCOMNEQ,
21389  IX86_BUILTIN_VPCOMLTQ,
21390  IX86_BUILTIN_VPCOMLEQ,
21391  IX86_BUILTIN_VPCOMGTQ,
21392  IX86_BUILTIN_VPCOMGEQ,
21393  IX86_BUILTIN_VPCOMFALSEQ,
21394  IX86_BUILTIN_VPCOMTRUEQ,
21395
21396  /* LWP instructions.  */
21397  IX86_BUILTIN_LLWPCB,
21398  IX86_BUILTIN_SLWPCB,
21399  IX86_BUILTIN_LWPVAL32,
21400  IX86_BUILTIN_LWPVAL64,
21401  IX86_BUILTIN_LWPINS32,
21402  IX86_BUILTIN_LWPINS64,
21403
21404  IX86_BUILTIN_CLZS,
21405
21406  IX86_BUILTIN_MAX
21407};
21408
21409/* Table for the ix86 builtin decls.  */
21410static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
21411
/* Table of all the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
21415struct builtin_isa {
21416  const char *name;		/* function name */
21417  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
21418  int isa;			/* isa_flags this builtin is defined for */
21419  bool const_p;			/* true if the declaration is constant */
  bool set_and_not_built_p;	/* true if recorded but not yet built */
21421};
21422
21423static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
21424
21425
/* Add an ix86 target builtin function with CODE, NAME and TCODE (the code
   of its function type).  Save MASK, the set of isa_flags the builtin
   needs, in the ix86_builtins_isa array, and store the function decl in
   the ix86_builtins array.  Return the function decl, or NULL_TREE if the
   builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is
   changed with function specific optimization.  Doing so can save about
   300K for the default compiler.  When the builtin is expanded, check at
   that time whether it is valid.

   If the front end doesn't have a special hook, record all builtins, even
   those not in the current ISA, in case the user uses function specific
   options for a different ISA; that way we don't get scope errors if a
   builtin is added in the middle of a function scope.  */
21441
21442static inline tree
21443def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
21444	     enum ix86_builtins code)
21445{
21446  tree decl = NULL_TREE;
21447
21448  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
21449    {
21450      ix86_builtins_isa[(int) code].isa = mask;
21451
21452      mask &= ~OPTION_MASK_ISA_64BIT;
21453      if (mask == 0
21454	  || (mask & ix86_isa_flags) != 0
21455	  || (lang_hooks.builtin_function
21456	      == lang_hooks.builtin_function_ext_scope))
21458	{
21459	  tree type = ix86_get_builtin_func_type (tcode);
21460	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
21461				       NULL, NULL_TREE);
21462	  ix86_builtins[(int) code] = decl;
21463	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
21464	}
21465      else
21466	{
21467	  ix86_builtins[(int) code] = NULL_TREE;
21468	  ix86_builtins_isa[(int) code].tcode = tcode;
21469	  ix86_builtins_isa[(int) code].name = name;
21470	  ix86_builtins_isa[(int) code].const_p = false;
21471	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
21472	}
21473    }
21474
21475  return decl;
21476}
21477
21478/* Like def_builtin, but also marks the function decl "const".  */
21479
21480static inline tree
21481def_builtin_const (int mask, const char *name,
21482		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
21483{
21484  tree decl = def_builtin (mask, name, tcode, code);
21485  if (decl)
21486    TREE_READONLY (decl) = 1;
21487  else
21488    ix86_builtins_isa[(int) code].const_p = true;
21489
21490  return decl;
21491}
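
/* An illustrative registration (the real calls are made later in this
   file, mostly by loops over the bdesc_* tables; the function-type name
   below comes from the generated type tables):

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
			V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);

   With -msse on the command line the decl is created immediately; without
   it, the request is only recorded in ix86_builtins_isa and materialized
   later by ix86_add_new_builtins once the ISA is enabled per function.  */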
21492
/* Add any new builtin functions for a given ISA that were recorded but
   not yet declared.  Deferring the declarations like this saves a bit of
   space, compared to adding every declaration to the tree whether or not
   it ends up being used.  */
21496
21497static void
21498ix86_add_new_builtins (int isa)
21499{
21500  int i;
21501
21502  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
21503    {
21504      if ((ix86_builtins_isa[i].isa & isa) != 0
21505	  && ix86_builtins_isa[i].set_and_not_built_p)
21506	{
21507	  tree decl, type;
21508
21509	  /* Don't define the builtin again.  */
21510	  ix86_builtins_isa[i].set_and_not_built_p = false;
21511
21512	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
21513	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
21514						 type, i, BUILT_IN_MD, NULL,
21515						 NULL_TREE);
21516
21517	  ix86_builtins[i] = decl;
21518	  if (ix86_builtins_isa[i].const_p)
21519	    TREE_READONLY (decl) = 1;
21520	}
21521    }
21522}
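
/* Illustrative scenario (hypothetical): when the whole file is compiled
   with only -msse2, the SSE4.2 builtins are merely recorded.  If a single
   function opts in, e.g.

     __attribute__((target ("sse4.2")))
     unsigned int crc (unsigned int c, unsigned char v)
     {
       return __builtin_ia32_crc32qi (c, v);
     }

   the target-attribute machinery calls ix86_add_new_builtins with the new
   isa flags, the pending decls are built, and the call resolves normally.  */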
21523
21524/* Bits for builtin_description.flag.  */
21525
/* Set when we don't support the comparison natively, and should
   swap the comparison operands in order to support it.  */
21528#define BUILTIN_DESC_SWAP_OPERANDS	1
21529
21530struct builtin_description
21531{
21532  const unsigned int mask;
21533  const enum insn_code icode;
21534  const char *const name;
21535  const enum ix86_builtins code;
21536  const enum rtx_code comparison;
21537  const int flag;
21538};
21539
21540static const struct builtin_description bdesc_comi[] =
21541{
21542  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21543  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21544  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21545  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21546  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21547  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21548  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21549  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21550  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21551  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21552  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21553  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21554  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21555  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21556  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21557  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21558  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21559  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21560  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21561  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21562  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21563  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21564  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21565  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
21566};
21567
21568static const struct builtin_description bdesc_pcmpestr[] =
21569{
21570  /* SSE4.2 */
21571  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21572  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21573  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21574  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21575  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21576  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21577  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
21578};
21579
21580static const struct builtin_description bdesc_pcmpistr[] =
21581{
21582  /* SSE4.2 */
21583  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21584  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21585  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21586  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21587  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21588  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21589  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21590};
21591
21592/* Special builtins with variable number of arguments.  */
21593static const struct builtin_description bdesc_special_args[] =
21594{
21595  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
21596  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
21597
21598  /* MMX */
21599  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21600
21601  /* 3DNow! */
21602  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21603
21604  /* SSE */
21605  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21606  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21607  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21608
21609  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21610  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21611  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21612  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21613
21614  /* SSE or 3DNow!A  */
21615  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21616  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
21617
21618  /* SSE2 */
21619  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21620  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21621  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21622  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21623  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21624  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21625  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21626  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21627  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21628
21629  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21630  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21631
21632  /* SSE3 */
21633  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21634
21635  /* SSE4.1 */
21636  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
21637
21638  /* SSE4A */
21639  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21640  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21641
21642  /* AVX */
21643  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21644  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21645
21646  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21647  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21648  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21649  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21650  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21651
21652  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21653  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21654  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21655  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21656  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21657  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21658  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21659
21660  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21661  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21662  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21663
21664  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
21665  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
21666  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
21667  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
21668  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
21669  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
21670  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
21671  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
21672
21673  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
21674  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
21675  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
21676  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
21677  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
21678  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
21679
21680};
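/* A sketch of how the descriptor tables above and below are consumed
   (the real handling is in the builtin init/expand code later in this
   file): each entry pairs the ISA mask that must be enabled with the
   insn pattern implementing the builtin, the "__builtin_ia32_*" name to
   register, a comparison code where the pattern needs one, and a
   prototype code describing the signature.  Once registered, an entry
   behaves like any other builtin at the source level; for example, the
   V4SF_FTYPE_V4SF_V4SF entry for __builtin_ia32_addps corresponds to
   user code such as (compiled with -msse):

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     __v4sf
     add4 (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_addps (a, b);
     }

   which the expander emits through the addv4sf3 pattern named in the
   table.  */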
21681
21682/* Builtins with variable number of arguments.  */
21683static const struct builtin_description bdesc_args[] =
21684{
21685  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
21686  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
21687  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
21688  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21689  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21690  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21691  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21692
21693  /* MMX */
21694  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21695  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21696  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21697  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21698  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21699  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21700
21701  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21702  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21703  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21704  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21705  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21706  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21707  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21708  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21709
21710  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21711  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21712
21713  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21714  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21715  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21716  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21717
21718  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21719  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21720  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21721  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21722  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21723  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21724
21725  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21726  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21727  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21728  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21729  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21730  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21731
21732  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21733  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21734  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21735
21736  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21737
21738  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21739  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21740  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21741  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21742  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21743  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21744
21745  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21746  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21747  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21748  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21749  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21750  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21751
21752  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21753  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21754  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21755  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
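  /* Note on the shift entries above: prototype codes ending in
     ..._SI_COUNT describe the forms that take the shift count as an
     integer (e.g. __builtin_ia32_psllwi), while the codes whose last
     operand is itself a vector (e.g. V4HI_FTYPE_V4HI_V4HI_COUNT for
     __builtin_ia32_psllw) take the count in a vector operand.  Both
     flavors of a given shift name the same pattern and are told apart
     only when the call is expanded later in this file.  */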
21756
21757  /* 3DNow! */
21758  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21759  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21760  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21761  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21762
21763  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21764  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21765  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21766  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21767  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21768  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21769  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21770  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21771  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21772  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21773  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21774  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21775  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21776  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21777  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21778
21779  /* 3DNow!A */
21780  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21781  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21782  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21783  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21784  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21785  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21786
21787  /* SSE */
21788  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21789  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21790  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21791  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21792  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21793  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21794  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21795  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21796  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21797  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21798  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21799  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21800
21801  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21802
21803  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21804  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21805  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21806  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21807  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21808  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21809  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21810  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21811
21812  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21813  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21814  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21815  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21816  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21817  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21818  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21819  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21820  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21821  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21822  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21823  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21824  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21825  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21826  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21827  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21828  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21829  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21830  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21831  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21832  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21833  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
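  /* The compare entries above all share one mask-compare pattern per
     mode; the rtx code column selects the condition, and the ..._SWAP
     prototype codes mark the builtins whose two operands are exchanged
     before expansion.  As a user-level sketch (with -msse),
     __builtin_ia32_cmpgtps (a, b) therefore produces the same mask as
     __builtin_ia32_cmpltps (b, a).  */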
21834
21835  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21836  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21837  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21838  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21839
21840  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21841  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21842  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21843  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21844
21845  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3,  "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21846
21847  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21848  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21849  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21850  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21851  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21852
21853  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21854  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21855  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
21856
21857  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21858
21859  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21860  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21861  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21862
21863  /* SSE MMX or 3DNow!A */
21864  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21865  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21866  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21867
21868  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21869  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21870  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21871  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21872
21873  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21874  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21875
21876  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21877
21878  /* SSE2 */
21879  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21880
21881  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
21882  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
21883  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
21884  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
21885  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
21886  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21887  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
21888  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
21889  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
21890  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
21891  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
21892  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
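  /* The vec_perm entries above use CODE_FOR_nothing on purpose: there is
     no single named pattern for a variable permutation, so these builtins
     are expanded by the vec_perm support later in this file, which picks
     an instruction sequence based on the selector operand.  */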
21893
21894  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF  },
21895  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21896  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21897  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21898  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21899  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21900
21901  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21902  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21903  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21904  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21905  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21906
21907  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21908
21909  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21910  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21911  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21912  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21913
21914  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21915  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21916  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21917
21918  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21919  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21920  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21921  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21922  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21923  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21924  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21925  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21926
21927  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21928  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21929  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21930  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21931  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21932  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21933  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21934  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21935  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21936  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21937  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21938  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21939  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21940  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21941  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21942  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21943  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21944  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21945  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21946  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21947
21948  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21949  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21950  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21951  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21952
21953  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21954  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21955  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21956  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21957
21958  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3,  "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21959
21960  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21961  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21962  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21963
21964  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21965
21966  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21967  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21968  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21969  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21970  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21971  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21972  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21973  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21974
21975  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21976  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21977  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21978  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21979  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21980  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21981  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21982  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21983
21984  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21985  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21986
21987  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21988  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21989  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21990  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21991
21992  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21993  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21994
21995  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21996  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21997  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI  },
21998  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21999  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22000  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI  },
22001
22002  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22003  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22004  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22005  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22006
22007  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22008  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI  },
22009  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN,  (int) V4SI_FTYPE_V4SI_V4SI },
22010  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22011  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22012  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22013  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22014  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22015
22016  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
22017  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
22018  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
22019
22020  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22021  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
22022
22023  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
22024  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
22025
22026  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
22027
22028  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
22029  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
22030  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
22031  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
22032
22033  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
22034  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22035  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22036  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
22037  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22038  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22039  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
22040
22041  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
22042  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22043  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22044  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
22045  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22046  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22047  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
22048
22049  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22050  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22051  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22052  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22053
22054  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
22055  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
22056  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
22057
22058  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
22059
22060  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
22061  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
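  /* These two entries carry no "__builtin_ia32_*" name: the user-visible
     __float128 fabs/copysign builtins are registered separately, and the
     table entries here only supply the patterns used when such a call is
     expanded.  */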
22062
22063  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
22064
22065  /* SSE2 MMX */
22066  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
22067  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
22068
22069  /* SSE3 */
22070  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22071  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22072
22073  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22074  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22075  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22076  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22077  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22078  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22079
22080  /* SSSE3 */
22081  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
22082  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
22083  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
22084  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
22085  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
22086  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
22087
22088  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22089  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22090  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22091  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22092  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22093  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22094  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22095  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22096  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22097  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22098  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22099  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22100  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
22101  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
22102  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22103  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22104  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22105  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22106  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22107  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22108  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22109  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22110  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22111  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22112
22113  /* SSSE3.  */
22114  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
22115  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
22116
22117  /* SSE4.1 */
22118  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22119  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22120  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
22121  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
22122  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22123  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22124  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22125  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
22126  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
22127  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
22128
22129  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
22130  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
22131  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
22132  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
22133  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
22134  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
22135  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
22136  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
22137  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
22138  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
22139  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
22140  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
22141  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
22142
22143  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
22144  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22145  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22146  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22147  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22148  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22149  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22150  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22151  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22152  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22153  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
22154  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22155
22156  /* SSE4.1 */
22157  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22158  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22159  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22160  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22161
22162  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22163  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22164  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22165
22166  /* SSE4.2 */
22167  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22168  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
22169  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
22170  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
22171  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
22172
22173  /* SSE4A */
22174  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
22175  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
22176  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
22177  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22178
22179  /* AES */
22180  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
22181  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
22182
22183  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22184  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22185  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22186  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22187
22188  /* PCLMUL */
22189  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
22190
22191  /* AVX */
22192  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22193  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22194  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22195  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22196  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22197  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22198  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22199  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22200  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22201  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22202  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22203  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22204  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22205  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22206  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22207  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22208  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22209  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22210  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22211  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22212  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22213  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22214  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22215  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22216  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22217  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22218
22219  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
22220  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
22221  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
22222  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
22223
22224  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22225  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22226  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
22227  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
22228  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22229  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22230  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22231  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22232  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22233  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22234  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22235  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22236  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22237  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
22238  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
22239  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
22240  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
22241  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
22242  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
22243  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22244  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
22245  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22246  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22247  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22248  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22249  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22250  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
22251  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22252  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22253  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22254  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22255  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
22256  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
22257  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
22258
22259  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22260  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22261  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22262
22263  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22264  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22265  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22266  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22267  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22268
22269  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22270
22271  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22272  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22273
22274  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256,  "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22275  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256,  "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22276  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256,  "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22277  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256,  "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22278
22279  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
22280  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
22281  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
22282  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
22283  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
22284  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
22285
22286  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22287  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22288  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22289  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22290  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22291  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22292  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22293  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22294  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22295  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22296  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22297  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22298  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22299  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22300  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22301
22302  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF  },
22303  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
22304
22305  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm,   "__builtin_clzs",   IX86_BUILTIN_CLZS,    UNKNOWN,     (int) UINT16_FTYPE_UINT16 },
22306};
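
/* Illustrative note only; nothing below depends on it.  Each bdesc_args
   entry above binds an insn expander to a user-visible builtin of the
   given function type.  For example, the OPTION_MASK_ISA_AVX entry for
   __builtin_ia32_addpd256 is the builtin that the <immintrin.h> wrapper
   _mm256_add_pd is typically implemented in terms of, so user code such
   as the following sketch ends up expanding through CODE_FOR_addv4df3:

     #include <immintrin.h>

     __m256d
     add_pd256 (__m256d a, __m256d b)
     {
       return _mm256_add_pd (a, b);
     }
*/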
22307
22308/* FMA4 and XOP.  */
22309#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
22310#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
22311#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
22312#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
22313#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
22314#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
22315#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
22316#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
22317#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
22318#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
22319#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
22320#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
22321#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
22322#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
22323#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
22324#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
22325#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
22326#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
22327#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
22328#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
22329#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
22330#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
22331#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
22332#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
22333#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
22334#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
22335#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
22336#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
22337#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
22338#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
22339#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
22340#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
22341#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
22342#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
22343#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
22344#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
22345#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
22346#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
22347#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
22348#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
22349#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
22350#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
22351#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
22352#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
22353#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
22354#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
22355#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
22356#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
22357#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
22358#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
22359#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
22360#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
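
/* Reading aid (illustration only): in the MULTI_ARG_* aliases above the
   digit gives the number of vector operands and the suffix their mode,
   so MULTI_ARG_3_SF stands for V4SF_FTYPE_V4SF_V4SF_V4SF, i.e. a builtin
   taking three single-float vectors and returning one.  A hypothetical
   user-side sketch, assuming the FMA4 wrapper _mm_macc_ps that
   <x86intrin.h> typically provides around __builtin_ia32_vfmaddps:

     #include <x86intrin.h>

     __m128
     madd_ps (__m128 a, __m128 b, __m128 c)
     {
       return _mm_macc_ps (a, b, c);
     }
*/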
22361
22362static const struct builtin_description bdesc_multi_arg[] =
22363{
22364  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4,     "__builtin_ia32_vfmaddss",    IX86_BUILTIN_VFMADDSS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
22365  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4,     "__builtin_ia32_vfmaddsd",    IX86_BUILTIN_VFMADDSD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
22366  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4,       "__builtin_ia32_vfmaddps",    IX86_BUILTIN_VFMADDPS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
22367  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4,       "__builtin_ia32_vfmaddpd",    IX86_BUILTIN_VFMADDPD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
22368  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4,     "__builtin_ia32_vfmsubss",    IX86_BUILTIN_VFMSUBSS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
22369  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4,     "__builtin_ia32_vfmsubsd",    IX86_BUILTIN_VFMSUBSD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
22370  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4,       "__builtin_ia32_vfmsubps",    IX86_BUILTIN_VFMSUBPS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
22371  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4,       "__builtin_ia32_vfmsubpd",    IX86_BUILTIN_VFMSUBPD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
22372
22373  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4,    "__builtin_ia32_vfnmaddss",   IX86_BUILTIN_VFNMADDSS,   UNKNOWN,      (int)MULTI_ARG_3_SF },
22374  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4,    "__builtin_ia32_vfnmaddsd",   IX86_BUILTIN_VFNMADDSD,   UNKNOWN,      (int)MULTI_ARG_3_DF },
22375  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4,      "__builtin_ia32_vfnmaddps",   IX86_BUILTIN_VFNMADDPS,   UNKNOWN,      (int)MULTI_ARG_3_SF },
22376  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4,      "__builtin_ia32_vfnmaddpd",   IX86_BUILTIN_VFNMADDPD,   UNKNOWN,      (int)MULTI_ARG_3_DF },
22377  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4,    "__builtin_ia32_vfnmsubss",   IX86_BUILTIN_VFNMSUBSS,   UNKNOWN,      (int)MULTI_ARG_3_SF },
22378  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4,    "__builtin_ia32_vfnmsubsd",   IX86_BUILTIN_VFNMSUBSD,   UNKNOWN,      (int)MULTI_ARG_3_DF },
22379  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4,      "__builtin_ia32_vfnmsubps",   IX86_BUILTIN_VFNMSUBPS,   UNKNOWN,      (int)MULTI_ARG_3_SF },
22380  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4,      "__builtin_ia32_vfnmsubpd",   IX86_BUILTIN_VFNMSUBPD,   UNKNOWN,      (int)MULTI_ARG_3_DF },
22381
22382  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4,	   "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
22383  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4,	   "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
22384  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4,	   "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
22385  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4,	   "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
22386
22387  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256,       "__builtin_ia32_vfmaddps256",    IX86_BUILTIN_VFMADDPS256,    UNKNOWN,      (int)MULTI_ARG_3_SF2 },
22388  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256,       "__builtin_ia32_vfmaddpd256",    IX86_BUILTIN_VFMADDPD256,    UNKNOWN,      (int)MULTI_ARG_3_DF2 },
22389  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256,       "__builtin_ia32_vfmsubps256",    IX86_BUILTIN_VFMSUBPS256,    UNKNOWN,      (int)MULTI_ARG_3_SF2 },
22390  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256,       "__builtin_ia32_vfmsubpd256",    IX86_BUILTIN_VFMSUBPD256,    UNKNOWN,      (int)MULTI_ARG_3_DF2 },
22391
22392  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256,      "__builtin_ia32_vfnmaddps256",   IX86_BUILTIN_VFNMADDPS256,   UNKNOWN,      (int)MULTI_ARG_3_SF2 },
22393  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256,      "__builtin_ia32_vfnmaddpd256",   IX86_BUILTIN_VFNMADDPD256,   UNKNOWN,      (int)MULTI_ARG_3_DF2 },
22394  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256,      "__builtin_ia32_vfnmsubps256",   IX86_BUILTIN_VFNMSUBPS256,   UNKNOWN,      (int)MULTI_ARG_3_SF2 },
22395  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256,      "__builtin_ia32_vfnmsubpd256",   IX86_BUILTIN_VFNMSUBPD256,   UNKNOWN,      (int)MULTI_ARG_3_DF2 },
22396
22397  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4,	   "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,    UNKNOWN,      (int)MULTI_ARG_3_SF2 },
22398  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4,	   "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,    UNKNOWN,      (int)MULTI_ARG_3_DF2 },
22399  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4,	   "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256,    UNKNOWN,      (int)MULTI_ARG_3_SF2 },
22400  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4,	   "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256,    UNKNOWN,      (int)MULTI_ARG_3_DF2 },
22401
22402  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di,        "__builtin_ia32_vpcmov",      IX86_BUILTIN_VPCMOV,	 UNKNOWN,      (int)MULTI_ARG_3_DI },
22403  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di,        "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN,      (int)MULTI_ARG_3_DI },
22404  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si,        "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN,      (int)MULTI_ARG_3_SI },
22405  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi,        "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN,      (int)MULTI_ARG_3_HI },
22406  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi,       "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN,      (int)MULTI_ARG_3_QI },
22407  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df,        "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN,      (int)MULTI_ARG_3_DF },
22408  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf,        "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN,      (int)MULTI_ARG_3_SF },
22409
22410  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256,        "__builtin_ia32_vpcmov256",       IX86_BUILTIN_VPCMOV256,       UNKNOWN,      (int)MULTI_ARG_3_DI2 },
22411  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256,        "__builtin_ia32_vpcmov_v4di256",  IX86_BUILTIN_VPCMOV_V4DI256,  UNKNOWN,      (int)MULTI_ARG_3_DI2 },
22412  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256,        "__builtin_ia32_vpcmov_v8si256",  IX86_BUILTIN_VPCMOV_V8SI256,  UNKNOWN,      (int)MULTI_ARG_3_SI2 },
22413  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256,       "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN,      (int)MULTI_ARG_3_HI2 },
22414  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256,       "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN,      (int)MULTI_ARG_3_QI2 },
22415  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256,        "__builtin_ia32_vpcmov_v4df256",  IX86_BUILTIN_VPCMOV_V4DF256,  UNKNOWN,      (int)MULTI_ARG_3_DF2 },
22416  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256,        "__builtin_ia32_vpcmov_v8sf256",  IX86_BUILTIN_VPCMOV_V8SF256,  UNKNOWN,      (int)MULTI_ARG_3_SF2 },
22417
22418  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm,             "__builtin_ia32_vpperm",      IX86_BUILTIN_VPPERM,      UNKNOWN,      (int)MULTI_ARG_3_QI },
22419
22420  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww,          "__builtin_ia32_vpmacssww",   IX86_BUILTIN_VPMACSSWW,   UNKNOWN,      (int)MULTI_ARG_3_HI },
22421  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww,           "__builtin_ia32_vpmacsww",    IX86_BUILTIN_VPMACSWW,    UNKNOWN,      (int)MULTI_ARG_3_HI },
22422  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd,          "__builtin_ia32_vpmacsswd",   IX86_BUILTIN_VPMACSSWD,   UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
22423  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd,           "__builtin_ia32_vpmacswd",    IX86_BUILTIN_VPMACSWD,    UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
22424  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd,          "__builtin_ia32_vpmacssdd",   IX86_BUILTIN_VPMACSSDD,   UNKNOWN,      (int)MULTI_ARG_3_SI },
22425  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd,           "__builtin_ia32_vpmacsdd",    IX86_BUILTIN_VPMACSDD,    UNKNOWN,      (int)MULTI_ARG_3_SI },
22426  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql,         "__builtin_ia32_vpmacssdql",  IX86_BUILTIN_VPMACSSDQL,  UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
22427  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh,         "__builtin_ia32_vpmacssdqh",  IX86_BUILTIN_VPMACSSDQH,  UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
22428  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql,          "__builtin_ia32_vpmacsdql",   IX86_BUILTIN_VPMACSDQL,   UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
22429  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh,          "__builtin_ia32_vpmacsdqh",   IX86_BUILTIN_VPMACSDQH,   UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
22430  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd,         "__builtin_ia32_vpmadcsswd",  IX86_BUILTIN_VPMADCSSWD,  UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
22431  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd,          "__builtin_ia32_vpmadcswd",   IX86_BUILTIN_VPMADCSWD,   UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
22432
22433  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3,        "__builtin_ia32_vprotq",      IX86_BUILTIN_VPROTQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
22434  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3,        "__builtin_ia32_vprotd",      IX86_BUILTIN_VPROTD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
22435  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3,        "__builtin_ia32_vprotw",      IX86_BUILTIN_VPROTW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
22436  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3,       "__builtin_ia32_vprotb",      IX86_BUILTIN_VPROTB,      UNKNOWN,      (int)MULTI_ARG_2_QI },
22437  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3,         "__builtin_ia32_vprotqi",     IX86_BUILTIN_VPROTQ_IMM,  UNKNOWN,      (int)MULTI_ARG_2_DI_IMM },
22438  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3,         "__builtin_ia32_vprotdi",     IX86_BUILTIN_VPROTD_IMM,  UNKNOWN,      (int)MULTI_ARG_2_SI_IMM },
22439  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3,         "__builtin_ia32_vprotwi",     IX86_BUILTIN_VPROTW_IMM,  UNKNOWN,      (int)MULTI_ARG_2_HI_IMM },
22440  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3,        "__builtin_ia32_vprotbi",     IX86_BUILTIN_VPROTB_IMM,  UNKNOWN,      (int)MULTI_ARG_2_QI_IMM },
22441  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3,         "__builtin_ia32_vpshaq",      IX86_BUILTIN_VPSHAQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
22442  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3,         "__builtin_ia32_vpshad",      IX86_BUILTIN_VPSHAD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
22443  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3,         "__builtin_ia32_vpshaw",      IX86_BUILTIN_VPSHAW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
22444  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3,        "__builtin_ia32_vpshab",      IX86_BUILTIN_VPSHAB,      UNKNOWN,      (int)MULTI_ARG_2_QI },
22445  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3,         "__builtin_ia32_vpshlq",      IX86_BUILTIN_VPSHLQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
22446  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3,         "__builtin_ia32_vpshld",      IX86_BUILTIN_VPSHLD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
22447  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3,         "__builtin_ia32_vpshlw",      IX86_BUILTIN_VPSHLW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
22448  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3,        "__builtin_ia32_vpshlb",      IX86_BUILTIN_VPSHLB,      UNKNOWN,      (int)MULTI_ARG_2_QI },
22449
22450  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2,       "__builtin_ia32_vfrczss",     IX86_BUILTIN_VFRCZSS,     UNKNOWN,      (int)MULTI_ARG_2_SF },
22451  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2,       "__builtin_ia32_vfrczsd",     IX86_BUILTIN_VFRCZSD,     UNKNOWN,      (int)MULTI_ARG_2_DF },
22452  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2,         "__builtin_ia32_vfrczps",     IX86_BUILTIN_VFRCZPS,     UNKNOWN,      (int)MULTI_ARG_1_SF },
22453  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2,         "__builtin_ia32_vfrczpd",     IX86_BUILTIN_VFRCZPD,     UNKNOWN,      (int)MULTI_ARG_1_DF },
22454  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256,         "__builtin_ia32_vfrczps256",  IX86_BUILTIN_VFRCZPS256,  UNKNOWN,      (int)MULTI_ARG_1_SF2 },
22455  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256,         "__builtin_ia32_vfrczpd256",  IX86_BUILTIN_VFRCZPD256,  UNKNOWN,      (int)MULTI_ARG_1_DF2 },
22456
22457  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw,           "__builtin_ia32_vphaddbw",    IX86_BUILTIN_VPHADDBW,    UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
22458  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd,           "__builtin_ia32_vphaddbd",    IX86_BUILTIN_VPHADDBD,    UNKNOWN,      (int)MULTI_ARG_1_QI_SI },
22459  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq,           "__builtin_ia32_vphaddbq",    IX86_BUILTIN_VPHADDBQ,    UNKNOWN,      (int)MULTI_ARG_1_QI_DI },
22460  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd,           "__builtin_ia32_vphaddwd",    IX86_BUILTIN_VPHADDWD,    UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
22461  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq,           "__builtin_ia32_vphaddwq",    IX86_BUILTIN_VPHADDWQ,    UNKNOWN,      (int)MULTI_ARG_1_HI_DI },
22462  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq,           "__builtin_ia32_vphadddq",    IX86_BUILTIN_VPHADDDQ,    UNKNOWN,      (int)MULTI_ARG_1_SI_DI },
22463  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw,          "__builtin_ia32_vphaddubw",   IX86_BUILTIN_VPHADDUBW,   UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
22464  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd,          "__builtin_ia32_vphaddubd",   IX86_BUILTIN_VPHADDUBD,   UNKNOWN,      (int)MULTI_ARG_1_QI_SI },
22465  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq,          "__builtin_ia32_vphaddubq",   IX86_BUILTIN_VPHADDUBQ,   UNKNOWN,      (int)MULTI_ARG_1_QI_DI },
22466  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd,          "__builtin_ia32_vphadduwd",   IX86_BUILTIN_VPHADDUWD,   UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
22467  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq,          "__builtin_ia32_vphadduwq",   IX86_BUILTIN_VPHADDUWQ,   UNKNOWN,      (int)MULTI_ARG_1_HI_DI },
22468  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq,          "__builtin_ia32_vphaddudq",   IX86_BUILTIN_VPHADDUDQ,   UNKNOWN,      (int)MULTI_ARG_1_SI_DI },
22469  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw,           "__builtin_ia32_vphsubbw",    IX86_BUILTIN_VPHSUBBW,    UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
22470  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd,           "__builtin_ia32_vphsubwd",    IX86_BUILTIN_VPHSUBWD,    UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
22471  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq,           "__builtin_ia32_vphsubdq",    IX86_BUILTIN_VPHSUBDQ,    UNKNOWN,      (int)MULTI_ARG_1_SI_DI },
22472
22473  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomeqb",    IX86_BUILTIN_VPCOMEQB,    EQ,           (int)MULTI_ARG_2_QI_CMP },
22474  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomneb",    IX86_BUILTIN_VPCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
22475  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomneqb",   IX86_BUILTIN_VPCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
22476  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomltb",    IX86_BUILTIN_VPCOMLTB,    LT,           (int)MULTI_ARG_2_QI_CMP },
22477  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomleb",    IX86_BUILTIN_VPCOMLEB,    LE,           (int)MULTI_ARG_2_QI_CMP },
22478  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomgtb",    IX86_BUILTIN_VPCOMGTB,    GT,           (int)MULTI_ARG_2_QI_CMP },
22479  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomgeb",    IX86_BUILTIN_VPCOMGEB,    GE,           (int)MULTI_ARG_2_QI_CMP },
22480
22481  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomeqw",    IX86_BUILTIN_VPCOMEQW,    EQ,           (int)MULTI_ARG_2_HI_CMP },
22482  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomnew",    IX86_BUILTIN_VPCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
22483  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomneqw",   IX86_BUILTIN_VPCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
22484  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomltw",    IX86_BUILTIN_VPCOMLTW,    LT,           (int)MULTI_ARG_2_HI_CMP },
22485  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomlew",    IX86_BUILTIN_VPCOMLEW,    LE,           (int)MULTI_ARG_2_HI_CMP },
22486  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomgtw",    IX86_BUILTIN_VPCOMGTW,    GT,           (int)MULTI_ARG_2_HI_CMP },
22487  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomgew",    IX86_BUILTIN_VPCOMGEW,    GE,           (int)MULTI_ARG_2_HI_CMP },
22488
22489  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomeqd",    IX86_BUILTIN_VPCOMEQD,    EQ,           (int)MULTI_ARG_2_SI_CMP },
22490  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomned",    IX86_BUILTIN_VPCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
22491  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomneqd",   IX86_BUILTIN_VPCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
22492  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomltd",    IX86_BUILTIN_VPCOMLTD,    LT,           (int)MULTI_ARG_2_SI_CMP },
22493  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomled",    IX86_BUILTIN_VPCOMLED,    LE,           (int)MULTI_ARG_2_SI_CMP },
22494  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomgtd",    IX86_BUILTIN_VPCOMGTD,    GT,           (int)MULTI_ARG_2_SI_CMP },
22495  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomged",    IX86_BUILTIN_VPCOMGED,    GE,           (int)MULTI_ARG_2_SI_CMP },
22496
22497  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomeqq",    IX86_BUILTIN_VPCOMEQQ,    EQ,           (int)MULTI_ARG_2_DI_CMP },
22498  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomneq",    IX86_BUILTIN_VPCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
22499  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomneqq",   IX86_BUILTIN_VPCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
22500  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomltq",    IX86_BUILTIN_VPCOMLTQ,    LT,           (int)MULTI_ARG_2_DI_CMP },
22501  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomleq",    IX86_BUILTIN_VPCOMLEQ,    LE,           (int)MULTI_ARG_2_DI_CMP },
22502  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomgtq",    IX86_BUILTIN_VPCOMGTQ,    GT,           (int)MULTI_ARG_2_DI_CMP },
22503  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomgeq",    IX86_BUILTIN_VPCOMGEQ,    GE,           (int)MULTI_ARG_2_DI_CMP },
22504
22505  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb",   IX86_BUILTIN_VPCOMEQUB,   EQ,           (int)MULTI_ARG_2_QI_CMP },
22506  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub",   IX86_BUILTIN_VPCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
22507  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb",  IX86_BUILTIN_VPCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
22508  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub",   IX86_BUILTIN_VPCOMLTUB,   LTU,          (int)MULTI_ARG_2_QI_CMP },
22509  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub",   IX86_BUILTIN_VPCOMLEUB,   LEU,          (int)MULTI_ARG_2_QI_CMP },
22510  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub",   IX86_BUILTIN_VPCOMGTUB,   GTU,          (int)MULTI_ARG_2_QI_CMP },
22511  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub",   IX86_BUILTIN_VPCOMGEUB,   GEU,          (int)MULTI_ARG_2_QI_CMP },
22512
22513  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw",   IX86_BUILTIN_VPCOMEQUW,   EQ,           (int)MULTI_ARG_2_HI_CMP },
22514  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw",   IX86_BUILTIN_VPCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
22515  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw",  IX86_BUILTIN_VPCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
22516  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomltuw",   IX86_BUILTIN_VPCOMLTUW,   LTU,          (int)MULTI_ARG_2_HI_CMP },
22517  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomleuw",   IX86_BUILTIN_VPCOMLEUW,   LEU,          (int)MULTI_ARG_2_HI_CMP },
22518  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomgtuw",   IX86_BUILTIN_VPCOMGTUW,   GTU,          (int)MULTI_ARG_2_HI_CMP },
22519  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomgeuw",   IX86_BUILTIN_VPCOMGEUW,   GEU,          (int)MULTI_ARG_2_HI_CMP },
22520
22521  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd",   IX86_BUILTIN_VPCOMEQUD,   EQ,           (int)MULTI_ARG_2_SI_CMP },
22522  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud",   IX86_BUILTIN_VPCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
22523  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd",  IX86_BUILTIN_VPCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
22524  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomltud",   IX86_BUILTIN_VPCOMLTUD,   LTU,          (int)MULTI_ARG_2_SI_CMP },
22525  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomleud",   IX86_BUILTIN_VPCOMLEUD,   LEU,          (int)MULTI_ARG_2_SI_CMP },
22526  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomgtud",   IX86_BUILTIN_VPCOMGTUD,   GTU,          (int)MULTI_ARG_2_SI_CMP },
22527  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomgeud",   IX86_BUILTIN_VPCOMGEUD,   GEU,          (int)MULTI_ARG_2_SI_CMP },
22528
22529  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq",   IX86_BUILTIN_VPCOMEQUQ,   EQ,           (int)MULTI_ARG_2_DI_CMP },
22530  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq",   IX86_BUILTIN_VPCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
22531  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq",  IX86_BUILTIN_VPCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
22532  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomltuq",   IX86_BUILTIN_VPCOMLTUQ,   LTU,          (int)MULTI_ARG_2_DI_CMP },
22533  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomleuq",   IX86_BUILTIN_VPCOMLEUQ,   LEU,          (int)MULTI_ARG_2_DI_CMP },
22534  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomgtuq",   IX86_BUILTIN_VPCOMGTUQ,   GTU,          (int)MULTI_ARG_2_DI_CMP },
22535  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomgeuq",   IX86_BUILTIN_VPCOMGEUQ,   GEU,          (int)MULTI_ARG_2_DI_CMP },
22536
22537  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
22538  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
22539  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
22540  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
22541  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
22542  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
22543  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
22544  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
22545
22546  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomtrueb",  IX86_BUILTIN_VPCOMTRUEB,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
22547  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomtruew",  IX86_BUILTIN_VPCOMTRUEW,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
22548  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomtrued",  IX86_BUILTIN_VPCOMTRUED,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
22549  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomtrueq",  IX86_BUILTIN_VPCOMTRUEQ,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
22550  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
22551  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
22552  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
22553  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
22554
22555  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3,     "__builtin_ia32_vpermil2pd",  IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
22556  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3,     "__builtin_ia32_vpermil2ps",  IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
22557  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3,     "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
22558  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3,     "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
22559
22560};
22561
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, so that the user can compile particular
   modules with target-specific options that differ from the command-line
   options.  */
22566static void
22567ix86_init_mmx_sse_builtins (void)
22568{
22569  const struct builtin_description * d;
22570  enum ix86_builtin_func_type ftype;
22571  size_t i;
22572
22573  /* Add all special builtins with variable number of operands.  */
22574  for (i = 0, d = bdesc_special_args;
22575       i < ARRAY_SIZE (bdesc_special_args);
22576       i++, d++)
22577    {
22578      if (d->name == 0)
22579	continue;
22580
22581      ftype = (enum ix86_builtin_func_type) d->flag;
22582      def_builtin (d->mask, d->name, ftype, d->code);
22583    }
22584
22585  /* Add all builtins with variable number of operands.  */
22586  for (i = 0, d = bdesc_args;
22587       i < ARRAY_SIZE (bdesc_args);
22588       i++, d++)
22589    {
22590      if (d->name == 0)
22591	continue;
22592
22593      ftype = (enum ix86_builtin_func_type) d->flag;
22594      def_builtin_const (d->mask, d->name, ftype, d->code);
22595    }
22596
22597  /* pcmpestr[im] insns.  */
22598  for (i = 0, d = bdesc_pcmpestr;
22599       i < ARRAY_SIZE (bdesc_pcmpestr);
22600       i++, d++)
22601    {
22602      if (d->code == IX86_BUILTIN_PCMPESTRM128)
22603	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
22604      else
22605	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
22606      def_builtin_const (d->mask, d->name, ftype, d->code);
22607    }
22608
22609  /* pcmpistr[im] insns.  */
22610  for (i = 0, d = bdesc_pcmpistr;
22611       i < ARRAY_SIZE (bdesc_pcmpistr);
22612       i++, d++)
22613    {
22614      if (d->code == IX86_BUILTIN_PCMPISTRM128)
22615	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
22616      else
22617	ftype = INT_FTYPE_V16QI_V16QI_INT;
22618      def_builtin_const (d->mask, d->name, ftype, d->code);
22619    }
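
  /* Illustrative aside: the "...m" string-compare builtins return the
     result mask in an XMM register, which is why they get the V16QI
     return type above, while the index and flag forms return a plain
     int.  A hypothetical user-side sketch, assuming __v16qi operands:

       int idx = __builtin_ia32_pcmpistri128 (a, b, 0x0c);  */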
22620
22621  /* comi/ucomi insns.  */
22622  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
22623    {
22624      if (d->mask == OPTION_MASK_ISA_SSE2)
22625	ftype = INT_FTYPE_V2DF_V2DF;
22626      else
22627	ftype = INT_FTYPE_V4SF_V4SF;
22628      def_builtin_const (d->mask, d->name, ftype, d->code);
22629    }
22630
22631  /* SSE */
22632  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
22633	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
22634  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
22635	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
22636
22637  /* SSE or 3DNow!A */
22638  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
22639	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
22640	       IX86_BUILTIN_MASKMOVQ);
22641
22642  /* SSE2 */
22643  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
22644	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
22645
22646  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
22647	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
22648  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
22649			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
22650
22651  /* SSE3.  */
22652  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
22653	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
22654  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
22655	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
22656
22657  /* AES */
22658  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
22659		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
22660  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
22661		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
22662  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
22663		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
22664  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
22665		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
22666  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
22667		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
22668  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
22669		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
22670
22671  /* PCLMUL */
22672  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
22673		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
22674
22675  /* MMX access to the vec_init patterns.  */
22676  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
22677		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
22678
22679  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
22680		     V4HI_FTYPE_HI_HI_HI_HI,
22681		     IX86_BUILTIN_VEC_INIT_V4HI);
22682
22683  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
22684		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
22685		     IX86_BUILTIN_VEC_INIT_V8QI);
22686
22687  /* Access to the vec_extract patterns.  */
22688  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
22689		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
22690  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
22691		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
22692  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
22693		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
22694  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
22695		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
22696  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
22697		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
22698
22699  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
22700		     "__builtin_ia32_vec_ext_v4hi",
22701		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
22702
22703  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
22704		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
22705
22706  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
22707		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
22708
22709  /* Access to the vec_set patterns.  */
22710  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
22711		     "__builtin_ia32_vec_set_v2di",
22712		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
22713
22714  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
22715		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
22716
22717  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
22718		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
22719
22720  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
22721		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
22722
22723  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
22724		     "__builtin_ia32_vec_set_v4hi",
22725		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
22726
22727  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
22728		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
22729
22730  /* Add the FMA4 multi-argument instructions.  */
22731  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
22732    {
22733      if (d->name == 0)
22734	continue;
22735
22736      ftype = (enum ix86_builtin_func_type) d->flag;
22737      def_builtin_const (d->mask, d->name, ftype, d->code);
22738    }
22739}
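
/* The builtins registered above are consumed by the intrinsic headers rather
   than called directly.  As a rough, hedged illustration (the exact header
   wording differs between GCC versions), xmmintrin.h wraps the MXCSR
   builtins along these lines:

     static __inline unsigned int
     _mm_getcsr (void)
     {
       return __builtin_ia32_stmxcsr ();
     }

     static __inline void
     _mm_setcsr (unsigned int __I)
     {
       __builtin_ia32_ldmxcsr (__I);
     }
*/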
22740
22741/* Internal subroutine of ix86_init_builtins: register the ms_abi and
   sysv_abi variants of the va_start/va_end/va_copy builtins (64-bit only).  */
22742
22743static void
22744ix86_init_builtins_va_builtins_abi (void)
22745{
22746  tree ms_va_ref, sysv_va_ref;
22747  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
22748  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
22749  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
22750  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
22751
22752  if (!TARGET_64BIT)
22753    return;
22754  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
22755  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
22756  ms_va_ref = build_reference_type (ms_va_list_type_node);
22757  sysv_va_ref =
22758    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
22759
22760  fnvoid_va_end_ms =
22761    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
22762  fnvoid_va_start_ms =
22763    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
22764  fnvoid_va_end_sysv =
22765    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
22766  fnvoid_va_start_sysv =
22767    build_varargs_function_type_list (void_type_node, sysv_va_ref,
22768    				       NULL_TREE);
22769  fnvoid_va_copy_ms =
22770    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
22771    			      NULL_TREE);
22772  fnvoid_va_copy_sysv =
22773    build_function_type_list (void_type_node, sysv_va_ref,
22774    			      sysv_va_ref, NULL_TREE);
22775
22776  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
22777  			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
22778  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
22779  			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
22780  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
22781			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
22782  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
22783  			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22784  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
22785  			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22786  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
22787			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22788}
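
/* With the registrations above, 64-bit code can mix calling conventions and
   still use varargs in the "foreign" ABI.  A hedged sketch of the intended
   usage (illustration only, not taken from this file):

     int __attribute__ ((ms_abi))
     sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;

       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
	 s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/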
22789
22790static void
22791ix86_init_builtin_types (void)
22792{
22793  tree float128_type_node, float80_type_node;
22794
22795  /* The __float80 type.  */
22796  float80_type_node = long_double_type_node;
22797  if (TYPE_MODE (float80_type_node) != XFmode)
22798    {
22799      /* long double is not the 80-bit extended type here; build a distinct one.  */
22800      float80_type_node = make_node (REAL_TYPE);
22801
22802      TYPE_PRECISION (float80_type_node) = 80;
22803      layout_type (float80_type_node);
22804    }
22805  (*lang_hooks.types.register_builtin_type) (float80_type_node, "__float80");
22806
22807  /* The __float128 type.  */
22808  float128_type_node = make_node (REAL_TYPE);
22809  TYPE_PRECISION (float128_type_node) = 128;
22810  layout_type (float128_type_node);
22811  (*lang_hooks.types.register_builtin_type) (float128_type_node, "__float128");
22812
22813  /* This macro is built by i386-builtin-types.awk.  */
22814  DEFINE_BUILTIN_PRIMITIVE_TYPES;
22815}
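
/* Once registered, the two extended types are directly usable from C source.
   A minimal, hedged illustration (not part of this file):

     __float80  ext;                          -- 80-bit extended precision
     __float128 quad = __builtin_infq ();     -- quadruple precision

   __builtin_infq itself is registered just below in ix86_init_builtins.  */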
22816
22817static void
22818ix86_init_builtins (void)
22819{
22820  tree t;
22821
22822  ix86_init_builtin_types ();
22823
22824  /* TFmode support builtins.  */
22825  def_builtin_const (0, "__builtin_infq",
22826		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
22827  def_builtin_const (0, "__builtin_huge_valq",
22828		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
22829
22830  /* We will expand them to a normal call if SSE2 isn't available, since
22831     they are used by libgcc.  */
22832  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
22833  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
22834			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
22835  TREE_READONLY (t) = 1;
22836  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
22837
22838  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
22839  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
22840			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
22841  TREE_READONLY (t) = 1;
22842  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
22843
22844  ix86_init_mmx_sse_builtins ();
22845
22846  if (TARGET_64BIT)
22847    ix86_init_builtins_va_builtins_abi ();
22848}
22849
22850/* Return the ix86 builtin for CODE.  */
22851
22852static tree
22853ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
22854{
22855  if (code >= IX86_BUILTIN_MAX)
22856    return error_mark_node;
22857
22858  return ix86_builtins[code];
22859}
22860
22861/* Errors in the source file can cause expand_expr to return const0_rtx
22862   where we expect a vector.  To avoid crashing, use one of the vector
22863   clear instructions.  */
22864static rtx
22865safe_vector_operand (rtx x, enum machine_mode mode)
22866{
22867  if (x == const0_rtx)
22868    x = CONST0_RTX (mode);
22869  return x;
22870}
22871
22872/* Subroutine of ix86_expand_builtin to take care of binop insns.  */
22873
22874static rtx
22875ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
22876{
22877  rtx pat;
22878  tree arg0 = CALL_EXPR_ARG (exp, 0);
22879  tree arg1 = CALL_EXPR_ARG (exp, 1);
22880  rtx op0 = expand_normal (arg0);
22881  rtx op1 = expand_normal (arg1);
22882  enum machine_mode tmode = insn_data[icode].operand[0].mode;
22883  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22884  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
22885
22886  if (VECTOR_MODE_P (mode0))
22887    op0 = safe_vector_operand (op0, mode0);
22888  if (VECTOR_MODE_P (mode1))
22889    op1 = safe_vector_operand (op1, mode1);
22890
22891  if (optimize || !target
22892      || GET_MODE (target) != tmode
22893      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22894    target = gen_reg_rtx (tmode);
22895
22896  if (GET_MODE (op1) == SImode && mode1 == TImode)
22897    {
22898      rtx x = gen_reg_rtx (V4SImode);
22899      emit_insn (gen_sse2_loadd (x, op1));
22900      op1 = gen_lowpart (TImode, x);
22901    }
22902
22903  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22904    op0 = copy_to_mode_reg (mode0, op0);
22905  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22906    op1 = copy_to_mode_reg (mode1, op1);
22907
22908  pat = GEN_FCN (icode) (target, op0, op1);
22909  if (! pat)
22910    return 0;
22911
22912  emit_insn (pat);
22913
22914  return target;
22915}
22916
22917/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */
22918
22919static rtx
22920ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
22921			       enum ix86_builtin_func_type m_type,
22922			       enum rtx_code sub_code)
22923{
22924  rtx pat;
22925  int i;
22926  int nargs;
22927  bool comparison_p = false;
22928  bool tf_p = false;
22929  bool last_arg_constant = false;
22930  int num_memory = 0;
22931  struct {
22932    rtx op;
22933    enum machine_mode mode;
22934  } args[4];
22935
22936  enum machine_mode tmode = insn_data[icode].operand[0].mode;
22937
22938  switch (m_type)
22939    {
22940    case MULTI_ARG_4_DF2_DI_I:
22941    case MULTI_ARG_4_DF2_DI_I1:
22942    case MULTI_ARG_4_SF2_SI_I:
22943    case MULTI_ARG_4_SF2_SI_I1:
22944      nargs = 4;
22945      last_arg_constant = true;
22946      break;
22947
22948    case MULTI_ARG_3_SF:
22949    case MULTI_ARG_3_DF:
22950    case MULTI_ARG_3_SF2:
22951    case MULTI_ARG_3_DF2:
22952    case MULTI_ARG_3_DI:
22953    case MULTI_ARG_3_SI:
22954    case MULTI_ARG_3_SI_DI:
22955    case MULTI_ARG_3_HI:
22956    case MULTI_ARG_3_HI_SI:
22957    case MULTI_ARG_3_QI:
22958    case MULTI_ARG_3_DI2:
22959    case MULTI_ARG_3_SI2:
22960    case MULTI_ARG_3_HI2:
22961    case MULTI_ARG_3_QI2:
22962      nargs = 3;
22963      break;
22964
22965    case MULTI_ARG_2_SF:
22966    case MULTI_ARG_2_DF:
22967    case MULTI_ARG_2_DI:
22968    case MULTI_ARG_2_SI:
22969    case MULTI_ARG_2_HI:
22970    case MULTI_ARG_2_QI:
22971      nargs = 2;
22972      break;
22973
22974    case MULTI_ARG_2_DI_IMM:
22975    case MULTI_ARG_2_SI_IMM:
22976    case MULTI_ARG_2_HI_IMM:
22977    case MULTI_ARG_2_QI_IMM:
22978      nargs = 2;
22979      last_arg_constant = true;
22980      break;
22981
22982    case MULTI_ARG_1_SF:
22983    case MULTI_ARG_1_DF:
22984    case MULTI_ARG_1_SF2:
22985    case MULTI_ARG_1_DF2:
22986    case MULTI_ARG_1_DI:
22987    case MULTI_ARG_1_SI:
22988    case MULTI_ARG_1_HI:
22989    case MULTI_ARG_1_QI:
22990    case MULTI_ARG_1_SI_DI:
22991    case MULTI_ARG_1_HI_DI:
22992    case MULTI_ARG_1_HI_SI:
22993    case MULTI_ARG_1_QI_DI:
22994    case MULTI_ARG_1_QI_SI:
22995    case MULTI_ARG_1_QI_HI:
22996      nargs = 1;
22997      break;
22998
22999    case MULTI_ARG_2_DI_CMP:
23000    case MULTI_ARG_2_SI_CMP:
23001    case MULTI_ARG_2_HI_CMP:
23002    case MULTI_ARG_2_QI_CMP:
23003      nargs = 2;
23004      comparison_p = true;
23005      break;
23006
23007    case MULTI_ARG_2_SF_TF:
23008    case MULTI_ARG_2_DF_TF:
23009    case MULTI_ARG_2_DI_TF:
23010    case MULTI_ARG_2_SI_TF:
23011    case MULTI_ARG_2_HI_TF:
23012    case MULTI_ARG_2_QI_TF:
23013      nargs = 2;
23014      tf_p = true;
23015      break;
23016
23017    default:
23018      gcc_unreachable ();
23019    }
23020
23021  if (optimize || !target
23022      || GET_MODE (target) != tmode
23023      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23024    target = gen_reg_rtx (tmode);
23025
23026  gcc_assert (nargs <= 4);
23027
23028  for (i = 0; i < nargs; i++)
23029    {
23030      tree arg = CALL_EXPR_ARG (exp, i);
23031      rtx op = expand_normal (arg);
23032      int adjust = (comparison_p) ? 1 : 0;
23033      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
23034
23035      if (last_arg_constant && i == nargs-1)
23036	{
23037	  if (!CONST_INT_P (op))
23038	    {
23039	      error ("the last argument must be an immediate");
23040	      return gen_reg_rtx (tmode);
23041	    }
23042	}
23043      else
23044	{
23045	  if (VECTOR_MODE_P (mode))
23046	    op = safe_vector_operand (op, mode);
23047
23048	  /* If we aren't optimizing, only allow one memory operand to be
23049	     generated.  */
23050	  if (memory_operand (op, mode))
23051	    num_memory++;
23052
23053	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23054
23055	  if (optimize
23056	      || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
23057	      || num_memory > 1)
23058	    op = force_reg (mode, op);
23059	}
23060
23061      args[i].op = op;
23062      args[i].mode = mode;
23063    }
23064
23065  switch (nargs)
23066    {
23067    case 1:
23068      pat = GEN_FCN (icode) (target, args[0].op);
23069      break;
23070
23071    case 2:
23072      if (tf_p)
23073	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23074			       GEN_INT ((int)sub_code));
23075      else if (! comparison_p)
23076	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23077      else
23078	{
23079	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23080				       args[0].op,
23081				       args[1].op);
23082
23083	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23084	}
23085      break;
23086
23087    case 3:
23088      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23089      break;
23090
23091    case 4:
23092      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
23093      break;
23094
23095    default:
23096      gcc_unreachable ();
23097    }
23098
23099  if (! pat)
23100    return 0;
23101
23102  emit_insn (pat);
23103  return target;
23104}
23105
23106/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23107   insns with vec_merge.  */
23108
23109static rtx
23110ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23111				    rtx target)
23112{
23113  rtx pat;
23114  tree arg0 = CALL_EXPR_ARG (exp, 0);
23115  rtx op1, op0 = expand_normal (arg0);
23116  enum machine_mode tmode = insn_data[icode].operand[0].mode;
23117  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23118
23119  if (optimize || !target
23120      || GET_MODE (target) != tmode
23121      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23122    target = gen_reg_rtx (tmode);
23123
23124  if (VECTOR_MODE_P (mode0))
23125    op0 = safe_vector_operand (op0, mode0);
23126
23127  if ((optimize && !register_operand (op0, mode0))
23128      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23129    op0 = copy_to_mode_reg (mode0, op0);
23130
23131  op1 = op0;
23132  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
23133    op1 = copy_to_mode_reg (mode0, op1);
23134
23135  pat = GEN_FCN (icode) (target, op0, op1);
23136  if (! pat)
23137    return 0;
23138  emit_insn (pat);
23139  return target;
23140}
23141
23142/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
23143
23144static rtx
23145ix86_expand_sse_compare (const struct builtin_description *d,
23146			 tree exp, rtx target, bool swap)
23147{
23148  rtx pat;
23149  tree arg0 = CALL_EXPR_ARG (exp, 0);
23150  tree arg1 = CALL_EXPR_ARG (exp, 1);
23151  rtx op0 = expand_normal (arg0);
23152  rtx op1 = expand_normal (arg1);
23153  rtx op2;
23154  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23155  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23156  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23157  enum rtx_code comparison = d->comparison;
23158
23159  if (VECTOR_MODE_P (mode0))
23160    op0 = safe_vector_operand (op0, mode0);
23161  if (VECTOR_MODE_P (mode1))
23162    op1 = safe_vector_operand (op1, mode1);
23163
23164  /* Swap operands if we have a comparison that isn't available in
23165     hardware.  */
23166  if (swap)
23167    {
23168      rtx tmp = gen_reg_rtx (mode1);
23169      emit_move_insn (tmp, op1);
23170      op1 = op0;
23171      op0 = tmp;
23172    }
23173
23174  if (optimize || !target
23175      || GET_MODE (target) != tmode
23176      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
23177    target = gen_reg_rtx (tmode);
23178
23179  if ((optimize && !register_operand (op0, mode0))
23180      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23181    op0 = copy_to_mode_reg (mode0, op0);
23182  if ((optimize && !register_operand (op1, mode1))
23183      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23184    op1 = copy_to_mode_reg (mode1, op1);
23185
23186  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23187  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23188  if (! pat)
23189    return 0;
23190  emit_insn (pat);
23191  return target;
23192}
23193
23194/* Subroutine of ix86_expand_builtin to take care of comi insns.  */
23195
23196static rtx
23197ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23198		      rtx target)
23199{
23200  rtx pat;
23201  tree arg0 = CALL_EXPR_ARG (exp, 0);
23202  tree arg1 = CALL_EXPR_ARG (exp, 1);
23203  rtx op0 = expand_normal (arg0);
23204  rtx op1 = expand_normal (arg1);
23205  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23206  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23207  enum rtx_code comparison = d->comparison;
23208
23209  if (VECTOR_MODE_P (mode0))
23210    op0 = safe_vector_operand (op0, mode0);
23211  if (VECTOR_MODE_P (mode1))
23212    op1 = safe_vector_operand (op1, mode1);
23213
23214  /* Swap operands if we have a comparison that isn't available in
23215     hardware.  */
23216  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
23217    {
23218      rtx tmp = op1;
23219      op1 = op0;
23220      op0 = tmp;
23221    }
23222
23223  target = gen_reg_rtx (SImode);
23224  emit_move_insn (target, const0_rtx);
23225  target = gen_rtx_SUBREG (QImode, target, 0);
23226
23227  if ((optimize && !register_operand (op0, mode0))
23228      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23229    op0 = copy_to_mode_reg (mode0, op0);
23230  if ((optimize && !register_operand (op1, mode1))
23231      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23232    op1 = copy_to_mode_reg (mode1, op1);
23233
23234  pat = GEN_FCN (d->icode) (op0, op1);
23235  if (! pat)
23236    return 0;
23237  emit_insn (pat);
23238  emit_insn (gen_rtx_SET (VOIDmode,
23239			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23240			  gen_rtx_fmt_ee (comparison, QImode,
23241					  SET_DEST (pat),
23242					  const0_rtx)));
23243
23244  return SUBREG_REG (target);
23245}
23246
23247/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */
23248
23249static rtx
23250ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
23251		       rtx target)
23252{
23253  rtx pat;
23254  tree arg0 = CALL_EXPR_ARG (exp, 0);
23255  tree arg1 = CALL_EXPR_ARG (exp, 1);
23256  rtx op0 = expand_normal (arg0);
23257  rtx op1 = expand_normal (arg1);
23258  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23259  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23260  enum rtx_code comparison = d->comparison;
23261
23262  if (VECTOR_MODE_P (mode0))
23263    op0 = safe_vector_operand (op0, mode0);
23264  if (VECTOR_MODE_P (mode1))
23265    op1 = safe_vector_operand (op1, mode1);
23266
23267  target = gen_reg_rtx (SImode);
23268  emit_move_insn (target, const0_rtx);
23269  target = gen_rtx_SUBREG (QImode, target, 0);
23270
23271  if ((optimize && !register_operand (op0, mode0))
23272      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23273    op0 = copy_to_mode_reg (mode0, op0);
23274  if ((optimize && !register_operand (op1, mode1))
23275      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23276    op1 = copy_to_mode_reg (mode1, op1);
23277
23278  pat = GEN_FCN (d->icode) (op0, op1);
23279  if (! pat)
23280    return 0;
23281  emit_insn (pat);
23282  emit_insn (gen_rtx_SET (VOIDmode,
23283			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23284			  gen_rtx_fmt_ee (comparison, QImode,
23285					  SET_DEST (pat),
23286					  const0_rtx)));
23287
23288  return SUBREG_REG (target);
23289}
23290
23291/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */
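
/* When D->FLAG is nonzero, the builtin does not return the index or mask
   result of the instruction; instead the expansion reads FLAGS_REG (in the
   machine mode encoded in D->FLAG) and returns the selected condition bit
   as a 0/1 value.  The pcmpistr expander below uses the same convention.  */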
23292
23293static rtx
23294ix86_expand_sse_pcmpestr (const struct builtin_description *d,
23295			  tree exp, rtx target)
23296{
23297  rtx pat;
23298  tree arg0 = CALL_EXPR_ARG (exp, 0);
23299  tree arg1 = CALL_EXPR_ARG (exp, 1);
23300  tree arg2 = CALL_EXPR_ARG (exp, 2);
23301  tree arg3 = CALL_EXPR_ARG (exp, 3);
23302  tree arg4 = CALL_EXPR_ARG (exp, 4);
23303  rtx scratch0, scratch1;
23304  rtx op0 = expand_normal (arg0);
23305  rtx op1 = expand_normal (arg1);
23306  rtx op2 = expand_normal (arg2);
23307  rtx op3 = expand_normal (arg3);
23308  rtx op4 = expand_normal (arg4);
23309  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
23310
23311  tmode0 = insn_data[d->icode].operand[0].mode;
23312  tmode1 = insn_data[d->icode].operand[1].mode;
23313  modev2 = insn_data[d->icode].operand[2].mode;
23314  modei3 = insn_data[d->icode].operand[3].mode;
23315  modev4 = insn_data[d->icode].operand[4].mode;
23316  modei5 = insn_data[d->icode].operand[5].mode;
23317  modeimm = insn_data[d->icode].operand[6].mode;
23318
23319  if (VECTOR_MODE_P (modev2))
23320    op0 = safe_vector_operand (op0, modev2);
23321  if (VECTOR_MODE_P (modev4))
23322    op2 = safe_vector_operand (op2, modev4);
23323
23324  if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23325    op0 = copy_to_mode_reg (modev2, op0);
23326  if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
23327    op1 = copy_to_mode_reg (modei3, op1);
23328  if ((optimize && !register_operand (op2, modev4))
23329      || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
23330    op2 = copy_to_mode_reg (modev4, op2);
23331  if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
23332    op3 = copy_to_mode_reg (modei5, op3);
23333
23334  if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
23335    {
23336      error ("the fifth argument must be an 8-bit immediate");
23337      return const0_rtx;
23338    }
23339
23340  if (d->code == IX86_BUILTIN_PCMPESTRI128)
23341    {
23342      if (optimize || !target
23343	  || GET_MODE (target) != tmode0
23344	  || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23345	target = gen_reg_rtx (tmode0);
23346
23347      scratch1 = gen_reg_rtx (tmode1);
23348
23349      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
23350    }
23351  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
23352    {
23353      if (optimize || !target
23354	  || GET_MODE (target) != tmode1
23355	  || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23356	target = gen_reg_rtx (tmode1);
23357
23358      scratch0 = gen_reg_rtx (tmode0);
23359
23360      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
23361    }
23362  else
23363    {
23364      gcc_assert (d->flag);
23365
23366      scratch0 = gen_reg_rtx (tmode0);
23367      scratch1 = gen_reg_rtx (tmode1);
23368
23369      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
23370    }
23371
23372  if (! pat)
23373    return 0;
23374
23375  emit_insn (pat);
23376
23377  if (d->flag)
23378    {
23379      target = gen_reg_rtx (SImode);
23380      emit_move_insn (target, const0_rtx);
23381      target = gen_rtx_SUBREG (QImode, target, 0);
23382
23383      emit_insn
23384	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23385		      gen_rtx_fmt_ee (EQ, QImode,
23386				      gen_rtx_REG ((enum machine_mode) d->flag,
23387						   FLAGS_REG),
23388				      const0_rtx)));
23389      return SUBREG_REG (target);
23390    }
23391  else
23392    return target;
23393}
23394
23395
23396/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */
23397
23398static rtx
23399ix86_expand_sse_pcmpistr (const struct builtin_description *d,
23400			  tree exp, rtx target)
23401{
23402  rtx pat;
23403  tree arg0 = CALL_EXPR_ARG (exp, 0);
23404  tree arg1 = CALL_EXPR_ARG (exp, 1);
23405  tree arg2 = CALL_EXPR_ARG (exp, 2);
23406  rtx scratch0, scratch1;
23407  rtx op0 = expand_normal (arg0);
23408  rtx op1 = expand_normal (arg1);
23409  rtx op2 = expand_normal (arg2);
23410  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
23411
23412  tmode0 = insn_data[d->icode].operand[0].mode;
23413  tmode1 = insn_data[d->icode].operand[1].mode;
23414  modev2 = insn_data[d->icode].operand[2].mode;
23415  modev3 = insn_data[d->icode].operand[3].mode;
23416  modeimm = insn_data[d->icode].operand[4].mode;
23417
23418  if (VECTOR_MODE_P (modev2))
23419    op0 = safe_vector_operand (op0, modev2);
23420  if (VECTOR_MODE_P (modev3))
23421    op1 = safe_vector_operand (op1, modev3);
23422
23423  if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23424    op0 = copy_to_mode_reg (modev2, op0);
23425  if ((optimize && !register_operand (op1, modev3))
23426      || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
23427    op1 = copy_to_mode_reg (modev3, op1);
23428
23429  if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
23430    {
23431      error ("the third argument must be an 8-bit immediate");
23432      return const0_rtx;
23433    }
23434
23435  if (d->code == IX86_BUILTIN_PCMPISTRI128)
23436    {
23437      if (optimize || !target
23438	  || GET_MODE (target) != tmode0
23439	  || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23440	target = gen_reg_rtx (tmode0);
23441
23442      scratch1 = gen_reg_rtx (tmode1);
23443
23444      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
23445    }
23446  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
23447    {
23448      if (optimize || !target
23449	  || GET_MODE (target) != tmode1
23450	  || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23451	target = gen_reg_rtx (tmode1);
23452
23453      scratch0 = gen_reg_rtx (tmode0);
23454
23455      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
23456    }
23457  else
23458    {
23459      gcc_assert (d->flag);
23460
23461      scratch0 = gen_reg_rtx (tmode0);
23462      scratch1 = gen_reg_rtx (tmode1);
23463
23464      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
23465    }
23466
23467  if (! pat)
23468    return 0;
23469
23470  emit_insn (pat);
23471
23472  if (d->flag)
23473    {
23474      target = gen_reg_rtx (SImode);
23475      emit_move_insn (target, const0_rtx);
23476      target = gen_rtx_SUBREG (QImode, target, 0);
23477
23478      emit_insn
23479	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23480		      gen_rtx_fmt_ee (EQ, QImode,
23481				      gen_rtx_REG ((enum machine_mode) d->flag,
23482						   FLAGS_REG),
23483				      const0_rtx)));
23484      return SUBREG_REG (target);
23485    }
23486  else
23487    return target;
23488}
23489
23490/* Subroutine of ix86_expand_builtin to take care of insns with
23491   variable number of operands.  */
23492
23493static rtx
23494ix86_expand_args_builtin (const struct builtin_description *d,
23495			  tree exp, rtx target)
23496{
23497  rtx pat, real_target;
23498  unsigned int i, nargs;
23499  unsigned int nargs_constant = 0;
23500  int num_memory = 0;
23501  struct
23502    {
23503      rtx op;
23504      enum machine_mode mode;
23505    } args[4];
23506  bool last_arg_count = false;
23507  enum insn_code icode = d->icode;
23508  const struct insn_data *insn_p = &insn_data[icode];
23509  enum machine_mode tmode = insn_p->operand[0].mode;
23510  enum machine_mode rmode = VOIDmode;
23511  bool swap = false;
23512  enum rtx_code comparison = d->comparison;
23513
23514  switch ((enum ix86_builtin_func_type) d->flag)
23515    {
23516    case INT_FTYPE_V8SF_V8SF_PTEST:
23517    case INT_FTYPE_V4DI_V4DI_PTEST:
23518    case INT_FTYPE_V4DF_V4DF_PTEST:
23519    case INT_FTYPE_V4SF_V4SF_PTEST:
23520    case INT_FTYPE_V2DI_V2DI_PTEST:
23521    case INT_FTYPE_V2DF_V2DF_PTEST:
23522      return ix86_expand_sse_ptest (d, exp, target);
23523    case FLOAT128_FTYPE_FLOAT128:
23524    case FLOAT_FTYPE_FLOAT:
23525    case INT_FTYPE_INT:
23526    case UINT64_FTYPE_INT:
23527    case UINT16_FTYPE_UINT16:
23528    case INT64_FTYPE_INT64:
23529    case INT64_FTYPE_V4SF:
23530    case INT64_FTYPE_V2DF:
23531    case INT_FTYPE_V16QI:
23532    case INT_FTYPE_V8QI:
23533    case INT_FTYPE_V8SF:
23534    case INT_FTYPE_V4DF:
23535    case INT_FTYPE_V4SF:
23536    case INT_FTYPE_V2DF:
23537    case V16QI_FTYPE_V16QI:
23538    case V8SI_FTYPE_V8SF:
23539    case V8SI_FTYPE_V4SI:
23540    case V8HI_FTYPE_V8HI:
23541    case V8HI_FTYPE_V16QI:
23542    case V8QI_FTYPE_V8QI:
23543    case V8SF_FTYPE_V8SF:
23544    case V8SF_FTYPE_V8SI:
23545    case V8SF_FTYPE_V4SF:
23546    case V4SI_FTYPE_V4SI:
23547    case V4SI_FTYPE_V16QI:
23548    case V4SI_FTYPE_V4SF:
23549    case V4SI_FTYPE_V8SI:
23550    case V4SI_FTYPE_V8HI:
23551    case V4SI_FTYPE_V4DF:
23552    case V4SI_FTYPE_V2DF:
23553    case V4HI_FTYPE_V4HI:
23554    case V4DF_FTYPE_V4DF:
23555    case V4DF_FTYPE_V4SI:
23556    case V4DF_FTYPE_V4SF:
23557    case V4DF_FTYPE_V2DF:
23558    case V4SF_FTYPE_V4SF:
23559    case V4SF_FTYPE_V4SI:
23560    case V4SF_FTYPE_V8SF:
23561    case V4SF_FTYPE_V4DF:
23562    case V4SF_FTYPE_V2DF:
23563    case V2DI_FTYPE_V2DI:
23564    case V2DI_FTYPE_V16QI:
23565    case V2DI_FTYPE_V8HI:
23566    case V2DI_FTYPE_V4SI:
23567    case V2DF_FTYPE_V2DF:
23568    case V2DF_FTYPE_V4SI:
23569    case V2DF_FTYPE_V4DF:
23570    case V2DF_FTYPE_V4SF:
23571    case V2DF_FTYPE_V2SI:
23572    case V2SI_FTYPE_V2SI:
23573    case V2SI_FTYPE_V4SF:
23574    case V2SI_FTYPE_V2SF:
23575    case V2SI_FTYPE_V2DF:
23576    case V2SF_FTYPE_V2SF:
23577    case V2SF_FTYPE_V2SI:
23578      nargs = 1;
23579      break;
23580    case V4SF_FTYPE_V4SF_VEC_MERGE:
23581    case V2DF_FTYPE_V2DF_VEC_MERGE:
23582      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
23583    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
23584    case V16QI_FTYPE_V16QI_V16QI:
23585    case V16QI_FTYPE_V8HI_V8HI:
23586    case V8QI_FTYPE_V8QI_V8QI:
23587    case V8QI_FTYPE_V4HI_V4HI:
23588    case V8HI_FTYPE_V8HI_V8HI:
23589    case V8HI_FTYPE_V16QI_V16QI:
23590    case V8HI_FTYPE_V4SI_V4SI:
23591    case V8SF_FTYPE_V8SF_V8SF:
23592    case V8SF_FTYPE_V8SF_V8SI:
23593    case V4SI_FTYPE_V4SI_V4SI:
23594    case V4SI_FTYPE_V8HI_V8HI:
23595    case V4SI_FTYPE_V4SF_V4SF:
23596    case V4SI_FTYPE_V2DF_V2DF:
23597    case V4HI_FTYPE_V4HI_V4HI:
23598    case V4HI_FTYPE_V8QI_V8QI:
23599    case V4HI_FTYPE_V2SI_V2SI:
23600    case V4DF_FTYPE_V4DF_V4DF:
23601    case V4DF_FTYPE_V4DF_V4DI:
23602    case V4SF_FTYPE_V4SF_V4SF:
23603    case V4SF_FTYPE_V4SF_V4SI:
23604    case V4SF_FTYPE_V4SF_V2SI:
23605    case V4SF_FTYPE_V4SF_V2DF:
23606    case V4SF_FTYPE_V4SF_DI:
23607    case V4SF_FTYPE_V4SF_SI:
23608    case V2DI_FTYPE_V2DI_V2DI:
23609    case V2DI_FTYPE_V16QI_V16QI:
23610    case V2DI_FTYPE_V4SI_V4SI:
23611    case V2DI_FTYPE_V2DI_V16QI:
23612    case V2DI_FTYPE_V2DF_V2DF:
23613    case V2SI_FTYPE_V2SI_V2SI:
23614    case V2SI_FTYPE_V4HI_V4HI:
23615    case V2SI_FTYPE_V2SF_V2SF:
23616    case V2DF_FTYPE_V2DF_V2DF:
23617    case V2DF_FTYPE_V2DF_V4SF:
23618    case V2DF_FTYPE_V2DF_V2DI:
23619    case V2DF_FTYPE_V2DF_DI:
23620    case V2DF_FTYPE_V2DF_SI:
23621    case V2SF_FTYPE_V2SF_V2SF:
23622    case V1DI_FTYPE_V1DI_V1DI:
23623    case V1DI_FTYPE_V8QI_V8QI:
23624    case V1DI_FTYPE_V2SI_V2SI:
23625      if (comparison == UNKNOWN)
23626	return ix86_expand_binop_builtin (icode, exp, target);
23627      nargs = 2;
23628      break;
23629    case V4SF_FTYPE_V4SF_V4SF_SWAP:
23630    case V2DF_FTYPE_V2DF_V2DF_SWAP:
23631      gcc_assert (comparison != UNKNOWN);
23632      nargs = 2;
23633      swap = true;
23634      break;
23635    case V8HI_FTYPE_V8HI_V8HI_COUNT:
23636    case V8HI_FTYPE_V8HI_SI_COUNT:
23637    case V4SI_FTYPE_V4SI_V4SI_COUNT:
23638    case V4SI_FTYPE_V4SI_SI_COUNT:
23639    case V4HI_FTYPE_V4HI_V4HI_COUNT:
23640    case V4HI_FTYPE_V4HI_SI_COUNT:
23641    case V2DI_FTYPE_V2DI_V2DI_COUNT:
23642    case V2DI_FTYPE_V2DI_SI_COUNT:
23643    case V2SI_FTYPE_V2SI_V2SI_COUNT:
23644    case V2SI_FTYPE_V2SI_SI_COUNT:
23645    case V1DI_FTYPE_V1DI_V1DI_COUNT:
23646    case V1DI_FTYPE_V1DI_SI_COUNT:
23647      nargs = 2;
23648      last_arg_count = true;
23649      break;
23650    case UINT64_FTYPE_UINT64_UINT64:
23651    case UINT_FTYPE_UINT_UINT:
23652    case UINT_FTYPE_UINT_USHORT:
23653    case UINT_FTYPE_UINT_UCHAR:
23654    case UINT16_FTYPE_UINT16_INT:
23655    case UINT8_FTYPE_UINT8_INT:
23656      nargs = 2;
23657      break;
23658    case V2DI_FTYPE_V2DI_INT_CONVERT:
23659      nargs = 2;
23660      rmode = V1TImode;
23661      nargs_constant = 1;
23662      break;
23663    case V8HI_FTYPE_V8HI_INT:
23664    case V8SF_FTYPE_V8SF_INT:
23665    case V4SI_FTYPE_V4SI_INT:
23666    case V4SI_FTYPE_V8SI_INT:
23667    case V4HI_FTYPE_V4HI_INT:
23668    case V4DF_FTYPE_V4DF_INT:
23669    case V4SF_FTYPE_V4SF_INT:
23670    case V4SF_FTYPE_V8SF_INT:
23671    case V2DI_FTYPE_V2DI_INT:
23672    case V2DF_FTYPE_V2DF_INT:
23673    case V2DF_FTYPE_V4DF_INT:
23674      nargs = 2;
23675      nargs_constant = 1;
23676      break;
23677    case V16QI_FTYPE_V16QI_V16QI_V16QI:
23678    case V8SF_FTYPE_V8SF_V8SF_V8SF:
23679    case V4DF_FTYPE_V4DF_V4DF_V4DF:
23680    case V4SF_FTYPE_V4SF_V4SF_V4SF:
23681    case V2DF_FTYPE_V2DF_V2DF_V2DF:
23682      nargs = 3;
23683      break;
23684    case V16QI_FTYPE_V16QI_V16QI_INT:
23685    case V8HI_FTYPE_V8HI_V8HI_INT:
23686    case V8SI_FTYPE_V8SI_V8SI_INT:
23687    case V8SI_FTYPE_V8SI_V4SI_INT:
23688    case V8SF_FTYPE_V8SF_V8SF_INT:
23689    case V8SF_FTYPE_V8SF_V4SF_INT:
23690    case V4SI_FTYPE_V4SI_V4SI_INT:
23691    case V4DF_FTYPE_V4DF_V4DF_INT:
23692    case V4DF_FTYPE_V4DF_V2DF_INT:
23693    case V4SF_FTYPE_V4SF_V4SF_INT:
23694    case V2DI_FTYPE_V2DI_V2DI_INT:
23695    case V2DF_FTYPE_V2DF_V2DF_INT:
23696      nargs = 3;
23697      nargs_constant = 1;
23698      break;
23699    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
23700      nargs = 3;
23701      rmode = V2DImode;
23702      nargs_constant = 1;
23703      break;
23704    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
23705      nargs = 3;
23706      rmode = DImode;
23707      nargs_constant = 1;
23708      break;
23709    case V2DI_FTYPE_V2DI_UINT_UINT:
23710      nargs = 3;
23711      nargs_constant = 2;
23712      break;
23713    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
23714    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
23715    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
23716    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
23717      nargs = 4;
23718      nargs_constant = 1;
23719      break;
23720    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23721      nargs = 4;
23722      nargs_constant = 2;
23723      break;
23724    default:
23725      gcc_unreachable ();
23726    }
23727
23728  gcc_assert (nargs <= ARRAY_SIZE (args));
23729
23730  if (comparison != UNKNOWN)
23731    {
23732      gcc_assert (nargs == 2);
23733      return ix86_expand_sse_compare (d, exp, target, swap);
23734    }
23735
23736  if (rmode == VOIDmode || rmode == tmode)
23737    {
23738      if (optimize
23739	  || target == 0
23740	  || GET_MODE (target) != tmode
23741	  || ! (*insn_p->operand[0].predicate) (target, tmode))
23742	target = gen_reg_rtx (tmode);
23743      real_target = target;
23744    }
23745  else
23746    {
23747      target = gen_reg_rtx (rmode);
23748      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
23749    }
23750
23751  for (i = 0; i < nargs; i++)
23752    {
23753      tree arg = CALL_EXPR_ARG (exp, i);
23754      rtx op = expand_normal (arg);
23755      enum machine_mode mode = insn_p->operand[i + 1].mode;
23756      bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
23757
23758      if (last_arg_count && (i + 1) == nargs)
23759	{
23760	  /* SIMD shift insns take either an 8-bit immediate or a
23761	     register as the count.  But the builtin functions take an int
23762	     as the count.  If the count doesn't match, put it in a register.  */
23763	  if (!match)
23764	    {
23765	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
23766	      if (!(*insn_p->operand[i + 1].predicate) (op, mode))
23767		op = copy_to_reg (op);
23768	    }
23769	}
23770      else if ((nargs - i) <= nargs_constant)
23771	{
23772	  if (!match)
23773	    switch (icode)
23774	      {
23775	      case CODE_FOR_sse4_1_roundpd:
23776	      case CODE_FOR_sse4_1_roundps:
23777	      case CODE_FOR_sse4_1_roundsd:
23778	      case CODE_FOR_sse4_1_roundss:
23779	      case CODE_FOR_sse4_1_blendps:
23780	      case CODE_FOR_avx_blendpd256:
23781	      case CODE_FOR_avx_vpermilv4df:
23782	      case CODE_FOR_avx_roundpd256:
23783	      case CODE_FOR_avx_roundps256:
23784		error ("the last argument must be a 4-bit immediate");
23785		return const0_rtx;
23786
23787	      case CODE_FOR_sse4_1_blendpd:
23788	      case CODE_FOR_avx_vpermilv2df:
23789	      case CODE_FOR_xop_vpermil2v2df3:
23790	      case CODE_FOR_xop_vpermil2v4sf3:
23791	      case CODE_FOR_xop_vpermil2v4df3:
23792	      case CODE_FOR_xop_vpermil2v8sf3:
23793		error ("the last argument must be a 2-bit immediate");
23794		return const0_rtx;
23795
23796	      case CODE_FOR_avx_vextractf128v4df:
23797	      case CODE_FOR_avx_vextractf128v8sf:
23798	      case CODE_FOR_avx_vextractf128v8si:
23799	      case CODE_FOR_avx_vinsertf128v4df:
23800	      case CODE_FOR_avx_vinsertf128v8sf:
23801	      case CODE_FOR_avx_vinsertf128v8si:
23802		error ("the last argument must be a 1-bit immediate");
23803		return const0_rtx;
23804
23805	      case CODE_FOR_avx_cmpsdv2df3:
23806	      case CODE_FOR_avx_cmpssv4sf3:
23807	      case CODE_FOR_avx_cmppdv2df3:
23808	      case CODE_FOR_avx_cmppsv4sf3:
23809	      case CODE_FOR_avx_cmppdv4df3:
23810	      case CODE_FOR_avx_cmppsv8sf3:
23811		error ("the last argument must be a 5-bit immediate");
23812		return const0_rtx;
23813
23814	      default:
23815		switch (nargs_constant)
23816		  {
23817		  case 2:
23818		    if ((nargs - i) == nargs_constant)
23819		      {
23820			error ("the next to last argument must be an 8-bit immediate");
23821			break;
23822		      }
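		    /* FALLTHRU */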
23823		  case 1:
23824		    error ("the last argument must be an 8-bit immediate");
23825		    break;
23826		  default:
23827		    gcc_unreachable ();
23828		  }
23829		return const0_rtx;
23830	      }
23831	}
23832      else
23833	{
23834	  if (VECTOR_MODE_P (mode))
23835	    op = safe_vector_operand (op, mode);
23836
23837	  /* If we aren't optimizing, only allow one memory operand to
23838	     be generated.  */
23839	  if (memory_operand (op, mode))
23840	    num_memory++;
23841
23842	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
23843	    {
23844	      if (optimize || !match || num_memory > 1)
23845		op = copy_to_mode_reg (mode, op);
23846	    }
23847	  else
23848	    {
23849	      op = copy_to_reg (op);
23850	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
23851	    }
23852	}
23853
23854      args[i].op = op;
23855      args[i].mode = mode;
23856    }
23857
23858  switch (nargs)
23859    {
23860    case 1:
23861      pat = GEN_FCN (icode) (real_target, args[0].op);
23862      break;
23863    case 2:
23864      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
23865      break;
23866    case 3:
23867      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23868			     args[2].op);
23869      break;
23870    case 4:
23871      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23872			     args[2].op, args[3].op);
23873      break;
23874    default:
23875      gcc_unreachable ();
23876    }
23877
23878  if (! pat)
23879    return 0;
23880
23881  emit_insn (pat);
23882  return target;
23883}
23884
23885/* Subroutine of ix86_expand_builtin to take care of special insns
23886   with variable number of operands.  */
23887
23888static rtx
23889ix86_expand_special_args_builtin (const struct builtin_description *d,
23890				    tree exp, rtx target)
23891{
23892  tree arg;
23893  rtx pat, op;
23894  unsigned int i, nargs, arg_adjust, memory;
23895  struct
23896    {
23897      rtx op;
23898      enum machine_mode mode;
23899    } args[3];
23900  enum insn_code icode = d->icode;
23901  bool last_arg_constant = false;
23902  const struct insn_data *insn_p = &insn_data[icode];
23903  enum machine_mode tmode = insn_p->operand[0].mode;
23904  enum { load, store } klass;
23905
23906  switch ((enum ix86_builtin_func_type) d->flag)
23907    {
23908    case VOID_FTYPE_VOID:
23909      emit_insn (GEN_FCN (icode) (target));
23910      return 0;
23911    case UINT64_FTYPE_VOID:
23912      nargs = 0;
23913      klass = load;
23914      memory = 0;
23915      break;
23916    case UINT64_FTYPE_PUNSIGNED:
23917    case V2DI_FTYPE_PV2DI:
23918    case V32QI_FTYPE_PCCHAR:
23919    case V16QI_FTYPE_PCCHAR:
23920    case V8SF_FTYPE_PCV4SF:
23921    case V8SF_FTYPE_PCFLOAT:
23922    case V4SF_FTYPE_PCFLOAT:
23923    case V4DF_FTYPE_PCV2DF:
23924    case V4DF_FTYPE_PCDOUBLE:
23925    case V2DF_FTYPE_PCDOUBLE:
23926    case VOID_FTYPE_PVOID:
23927      nargs = 1;
23928      klass = load;
23929      memory = 0;
23930      break;
23931    case VOID_FTYPE_PV2SF_V4SF:
23932    case VOID_FTYPE_PV4DI_V4DI:
23933    case VOID_FTYPE_PV2DI_V2DI:
23934    case VOID_FTYPE_PCHAR_V32QI:
23935    case VOID_FTYPE_PCHAR_V16QI:
23936    case VOID_FTYPE_PFLOAT_V8SF:
23937    case VOID_FTYPE_PFLOAT_V4SF:
23938    case VOID_FTYPE_PDOUBLE_V4DF:
23939    case VOID_FTYPE_PDOUBLE_V2DF:
23940    case VOID_FTYPE_PULONGLONG_ULONGLONG:
23941    case VOID_FTYPE_PINT_INT:
23942      nargs = 1;
23943      klass = store;
23944      /* Reserve memory operand for target.  */
23945      memory = ARRAY_SIZE (args);
23946      break;
23947    case V4SF_FTYPE_V4SF_PCV2SF:
23948    case V2DF_FTYPE_V2DF_PCDOUBLE:
23949      nargs = 2;
23950      klass = load;
23951      memory = 1;
23952      break;
23953    case V8SF_FTYPE_PCV8SF_V8SI:
23954    case V4DF_FTYPE_PCV4DF_V4DI:
23955    case V4SF_FTYPE_PCV4SF_V4SI:
23956    case V2DF_FTYPE_PCV2DF_V2DI:
23957      nargs = 2;
23958      klass = load;
23959      memory = 0;
23960      break;
23961    case VOID_FTYPE_PV8SF_V8SI_V8SF:
23962    case VOID_FTYPE_PV4DF_V4DI_V4DF:
23963    case VOID_FTYPE_PV4SF_V4SI_V4SF:
23964    case VOID_FTYPE_PV2DF_V2DI_V2DF:
23965      nargs = 2;
23966      klass = store;
23967      /* Reserve memory operand for target.  */
23968      memory = ARRAY_SIZE (args);
23969      break;
23970    case VOID_FTYPE_UINT_UINT_UINT:
23971    case VOID_FTYPE_UINT64_UINT_UINT:
23972    case UCHAR_FTYPE_UINT_UINT_UINT:
23973    case UCHAR_FTYPE_UINT64_UINT_UINT:
23974      nargs = 3;
23975      klass = load;
23976      memory = ARRAY_SIZE (args);
23977      last_arg_constant = true;
23978      break;
23979    default:
23980      gcc_unreachable ();
23981    }
23982
23983  gcc_assert (nargs <= ARRAY_SIZE (args));
23984
23985  if (klass == store)
23986    {
23987      arg = CALL_EXPR_ARG (exp, 0);
23988      op = expand_normal (arg);
23989      gcc_assert (target == 0);
23990      target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
23991      arg_adjust = 1;
23992    }
23993  else
23994    {
23995      arg_adjust = 0;
23996      if (optimize
23997	  || target == 0
23998	  || GET_MODE (target) != tmode
23999	  || ! (*insn_p->operand[0].predicate) (target, tmode))
24000	target = gen_reg_rtx (tmode);
24001    }
24002
24003  for (i = 0; i < nargs; i++)
24004    {
24005      enum machine_mode mode = insn_p->operand[i + 1].mode;
24006      bool match;
24007
24008      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24009      op = expand_normal (arg);
24010      match = (*insn_p->operand[i + 1].predicate) (op, mode);
24011
24012      if (last_arg_constant && (i + 1) == nargs)
24013	{
24014	  if (!match)
24015	    {
24016	      if (icode == CODE_FOR_lwp_lwpvalsi3
24017		  || icode == CODE_FOR_lwp_lwpinssi3
24018		  || icode == CODE_FOR_lwp_lwpvaldi3
24019		  || icode == CODE_FOR_lwp_lwpinsdi3)
24020		error ("the last argument must be a 32-bit immediate");
24021	      else
24022		error ("the last argument must be an 8-bit immediate");
24023	      return const0_rtx;
24024	    }
24025	}
24026      else
24027	{
24028	  if (i == memory)
24029	    {
24030	      /* This must be the memory operand.  */
24031	      op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24032	      gcc_assert (GET_MODE (op) == mode
24033			  || GET_MODE (op) == VOIDmode);
24034	    }
24035	  else
24036	    {
24037	      /* This must be a register operand.  */
24038	      if (VECTOR_MODE_P (mode))
24039		op = safe_vector_operand (op, mode);
24040
24041	      gcc_assert (GET_MODE (op) == mode
24042			  || GET_MODE (op) == VOIDmode);
24043	      op = copy_to_mode_reg (mode, op);
24044	    }
24045	}
24046
24047      args[i].op = op;
24048      args[i].mode = mode;
24049    }
24050
24051  switch (nargs)
24052    {
24053    case 0:
24054      pat = GEN_FCN (icode) (target);
24055      break;
24056    case 1:
24057      pat = GEN_FCN (icode) (target, args[0].op);
24058      break;
24059    case 2:
24060      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24061      break;
24062    case 3:
24063      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24064      break;
24065    default:
24066      gcc_unreachable ();
24067    }
24068
24069  if (! pat)
24070    return 0;
24071  emit_insn (pat);
24072  return klass == store ? 0 : target;
24073}
24074
24075/* Return the integer constant in ARG.  Constrain it to be in the range
24076   of the subparts of VEC_TYPE; issue an error if not.  */
24077
24078static int
24079get_element_number (tree vec_type, tree arg)
24080{
24081  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
24082
24083  if (!host_integerp (arg, 1)
24084      || (elt = tree_low_cst (arg, 1), elt > max))
24085    {
24086      error ("selector must be an integer constant in the range 0..%wi", max);
24087      return 0;
24088    }
24089
24090  return elt;
24091}
24092
24093/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
24094   ix86_expand_vector_init.  We DO have language-level syntax for this, in
24095   the form of  (type){ init-list }.  Except that since we can't place emms
24096   instructions from inside the compiler, we can't allow the use of MMX
24097   registers unless the user explicitly asks for it.  So we do *not* define
24098   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
24099   we have builtins invoked by mmintrin.h that give us license to emit
24100   these sorts of instructions.  */
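
/* As a hedged illustration of how mmintrin.h is expected to use these
   builtins (the exact header text may differ), _mm_set_pi32 can be written
   roughly as

     extern __inline __m64
     _mm_set_pi32 (int __i1, int __i0)
     {
       return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
     }

   which funnels the element list through IX86_BUILTIN_VEC_INIT_V2SI and
   the expander below.  */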
24101
24102static rtx
24103ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24104{
24105  enum machine_mode tmode = TYPE_MODE (type);
24106  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24107  int i, n_elt = GET_MODE_NUNITS (tmode);
24108  rtvec v = rtvec_alloc (n_elt);
24109
24110  gcc_assert (VECTOR_MODE_P (tmode));
24111  gcc_assert (call_expr_nargs (exp) == n_elt);
24112
24113  for (i = 0; i < n_elt; ++i)
24114    {
24115      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24116      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24117    }
24118
24119  if (!target || !register_operand (target, tmode))
24120    target = gen_reg_rtx (tmode);
24121
24122  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24123  return target;
24124}
24125
24126/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
24127   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
24128   had a language-level syntax for referencing vector elements.  */
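
/* For example (a hedged sketch; the real headers may differ), emmintrin.h
   can implement _mm_extract_epi16 on top of this expander as

     extern __inline int
     _mm_extract_epi16 (__m128i __A, int __N)
     {
       return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi) __A, __N);
     }

   The element-number argument is validated by get_element_number above.  */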
24129
24130static rtx
24131ix86_expand_vec_ext_builtin (tree exp, rtx target)
24132{
24133  enum machine_mode tmode, mode0;
24134  tree arg0, arg1;
24135  int elt;
24136  rtx op0;
24137
24138  arg0 = CALL_EXPR_ARG (exp, 0);
24139  arg1 = CALL_EXPR_ARG (exp, 1);
24140
24141  op0 = expand_normal (arg0);
24142  elt = get_element_number (TREE_TYPE (arg0), arg1);
24143
24144  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24145  mode0 = TYPE_MODE (TREE_TYPE (arg0));
24146  gcc_assert (VECTOR_MODE_P (mode0));
24147
24148  op0 = force_reg (mode0, op0);
24149
24150  if (optimize || !target || !register_operand (target, tmode))
24151    target = gen_reg_rtx (tmode);
24152
24153  ix86_expand_vector_extract (true, target, op0, elt);
24154
24155  return target;
24156}
24157
24158/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
24159   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
24160   a language-level syntax for referencing vector elements.  */
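
/* A hedged illustration (actual header wording may differ): emmintrin.h can
   define _mm_insert_epi16 on top of this expander as

     extern __inline __m128i
     _mm_insert_epi16 (__m128i __A, int __D, int __N)
     {
       return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) __A, __D, __N);
     }

   Note that the expansion copies __A into a fresh register, so the source
   operand itself is never modified.  */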
24161
24162static rtx
24163ix86_expand_vec_set_builtin (tree exp)
24164{
24165  enum machine_mode tmode, mode1;
24166  tree arg0, arg1, arg2;
24167  int elt;
24168  rtx op0, op1, target;
24169
24170  arg0 = CALL_EXPR_ARG (exp, 0);
24171  arg1 = CALL_EXPR_ARG (exp, 1);
24172  arg2 = CALL_EXPR_ARG (exp, 2);
24173
24174  tmode = TYPE_MODE (TREE_TYPE (arg0));
24175  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24176  gcc_assert (VECTOR_MODE_P (tmode));
24177
24178  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24179  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
24180  elt = get_element_number (TREE_TYPE (arg0), arg2);
24181
24182  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24183    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24184
24185  op0 = force_reg (tmode, op0);
24186  op1 = force_reg (mode1, op1);
24187
24188  /* OP0 is the source of these builtin functions and shouldn't be
24189     modified.  Create a copy, use it and return it as target.  */
24190  target = gen_reg_rtx (tmode);
24191  emit_move_insn (target, op0);
24192  ix86_expand_vector_set (true, target, op1, elt);
24193
24194  return target;
24195}
24196
24197/* Expand an expression EXP that calls a built-in function,
24198   with result going to TARGET if that's convenient
24199   (and in mode MODE if that's convenient).
24200   SUBTARGET may be used as the target for computing one of EXP's operands.
24201   IGNORE is nonzero if the value is to be ignored.  */
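
/* The expansion below first handles the builtins that need special-case
   code (MASKMOVQ/MASKMOVDQU, LDMXCSR/STMXCSR, CLFLUSH, MONITOR/MWAIT, the
   vec_init/vec_ext/vec_set and vec_perm wrappers, INFQ/HUGE_VALQ and the
   LWP control builtins), then falls back to the generic descriptor tables:
   bdesc_special_args, bdesc_args, bdesc_comi, bdesc_pcmpestr,
   bdesc_pcmpistr and bdesc_multi_arg.  */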
24202
24203static rtx
24204ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24205		     enum machine_mode mode ATTRIBUTE_UNUSED,
24206		     int ignore ATTRIBUTE_UNUSED)
24207{
24208  const struct builtin_description *d;
24209  size_t i;
24210  enum insn_code icode;
24211  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24212  tree arg0, arg1, arg2;
24213  rtx op0, op1, op2, pat;
24214  enum machine_mode mode0, mode1, mode2;
24215  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24216
24217  /* Determine whether the builtin function is available under the current ISA.
24218     Originally the builtin was not created if it wasn't applicable to the
24219     current ISA based on the command-line switches.  With function-specific
24220     options, we need to check, in the context of the function making the call,
24221     whether it is supported.  */
24222  if (ix86_builtins_isa[fcode].isa
24223      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24224    {
24225      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24226				       NULL, NULL, false);
24227
24228      if (!opts)
24229	error ("%qE needs unknown isa option", fndecl);
24230      else
24231	{
24232	  gcc_assert (opts != NULL);
24233	  error ("%qE needs isa option %s", fndecl, opts);
24234	  free (opts);
24235	}
24236      return const0_rtx;
24237    }
24238
24239  switch (fcode)
24240    {
24241    case IX86_BUILTIN_MASKMOVQ:
24242    case IX86_BUILTIN_MASKMOVDQU:
24243      icode = (fcode == IX86_BUILTIN_MASKMOVQ
24244	       ? CODE_FOR_mmx_maskmovq
24245	       : CODE_FOR_sse2_maskmovdqu);
24246      /* Note the arg order is different from the operand order.  */
24247      arg1 = CALL_EXPR_ARG (exp, 0);
24248      arg2 = CALL_EXPR_ARG (exp, 1);
24249      arg0 = CALL_EXPR_ARG (exp, 2);
24250      op0 = expand_normal (arg0);
24251      op1 = expand_normal (arg1);
24252      op2 = expand_normal (arg2);
24253      mode0 = insn_data[icode].operand[0].mode;
24254      mode1 = insn_data[icode].operand[1].mode;
24255      mode2 = insn_data[icode].operand[2].mode;
24256
24257      op0 = force_reg (Pmode, op0);
24258      op0 = gen_rtx_MEM (mode1, op0);
24259
24260      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
24261	op0 = copy_to_mode_reg (mode0, op0);
24262      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
24263	op1 = copy_to_mode_reg (mode1, op1);
24264      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
24265	op2 = copy_to_mode_reg (mode2, op2);
24266      pat = GEN_FCN (icode) (op0, op1, op2);
24267      if (! pat)
24268	return 0;
24269      emit_insn (pat);
24270      return 0;
24271
24272    case IX86_BUILTIN_LDMXCSR:
24273      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
24274      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24275      emit_move_insn (target, op0);
24276      emit_insn (gen_sse_ldmxcsr (target));
24277      return 0;
24278
24279    case IX86_BUILTIN_STMXCSR:
24280      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24281      emit_insn (gen_sse_stmxcsr (target));
24282      return copy_to_mode_reg (SImode, target);
24283
24284    case IX86_BUILTIN_CLFLUSH:
24285	arg0 = CALL_EXPR_ARG (exp, 0);
24286	op0 = expand_normal (arg0);
24287	icode = CODE_FOR_sse2_clflush;
24288	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24289	    op0 = copy_to_mode_reg (Pmode, op0);
24290
24291	emit_insn (gen_sse2_clflush (op0));
24292	return 0;
24293
24294    case IX86_BUILTIN_MONITOR:
24295      arg0 = CALL_EXPR_ARG (exp, 0);
24296      arg1 = CALL_EXPR_ARG (exp, 1);
24297      arg2 = CALL_EXPR_ARG (exp, 2);
24298      op0 = expand_normal (arg0);
24299      op1 = expand_normal (arg1);
24300      op2 = expand_normal (arg2);
24301      if (!REG_P (op0))
24302	op0 = copy_to_mode_reg (Pmode, op0);
24303      if (!REG_P (op1))
24304	op1 = copy_to_mode_reg (SImode, op1);
24305      if (!REG_P (op2))
24306	op2 = copy_to_mode_reg (SImode, op2);
24307      emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
24308      return 0;
24309
24310    case IX86_BUILTIN_MWAIT:
24311      arg0 = CALL_EXPR_ARG (exp, 0);
24312      arg1 = CALL_EXPR_ARG (exp, 1);
24313      op0 = expand_normal (arg0);
24314      op1 = expand_normal (arg1);
24315      if (!REG_P (op0))
24316	op0 = copy_to_mode_reg (SImode, op0);
24317      if (!REG_P (op1))
24318	op1 = copy_to_mode_reg (SImode, op1);
24319      emit_insn (gen_sse3_mwait (op0, op1));
24320      return 0;
24321
24322    case IX86_BUILTIN_VEC_INIT_V2SI:
24323    case IX86_BUILTIN_VEC_INIT_V4HI:
24324    case IX86_BUILTIN_VEC_INIT_V8QI:
24325      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
24326
24327    case IX86_BUILTIN_VEC_EXT_V2DF:
24328    case IX86_BUILTIN_VEC_EXT_V2DI:
24329    case IX86_BUILTIN_VEC_EXT_V4SF:
24330    case IX86_BUILTIN_VEC_EXT_V4SI:
24331    case IX86_BUILTIN_VEC_EXT_V8HI:
24332    case IX86_BUILTIN_VEC_EXT_V2SI:
24333    case IX86_BUILTIN_VEC_EXT_V4HI:
24334    case IX86_BUILTIN_VEC_EXT_V16QI:
24335      return ix86_expand_vec_ext_builtin (exp, target);
24336
24337    case IX86_BUILTIN_VEC_SET_V2DI:
24338    case IX86_BUILTIN_VEC_SET_V4SF:
24339    case IX86_BUILTIN_VEC_SET_V4SI:
24340    case IX86_BUILTIN_VEC_SET_V8HI:
24341    case IX86_BUILTIN_VEC_SET_V4HI:
24342    case IX86_BUILTIN_VEC_SET_V16QI:
24343      return ix86_expand_vec_set_builtin (exp);
24344
24345    case IX86_BUILTIN_VEC_PERM_V2DF:
24346    case IX86_BUILTIN_VEC_PERM_V4SF:
24347    case IX86_BUILTIN_VEC_PERM_V2DI:
24348    case IX86_BUILTIN_VEC_PERM_V4SI:
24349    case IX86_BUILTIN_VEC_PERM_V8HI:
24350    case IX86_BUILTIN_VEC_PERM_V16QI:
24351    case IX86_BUILTIN_VEC_PERM_V2DI_U:
24352    case IX86_BUILTIN_VEC_PERM_V4SI_U:
24353    case IX86_BUILTIN_VEC_PERM_V8HI_U:
24354    case IX86_BUILTIN_VEC_PERM_V16QI_U:
24355    case IX86_BUILTIN_VEC_PERM_V4DF:
24356    case IX86_BUILTIN_VEC_PERM_V8SF:
24357      return ix86_expand_vec_perm_builtin (exp);
24358
24359    case IX86_BUILTIN_INFQ:
24360    case IX86_BUILTIN_HUGE_VALQ:
24361      {
24362	REAL_VALUE_TYPE inf;
24363	rtx tmp;
24364
24365	real_inf (&inf);
24366	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
24367
24368	tmp = validize_mem (force_const_mem (mode, tmp));
24369
24370	if (target == 0)
24371	  target = gen_reg_rtx (mode);
24372
24373	emit_move_insn (target, tmp);
24374	return target;
24375      }
24376
24377    case IX86_BUILTIN_LLWPCB:
24378      arg0 = CALL_EXPR_ARG (exp, 0);
24379      op0 = expand_normal (arg0);
24380      icode = CODE_FOR_lwp_llwpcb;
24381      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24382	op0 = copy_to_mode_reg (Pmode, op0);
24383      emit_insn (gen_lwp_llwpcb (op0));
24384      return 0;
24385
24386    case IX86_BUILTIN_SLWPCB:
24387      icode = CODE_FOR_lwp_slwpcb;
24388      if (!target
24389	  || ! (*insn_data[icode].operand[0].predicate) (target, Pmode))
24390	target = gen_reg_rtx (Pmode);
24391      emit_insn (gen_lwp_slwpcb (target));
24392      return target;
24393
24394    default:
24395      break;
24396    }
24397
24398  for (i = 0, d = bdesc_special_args;
24399       i < ARRAY_SIZE (bdesc_special_args);
24400       i++, d++)
24401    if (d->code == fcode)
24402      return ix86_expand_special_args_builtin (d, exp, target);
24403
24404  for (i = 0, d = bdesc_args;
24405       i < ARRAY_SIZE (bdesc_args);
24406       i++, d++)
24407    if (d->code == fcode)
24408      switch (fcode)
24409	{
24410	case IX86_BUILTIN_FABSQ:
24411	case IX86_BUILTIN_COPYSIGNQ:
24412	  if (!TARGET_SSE2)
24413	    /* Emit a normal call if SSE2 isn't available.  */
24414	    return expand_call (exp, target, ignore);
24415	default:
24416	  return ix86_expand_args_builtin (d, exp, target);
24417	}
24418
24419  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24420    if (d->code == fcode)
24421      return ix86_expand_sse_comi (d, exp, target);
24422
24423  for (i = 0, d = bdesc_pcmpestr;
24424       i < ARRAY_SIZE (bdesc_pcmpestr);
24425       i++, d++)
24426    if (d->code == fcode)
24427      return ix86_expand_sse_pcmpestr (d, exp, target);
24428
24429  for (i = 0, d = bdesc_pcmpistr;
24430       i < ARRAY_SIZE (bdesc_pcmpistr);
24431       i++, d++)
24432    if (d->code == fcode)
24433      return ix86_expand_sse_pcmpistr (d, exp, target);
24434
24435  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24436    if (d->code == fcode)
24437      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
24438					    (enum ix86_builtin_func_type)
24439					    d->flag, d->comparison);
24440
24441  gcc_unreachable ();
24442}
24443
/* Returns a function decl for a vectorized version of the builtin function
   FNDECL, taking input vectors of type TYPE_IN and returning vectors of
   type TYPE_OUT, or NULL_TREE if it is not available.  */
24447
24448static tree
24449ix86_builtin_vectorized_function (tree fndecl, tree type_out,
24450				  tree type_in)
24451{
24452  enum machine_mode in_mode, out_mode;
24453  int in_n, out_n;
24454  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
24455
24456  if (TREE_CODE (type_out) != VECTOR_TYPE
24457      || TREE_CODE (type_in) != VECTOR_TYPE
24458      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
24459    return NULL_TREE;
24460
24461  out_mode = TYPE_MODE (TREE_TYPE (type_out));
24462  out_n = TYPE_VECTOR_SUBPARTS (type_out);
24463  in_mode = TYPE_MODE (TREE_TYPE (type_in));
24464  in_n = TYPE_VECTOR_SUBPARTS (type_in);
24465
24466  switch (fn)
24467    {
24468    case BUILT_IN_SQRT:
24469      if (out_mode == DFmode && out_n == 2
24470	  && in_mode == DFmode && in_n == 2)
24471	return ix86_builtins[IX86_BUILTIN_SQRTPD];
24472      break;
24473
24474    case BUILT_IN_SQRTF:
24475      if (out_mode == SFmode && out_n == 4
24476	  && in_mode == SFmode && in_n == 4)
24477	return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
24478      break;
24479
24480    case BUILT_IN_LRINT:
24481      if (out_mode == SImode && out_n == 4
24482	  && in_mode == DFmode && in_n == 2)
24483	return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
24484      break;
24485
24486    case BUILT_IN_LRINTF:
24487      if (out_mode == SImode && out_n == 4
24488	  && in_mode == SFmode && in_n == 4)
24489	return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
24490      break;
24491
24492    case BUILT_IN_COPYSIGN:
24493      if (out_mode == DFmode && out_n == 2
24494	  && in_mode == DFmode && in_n == 2)
24495	return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
24496      break;
24497
24498    case BUILT_IN_COPYSIGNF:
24499      if (out_mode == SFmode && out_n == 4
24500	  && in_mode == SFmode && in_n == 4)
24501	return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
24502      break;
24503
24504    default:
24505      ;
24506    }
24507
24508  /* Dispatch to a handler for a vectorization library.  */
24509  if (ix86_veclib_handler)
24510    return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
24511				   type_in);
24512
24513  return NULL_TREE;
24514}
24515
24516/* Handler for an SVML-style interface to
24517   a library with vectorized intrinsics.  */
24518
24519static tree
24520ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
24521{
24522  char name[20];
24523  tree fntype, new_fndecl, args;
24524  unsigned arity;
24525  const char *bname;
24526  enum machine_mode el_mode, in_mode;
24527  int n, in_n;
24528
  /* The SVML library is suitable for unsafe math only.  */
24530  if (!flag_unsafe_math_optimizations)
24531    return NULL_TREE;
24532
24533  el_mode = TYPE_MODE (TREE_TYPE (type_out));
24534  n = TYPE_VECTOR_SUBPARTS (type_out);
24535  in_mode = TYPE_MODE (TREE_TYPE (type_in));
24536  in_n = TYPE_VECTOR_SUBPARTS (type_in);
24537  if (el_mode != in_mode
24538      || n != in_n)
24539    return NULL_TREE;
24540
24541  switch (fn)
24542    {
24543    case BUILT_IN_EXP:
24544    case BUILT_IN_LOG:
24545    case BUILT_IN_LOG10:
24546    case BUILT_IN_POW:
24547    case BUILT_IN_TANH:
24548    case BUILT_IN_TAN:
24549    case BUILT_IN_ATAN:
24550    case BUILT_IN_ATAN2:
24551    case BUILT_IN_ATANH:
24552    case BUILT_IN_CBRT:
24553    case BUILT_IN_SINH:
24554    case BUILT_IN_SIN:
24555    case BUILT_IN_ASINH:
24556    case BUILT_IN_ASIN:
24557    case BUILT_IN_COSH:
24558    case BUILT_IN_COS:
24559    case BUILT_IN_ACOSH:
24560    case BUILT_IN_ACOS:
24561      if (el_mode != DFmode || n != 2)
24562	return NULL_TREE;
24563      break;
24564
24565    case BUILT_IN_EXPF:
24566    case BUILT_IN_LOGF:
24567    case BUILT_IN_LOG10F:
24568    case BUILT_IN_POWF:
24569    case BUILT_IN_TANHF:
24570    case BUILT_IN_TANF:
24571    case BUILT_IN_ATANF:
24572    case BUILT_IN_ATAN2F:
24573    case BUILT_IN_ATANHF:
24574    case BUILT_IN_CBRTF:
24575    case BUILT_IN_SINHF:
24576    case BUILT_IN_SINF:
24577    case BUILT_IN_ASINHF:
24578    case BUILT_IN_ASINF:
24579    case BUILT_IN_COSHF:
24580    case BUILT_IN_COSF:
24581    case BUILT_IN_ACOSHF:
24582    case BUILT_IN_ACOSF:
24583      if (el_mode != SFmode || n != 4)
24584	return NULL_TREE;
24585      break;
24586
24587    default:
24588      return NULL_TREE;
24589    }
24590
24591  bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24592
24593  if (fn == BUILT_IN_LOGF)
24594    strcpy (name, "vmlsLn4");
24595  else if (fn == BUILT_IN_LOG)
24596    strcpy (name, "vmldLn2");
24597  else if (n == 4)
24598    {
24599      sprintf (name, "vmls%s", bname+10);
24600      name[strlen (name)-1] = '4';
24601    }
24602  else
24603    sprintf (name, "vmld%s2", bname+10);
24604
  /* Upper-case the first letter of the math function name
     (clear ASCII bit 0x20).  */
  name[4] &= ~0x20;
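  /* For example, BUILT_IN_SINF has the builtin name "__builtin_sinf", so
     bname + 10 is "sinf"; the code above produces "vmlssin4" and the
     uppercasing yields "vmlsSin4".  BUILT_IN_SIN likewise becomes
     "vmldSin2".  */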
24607
24608  arity = 0;
24609  for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24610       args = TREE_CHAIN (args))
24611    arity++;
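  /* Of the functions handled above, only pow and atan2 (and their float
     variants) take two arguments.  */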
24612
24613  if (arity == 1)
24614    fntype = build_function_type_list (type_out, type_in, NULL);
24615  else
24616    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24617
24618  /* Build a function declaration for the vectorized function.  */
24619  new_fndecl = build_decl (BUILTINS_LOCATION,
24620			   FUNCTION_DECL, get_identifier (name), fntype);
24621  TREE_PUBLIC (new_fndecl) = 1;
24622  DECL_EXTERNAL (new_fndecl) = 1;
24623  DECL_IS_NOVOPS (new_fndecl) = 1;
24624  TREE_READONLY (new_fndecl) = 1;
24625
24626  return new_fndecl;
24627}
24628
24629/* Handler for an ACML-style interface to
24630   a library with vectorized intrinsics.  */
24631
24632static tree
24633ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
24634{
24635  char name[20] = "__vr.._";
24636  tree fntype, new_fndecl, args;
24637  unsigned arity;
24638  const char *bname;
24639  enum machine_mode el_mode, in_mode;
24640  int n, in_n;
24641
  /* ACML is 64-bit only and suitable for unsafe math only, as it does
     not correctly support parts of IEEE arithmetic, such as denormals,
     with the required precision.  */
24645  if (!TARGET_64BIT
24646      || !flag_unsafe_math_optimizations)
24647    return NULL_TREE;
24648
24649  el_mode = TYPE_MODE (TREE_TYPE (type_out));
24650  n = TYPE_VECTOR_SUBPARTS (type_out);
24651  in_mode = TYPE_MODE (TREE_TYPE (type_in));
24652  in_n = TYPE_VECTOR_SUBPARTS (type_in);
24653  if (el_mode != in_mode
24654      || n != in_n)
24655    return NULL_TREE;
24656
24657  switch (fn)
24658    {
24659    case BUILT_IN_SIN:
24660    case BUILT_IN_COS:
24661    case BUILT_IN_EXP:
24662    case BUILT_IN_LOG:
24663    case BUILT_IN_LOG2:
24664    case BUILT_IN_LOG10:
24665      name[4] = 'd';
24666      name[5] = '2';
24667      if (el_mode != DFmode
24668	  || n != 2)
24669	return NULL_TREE;
24670      break;
24671
24672    case BUILT_IN_SINF:
24673    case BUILT_IN_COSF:
24674    case BUILT_IN_EXPF:
24675    case BUILT_IN_POWF:
24676    case BUILT_IN_LOGF:
24677    case BUILT_IN_LOG2F:
24678    case BUILT_IN_LOG10F:
24679      name[4] = 's';
24680      name[5] = '4';
24681      if (el_mode != SFmode
24682	  || n != 4)
24683	return NULL_TREE;
24684      break;
24685
24686    default:
24687      return NULL_TREE;
24688    }
24689
24690  bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24691  sprintf (name + 7, "%s", bname+10);
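  /* For example, BUILT_IN_SIN yields "__vrd2_sin" and BUILT_IN_SINF
     yields "__vrs4_sinf", matching the ACML vector entry points.  */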
24692
24693  arity = 0;
24694  for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24695       args = TREE_CHAIN (args))
24696    arity++;
24697
24698  if (arity == 1)
24699    fntype = build_function_type_list (type_out, type_in, NULL);
24700  else
24701    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24702
24703  /* Build a function declaration for the vectorized function.  */
24704  new_fndecl = build_decl (BUILTINS_LOCATION,
24705			   FUNCTION_DECL, get_identifier (name), fntype);
24706  TREE_PUBLIC (new_fndecl) = 1;
24707  DECL_EXTERNAL (new_fndecl) = 1;
24708  DECL_IS_NOVOPS (new_fndecl) = 1;
24709  TREE_READONLY (new_fndecl) = 1;
24710
24711  return new_fndecl;
24712}
24713
24714
24715/* Returns a decl of a function that implements conversion of an integer vector
24716   into a floating-point vector, or vice-versa. TYPE is the type of the integer
24717   side of the conversion.
24718   Return NULL_TREE if it is not available.  */
24719
24720static tree
24721ix86_vectorize_builtin_conversion (unsigned int code, tree type)
24722{
24723  if (! (TARGET_SSE2 && TREE_CODE (type) == VECTOR_TYPE))
24724    return NULL_TREE;
24725
24726  switch (code)
24727    {
24728    case FLOAT_EXPR:
24729      switch (TYPE_MODE (type))
24730	{
24731	case V4SImode:
24732	  return TYPE_UNSIGNED (type)
24733	    ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
24734	    : ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
24735	default:
24736	  return NULL_TREE;
24737	}
24738
24739    case FIX_TRUNC_EXPR:
24740      switch (TYPE_MODE (type))
24741	{
24742	case V4SImode:
24743	  return TYPE_UNSIGNED (type)
24744	    ? NULL_TREE
24745	    : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
24746	default:
24747	  return NULL_TREE;
24748	}
24749    default:
24750      return NULL_TREE;
24751
24752    }
24753}
24754
/* Returns the decl of a target-specific builtin that implements the
   reciprocal of the function FN, or NULL_TREE if it is not available.  */
24757
24758static tree
24759ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
24760			 bool sqrt ATTRIBUTE_UNUSED)
24761{
24762  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
24763	 && flag_finite_math_only && !flag_trapping_math
24764	 && flag_unsafe_math_optimizations))
24765    return NULL_TREE;
24766
24767  if (md_fn)
24768    /* Machine dependent builtins.  */
24769    switch (fn)
24770      {
24771	/* Vectorized version of sqrt to rsqrt conversion.  */
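	/* The _NR ("Newton-Raphson") builtins refine the hardware rsqrtps
	   approximation with one Newton-Raphson step to recover accuracy.  */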
24772      case IX86_BUILTIN_SQRTPS_NR:
24773	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
24774
24775      default:
24776	return NULL_TREE;
24777      }
24778  else
24779    /* Normal builtins.  */
24780    switch (fn)
24781      {
24782	/* Sqrt to rsqrt conversion.  */
24783      case BUILT_IN_SQRTF:
24784	return ix86_builtins[IX86_BUILTIN_RSQRTF];
24785
24786      default:
24787	return NULL_TREE;
24788      }
24789}
24790
24791/* Helper for avx_vpermilps256_operand et al.  This is also used by
24792   the expansion functions to turn the parallel back into a mask.
24793   The return value is 0 for no match and the imm8+1 for a match.  */
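/* For example, for V4SFmode each element contributes two bits, so the
   element-reversing parallel [3 2 1 0] reconstructs the mask 0x1b (and
   0x1c is returned); for V4DFmode each element contributes a single bit
   selecting a double within its own 128-bit lane.  */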
24794
24795int
24796avx_vpermilp_parallel (rtx par, enum machine_mode mode)
24797{
24798  unsigned i, nelt = GET_MODE_NUNITS (mode);
24799  unsigned mask = 0;
24800  unsigned char ipar[8];
24801
24802  if (XVECLEN (par, 0) != (int) nelt)
24803    return 0;
24804
24805  /* Validate that all of the elements are constants, and not totally
24806     out of range.  Copy the data into an integral array to make the
24807     subsequent checks easier.  */
24808  for (i = 0; i < nelt; ++i)
24809    {
24810      rtx er = XVECEXP (par, 0, i);
24811      unsigned HOST_WIDE_INT ei;
24812
24813      if (!CONST_INT_P (er))
24814	return 0;
24815      ei = INTVAL (er);
24816      if (ei >= nelt)
24817	return 0;
24818      ipar[i] = ei;
24819    }
24820
24821  switch (mode)
24822    {
24823    case V4DFmode:
24824      /* In the 256-bit DFmode case, we can only move elements within
24825         a 128-bit lane.  */
24826      for (i = 0; i < 2; ++i)
24827	{
24828	  if (ipar[i] >= 2)
24829	    return 0;
24830	  mask |= ipar[i] << i;
24831	}
24832      for (i = 2; i < 4; ++i)
24833	{
24834	  if (ipar[i] < 2)
24835	    return 0;
24836	  mask |= (ipar[i] - 2) << i;
24837	}
24838      break;
24839
24840    case V8SFmode:
24841      /* In the 256-bit SFmode case, we have full freedom of movement
24842	 within the low 128-bit lane, but the high 128-bit lane must
24843	 mirror the exact same pattern.  */
24844      for (i = 0; i < 4; ++i)
24845	if (ipar[i] + 4 != ipar[i + 4])
24846	  return 0;
24847      nelt = 4;
24848      /* FALLTHRU */
24849
24850    case V2DFmode:
24851    case V4SFmode:
24852      /* In the 128-bit case, we've full freedom in the placement of
24853	 the elements from the source operand.  */
24854      for (i = 0; i < nelt; ++i)
24855	mask |= ipar[i] << (i * (nelt / 2));
24856      break;
24857
24858    default:
24859      gcc_unreachable ();
24860    }
24861
24862  /* Make sure success has a non-zero value by adding one.  */
24863  return mask + 1;
24864}
24865
24866/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
24867   the expansion functions to turn the parallel back into a mask.
24868   The return value is 0 for no match and the imm8+1 for a match.  */
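/* For example, for V4DFmode the parallel [0 1 4 5] selects the low 128-bit
   lane of each source operand and reconstructs the mask 0x20 (so 0x21 is
   returned).  */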
24869
24870int
24871avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
24872{
24873  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
24874  unsigned mask = 0;
24875  unsigned char ipar[8];
24876
24877  if (XVECLEN (par, 0) != (int) nelt)
24878    return 0;
24879
24880  /* Validate that all of the elements are constants, and not totally
24881     out of range.  Copy the data into an integral array to make the
24882     subsequent checks easier.  */
24883  for (i = 0; i < nelt; ++i)
24884    {
24885      rtx er = XVECEXP (par, 0, i);
24886      unsigned HOST_WIDE_INT ei;
24887
24888      if (!CONST_INT_P (er))
24889	return 0;
24890      ei = INTVAL (er);
24891      if (ei >= 2 * nelt)
24892	return 0;
24893      ipar[i] = ei;
24894    }
24895
  /* Validate that each half of the permute selects consecutive elements;
     whether each half starts on a lane boundary is checked when the mask
     is reconstructed below.  */
24897  for (i = 0; i < nelt2 - 1; ++i)
24898    if (ipar[i] + 1 != ipar[i + 1])
24899      return 0;
24900  for (i = nelt2; i < nelt - 1; ++i)
24901    if (ipar[i] + 1 != ipar[i + 1])
24902      return 0;
24903
24904  /* Reconstruct the mask.  */
24905  for (i = 0; i < 2; ++i)
24906    {
24907      unsigned e = ipar[i * nelt2];
24908      if (e % nelt2)
24909	return 0;
24910      e /= nelt2;
24911      mask |= e << (i * 4);
24912    }
24913
24914  /* Make sure success has a non-zero value by adding one.  */
24915  return mask + 1;
24916}
24917
24918
24919/* Store OPERAND to the memory after reload is completed.  This means
24920   that we can't easily use assign_stack_local.  */
24921rtx
24922ix86_force_to_memory (enum machine_mode mode, rtx operand)
24923{
24924  rtx result;
24925
24926  gcc_assert (reload_completed);
24927  if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
24928    {
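      /* The x86-64 SysV ABI guarantees a 128-byte red zone below the stack
	 pointer that is not clobbered asynchronously, so we can store
	 scratch data there without adjusting the stack pointer.  */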
24929      result = gen_rtx_MEM (mode,
24930			    gen_rtx_PLUS (Pmode,
24931					  stack_pointer_rtx,
24932					  GEN_INT (-RED_ZONE_SIZE)));
24933      emit_move_insn (result, operand);
24934    }
24935  else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
24936    {
24937      switch (mode)
24938	{
24939	case HImode:
24940	case SImode:
24941	  operand = gen_lowpart (DImode, operand);
24942	  /* FALLTHRU */
24943	case DImode:
24944	  emit_insn (
24945		      gen_rtx_SET (VOIDmode,
24946				   gen_rtx_MEM (DImode,
24947						gen_rtx_PRE_DEC (DImode,
24948							stack_pointer_rtx)),
24949				   operand));
24950	  break;
24951	default:
24952	  gcc_unreachable ();
24953	}
24954      result = gen_rtx_MEM (mode, stack_pointer_rtx);
24955    }
24956  else
24957    {
24958      switch (mode)
24959	{
24960	case DImode:
24961	  {
24962	    rtx operands[2];
24963	    split_di (&operand, 1, operands, operands + 1);
24964	    emit_insn (
24965			gen_rtx_SET (VOIDmode,
24966				     gen_rtx_MEM (SImode,
24967						  gen_rtx_PRE_DEC (Pmode,
24968							stack_pointer_rtx)),
24969				     operands[1]));
24970	    emit_insn (
24971			gen_rtx_SET (VOIDmode,
24972				     gen_rtx_MEM (SImode,
24973						  gen_rtx_PRE_DEC (Pmode,
24974							stack_pointer_rtx)),
24975				     operands[0]));
24976	  }
24977	  break;
24978	case HImode:
24979	  /* Store HImodes as SImodes.  */
24980	  operand = gen_lowpart (SImode, operand);
24981	  /* FALLTHRU */
24982	case SImode:
24983	  emit_insn (
24984		      gen_rtx_SET (VOIDmode,
24985				   gen_rtx_MEM (GET_MODE (operand),
24986						gen_rtx_PRE_DEC (SImode,
24987							stack_pointer_rtx)),
24988				   operand));
24989	  break;
24990	default:
24991	  gcc_unreachable ();
24992	}
24993      result = gen_rtx_MEM (mode, stack_pointer_rtx);
24994    }
24995  return result;
24996}
24997
24998/* Free operand from the memory.  */
24999void
25000ix86_free_from_memory (enum machine_mode mode)
25001{
25002  if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25003    {
25004      int size;
25005
25006      if (mode == DImode || TARGET_64BIT)
25007	size = 8;
25008      else
25009	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to a pop or add instruction if registers are available.  */
25012      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25013			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25014					    GEN_INT (size))));
25015    }
25016}
25017
25018/* Implement TARGET_IRA_COVER_CLASSES.  If -mfpmath=sse, we prefer
25019   SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
25020   same.  */
25021static const enum reg_class *
25022i386_ira_cover_classes (void)
25023{
25024  static const enum reg_class sse_fpmath_classes[] = {
25025    GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
25026  };
25027  static const enum reg_class no_sse_fpmath_classes[] = {
25028    GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
25029  };
25030
25031 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
25032}
25033
25034/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25035   QImode must go into class Q_REGS.
25036   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
25037   movdf to do mem-to-mem moves through integer regs.  */
25038enum reg_class
25039ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25040{
25041  enum machine_mode mode = GET_MODE (x);
25042
25043  /* We're only allowed to return a subclass of CLASS.  Many of the
25044     following checks fail for NO_REGS, so eliminate that early.  */
25045  if (regclass == NO_REGS)
25046    return NO_REGS;
25047
25048  /* All classes can load zeros.  */
25049  if (x == CONST0_RTX (mode))
25050    return regclass;
25051
25052  /* Force constants into memory if we are loading a (nonzero) constant into
25053     an MMX or SSE register.  This is because there are no MMX/SSE instructions
25054     to load from a constant.  */
25055  if (CONSTANT_P (x)
25056      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25057    return NO_REGS;
25058
25059  /* Prefer SSE regs only, if we can use them for math.  */
25060  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25061    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25062
25063  /* Floating-point constants need more complex checks.  */
25064  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25065    {
25066      /* General regs can load everything.  */
25067      if (reg_class_subset_p (regclass, GENERAL_REGS))
25068        return regclass;
25069
25070      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
25071	 zero above.  We only want to wind up preferring 80387 registers if
25072	 we plan on doing computation with them.  */
25073      if (TARGET_80387
25074	  && standard_80387_constant_p (x))
25075	{
25076	  /* Limit class to non-sse.  */
25077	  if (regclass == FLOAT_SSE_REGS)
25078	    return FLOAT_REGS;
25079	  if (regclass == FP_TOP_SSE_REGS)
25080	    return FP_TOP_REG;
25081	  if (regclass == FP_SECOND_SSE_REGS)
25082	    return FP_SECOND_REG;
25083	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25084	    return regclass;
25085	}
25086
25087      return NO_REGS;
25088    }
25089
  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int), which can only be computed into general
     regs.  */
25093  if (GET_CODE (x) == PLUS)
25094    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25095
25096  /* QImode constants are easy to load, but non-constant QImode data
25097     must go into Q_REGS.  */
25098  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25099    {
25100      if (reg_class_subset_p (regclass, Q_REGS))
25101	return regclass;
25102      if (reg_class_subset_p (Q_REGS, regclass))
25103	return Q_REGS;
25104      return NO_REGS;
25105    }
25106
25107  return regclass;
25108}
25109
25110/* Discourage putting floating-point values in SSE registers unless
25111   SSE math is being used, and likewise for the 387 registers.  */
25112enum reg_class
25113ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25114{
25115  enum machine_mode mode = GET_MODE (x);
25116
  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we cannot return a subset of CLASS, reject this
     alternative by returning NO_REGS; if reload cannot honor that, it will
     still use its own choice.  */
25121  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25122    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25123
25124  if (X87_FLOAT_MODE_P (mode))
25125    {
25126      if (regclass == FP_TOP_SSE_REGS)
25127	return FP_TOP_REG;
25128      else if (regclass == FP_SECOND_SSE_REGS)
25129	return FP_SECOND_REG;
25130      else
25131	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
25132    }
25133
25134  return regclass;
25135}
25136
25137static enum reg_class
25138ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25139		       enum machine_mode mode,
25140		       secondary_reload_info *sri ATTRIBUTE_UNUSED)
25141{
  /* QImode spills from non-QI registers require an
     intermediate register on 32-bit targets.  */
25144  if (!TARGET_64BIT
25145      && !in_p && mode == QImode
25146      && (rclass == GENERAL_REGS
25147	  || rclass == LEGACY_REGS
25148	  || rclass == INDEX_REGS))
25149    {
25150      int regno;
25151
25152      if (REG_P (x))
25153	regno = REGNO (x);
25154      else
25155	regno = -1;
25156
25157      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25158	regno = true_regnum (x);
25159
25160      /* Return Q_REGS if the operand is in memory.  */
25161      if (regno == -1)
25162	return Q_REGS;
25163    }
25164
  /* This condition handles the corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.
25168
25169     (set (reg:V2DI 74 [ vect_cst_.2 ])
25170          (vec_duplicate:V2DI (reg/f:DI 20 frame)))
25171
25172     Eventually frame gets turned into sp+offset like this:
25173
25174     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
25175          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
25176	                               (const_int 392 [0x188]))))
25177
25178     That later gets turned into:
25179
25180     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
25181          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
25182	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
25183
25184     We'll have the following reload recorded:
25185
25186     Reload 0: reload_in (DI) =
25187           (plus:DI (reg/f:DI 7 sp)
25188            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
25189     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
25190     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
25191     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
25192     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
25193     reload_reg_rtx: (reg:V2DI 22 xmm1)
25194
     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into an integer
     register and reload can handle subsequent reloads without problems.  */
25198
25199  if (in_p && GET_CODE (x) == PLUS
25200      && SSE_CLASS_P (rclass)
25201      && SCALAR_INT_MODE_P (mode))
25202    return GENERAL_REGS;
25203
25204  return NO_REGS;
25205}
25206
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   To keep register_move_cost fast, provide an inline variant.

   The check cannot work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do
   not enforce these sanity checks.  */
25219
25220static inline int
25221inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25222			      enum machine_mode mode, int strict)
25223{
25224  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25225      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25226      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25227      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25228      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25229      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25230    {
25231      gcc_assert (!strict);
25232      return true;
25233    }
25234
25235  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25236    return true;
25237
25238  /* ??? This is a lie.  We do have moves between mmx/general, and for
25239     mmx/sse2.  But by saying we need secondary memory we discourage the
25240     register allocator from using the mmx registers unless needed.  */
25241  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25242    return true;
25243
25244  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25245    {
25246      /* SSE1 doesn't have any direct moves from other classes.  */
25247      if (!TARGET_SSE2)
25248	return true;
25249
25250      /* If the target says that inter-unit moves are more expensive
25251	 than moving through memory, then don't generate them.  */
25252      if (!TARGET_INTER_UNIT_MOVES)
25253	return true;
25254
25255      /* Between SSE and general, we have moves no larger than word size.  */
25256      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
25257	return true;
25258    }
25259
25260  return false;
25261}
25262
25263int
25264ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25265			      enum machine_mode mode, int strict)
25266{
25267  return inline_secondary_memory_needed (class1, class2, mode, strict);
25268}
25269
25270/* Return true if the registers in CLASS cannot represent the change from
25271   modes FROM to TO.  */
25272
25273bool
25274ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25275			       enum reg_class regclass)
25276{
25277  if (from == to)
25278    return false;
25279
25280  /* x87 registers can't do subreg at all, as all values are reformatted
25281     to extended precision.  */
25282  if (MAYBE_FLOAT_CLASS_P (regclass))
25283    return true;
25284
25285  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25286    {
25287      /* Vector registers do not support QI or HImode loads.  If we don't
25288	 disallow a change to these modes, reload will assume it's ok to
25289	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
25290	 the vec_dupv4hi pattern.  */
25291      if (GET_MODE_SIZE (from) < 4)
25292	return true;
25293
25294      /* Vector registers do not support subreg with nonzero offsets, which
25295	 are otherwise valid for integer registers.  Since we can't see
25296	 whether we have a nonzero offset from here, prohibit all
25297         nonparadoxical subregs changing size.  */
25298      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25299	return true;
25300    }
25301
25302  return false;
25303}
25304
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost, which is used to
   build tables at startup, so make it inline here.
   When IN is 2, return the maximum of the in and out move costs.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode registers in non
   Q_REGS classes.  */
25320static inline int
25321inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
25322			 int in)
25323{
25324  int cost;
25325  if (FLOAT_CLASS_P (regclass))
25326    {
25327      int index;
25328      switch (mode)
25329	{
25330	  case SFmode:
25331	    index = 0;
25332	    break;
25333	  case DFmode:
25334	    index = 1;
25335	    break;
25336	  case XFmode:
25337	    index = 2;
25338	    break;
25339	  default:
25340	    return 100;
25341	}
25342      if (in == 2)
25343        return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25344      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
25345    }
25346  if (SSE_CLASS_P (regclass))
25347    {
25348      int index;
25349      switch (GET_MODE_SIZE (mode))
25350	{
25351	  case 4:
25352	    index = 0;
25353	    break;
25354	  case 8:
25355	    index = 1;
25356	    break;
25357	  case 16:
25358	    index = 2;
25359	    break;
25360	  default:
25361	    return 100;
25362	}
25363      if (in == 2)
25364        return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25365      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
25366    }
25367  if (MMX_CLASS_P (regclass))
25368    {
25369      int index;
25370      switch (GET_MODE_SIZE (mode))
25371	{
25372	  case 4:
25373	    index = 0;
25374	    break;
25375	  case 8:
25376	    index = 1;
25377	    break;
25378	  default:
25379	    return 100;
25380	}
      if (in == 2)
25382        return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25383      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
25384    }
25385  switch (GET_MODE_SIZE (mode))
25386    {
25387      case 1:
25388	if (Q_CLASS_P (regclass) || TARGET_64BIT)
25389	  {
25390	    if (!in)
25391	      return ix86_cost->int_store[0];
25392	    if (TARGET_PARTIAL_REG_DEPENDENCY
25393	        && optimize_function_for_speed_p (cfun))
25394	      cost = ix86_cost->movzbl_load;
25395	    else
25396	      cost = ix86_cost->int_load[0];
25397	    if (in == 2)
25398	      return MAX (cost, ix86_cost->int_store[0]);
25399	    return cost;
25400	  }
25401	else
25402	  {
25403	   if (in == 2)
25404	     return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25405	   if (in)
25406	     return ix86_cost->movzbl_load;
25407	   else
25408	     return ix86_cost->int_store[0] + 4;
25409	  }
25410	break;
25411      case 2:
25412	if (in == 2)
25413	  return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25414	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25415      default:
	/* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
25417	if (mode == TFmode)
25418	  mode = XFmode;
25419	if (in == 2)
25420	  cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25421	else if (in)
25422	  cost = ix86_cost->int_load[2];
25423	else
25424	  cost = ix86_cost->int_store[2];
25425	return (cost * (((int) GET_MODE_SIZE (mode)
25426		        + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
25427    }
25428}
25429
25430int
25431ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25432{
25433  return inline_memory_move_cost (mode, regclass, in);
25434}
25435
25436
25437/* Return the cost of moving data from a register in class CLASS1 to
25438   one in class CLASS2.
25439
25440   It is not required that the cost always equal 2 when FROM is the same as TO;
25441   on some machines it is expensive to move between registers if they are not
25442   general registers.  */
25443
25444int
25445ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25446			 enum reg_class class2)
25447{
  /* In case we require secondary memory, compute the cost of the store
     followed by the load.  To avoid bad register allocation choices, this
     needs to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
25451
25452  if (inline_secondary_memory_needed (class1, class2, mode, 0))
25453    {
25454      int cost = 1;
25455
25456      cost += inline_memory_move_cost (mode, class1, 2);
25457      cost += inline_memory_move_cost (mode, class2, 2);
25458
      /* When copying from a general purpose register we may emit multiple
         stores followed by a single load, causing a memory size mismatch
         stall.  Count this as an arbitrarily high cost of 20.  */
25462      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25463	cost += 20;
25464
25465      /* In the case of FP/MMX moves, the registers actually overlap, and we
25466	 have to switch modes in order to treat them differently.  */
25467      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25468          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25469	cost += 20;
25470
25471      return cost;
25472    }
25473
25474  /* Moves between SSE/MMX and integer unit are expensive.  */
25475  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25476      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25477
    /* ??? By keeping the returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, a high value avoids a problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
25484    return MAX (8, ix86_cost->mmxsse_to_integer);
25485
25486  if (MAYBE_FLOAT_CLASS_P (class1))
25487    return ix86_cost->fp_move;
25488  if (MAYBE_SSE_CLASS_P (class1))
25489    return ix86_cost->sse_move;
25490  if (MAYBE_MMX_CLASS_P (class1))
25491    return ix86_cost->mmx_move;
25492  return 2;
25493}
25494
25495/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
25496
25497bool
25498ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
25499{
  /* Flags, and only flags, can hold CCmode values.  */
25501  if (CC_REGNO_P (regno))
25502    return GET_MODE_CLASS (mode) == MODE_CC;
25503  if (GET_MODE_CLASS (mode) == MODE_CC
25504      || GET_MODE_CLASS (mode) == MODE_RANDOM
25505      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
25506    return 0;
25507  if (FP_REGNO_P (regno))
25508    return VALID_FP_MODE_P (mode);
25509  if (SSE_REGNO_P (regno))
25510    {
25511      /* We implement the move patterns for all vector modes into and
25512	 out of SSE registers, even when no operation instructions
25513	 are available.  OImode move is available only when AVX is
25514	 enabled.  */
25515      return ((TARGET_AVX && mode == OImode)
25516	      || VALID_AVX256_REG_MODE (mode)
25517	      || VALID_SSE_REG_MODE (mode)
25518	      || VALID_SSE2_REG_MODE (mode)
25519	      || VALID_MMX_REG_MODE (mode)
25520	      || VALID_MMX_REG_MODE_3DNOW (mode));
25521    }
25522  if (MMX_REGNO_P (regno))
25523    {
25524      /* We implement the move patterns for 3DNOW modes even in MMX mode,
25525	 so if the register is available at all, then we can move data of
25526	 the given mode into or out of it.  */
25527      return (VALID_MMX_REG_MODE (mode)
25528	      || VALID_MMX_REG_MODE_3DNOW (mode));
25529    }
25530
25531  if (mode == QImode)
25532    {
      /* Take care with QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
25535      if (regno <= BX_REG || TARGET_64BIT)
25536	return 1;
25537      if (!TARGET_PARTIAL_REG_STALL)
25538	return 1;
25539      return reload_in_progress || reload_completed;
25540    }
25541  /* We handle both integer and floats in the general purpose registers.  */
25542  else if (VALID_INT_MODE_P (mode))
25543    return 1;
25544  else if (VALID_FP_MODE_P (mode))
25545    return 1;
25546  else if (VALID_DFP_MODE_P (mode))
25547    return 1;
25548  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
25549     on to use that value in smaller contexts, this can easily force a
25550     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
25551     supporting DImode, allow it.  */
25552  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
25553    return 1;
25554
25555  return 0;
25556}
25557
25558/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
25559   tieable integer mode.  */
25560
25561static bool
25562ix86_tieable_integer_mode_p (enum machine_mode mode)
25563{
25564  switch (mode)
25565    {
25566    case HImode:
25567    case SImode:
25568      return true;
25569
25570    case QImode:
25571      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
25572
25573    case DImode:
25574      return TARGET_64BIT;
25575
25576    default:
25577      return false;
25578    }
25579}
25580
25581/* Return true if MODE1 is accessible in a register that can hold MODE2
25582   without copying.  That is, all register classes that can hold MODE2
25583   can also hold MODE1.  */
25584
25585bool
25586ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
25587{
25588  if (mode1 == mode2)
25589    return true;
25590
25591  if (ix86_tieable_integer_mode_p (mode1)
25592      && ix86_tieable_integer_mode_p (mode2))
25593    return true;
25594
25595  /* MODE2 being XFmode implies fp stack or general regs, which means we
25596     can tie any smaller floating point modes to it.  Note that we do not
25597     tie this with TFmode.  */
25598  if (mode2 == XFmode)
25599    return mode1 == SFmode || mode1 == DFmode;
25600
25601  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
25602     that we can tie it with SFmode.  */
25603  if (mode2 == DFmode)
25604    return mode1 == SFmode;
25605
25606  /* If MODE2 is only appropriate for an SSE register, then tie with
25607     any other mode acceptable to SSE registers.  */
25608  if (GET_MODE_SIZE (mode2) == 16
25609      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
25610    return (GET_MODE_SIZE (mode1) == 16
25611	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
25612
25613  /* If MODE2 is appropriate for an MMX register, then tie
25614     with any other mode acceptable to MMX registers.  */
25615  if (GET_MODE_SIZE (mode2) == 8
25616      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
25617    return (GET_MODE_SIZE (mode1) == 8
25618	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
25619
25620  return false;
25621}
25622
25623/* Compute a (partial) cost for rtx X.  Return true if the complete
25624   cost has been computed, and false if subexpressions should be
25625   scanned.  In either case, *TOTAL contains the cost result.  */
25626
25627static bool
25628ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
25629{
25630  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
25631  enum machine_mode mode = GET_MODE (x);
25632  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
25633
25634  switch (code)
25635    {
25636    case CONST_INT:
25637    case CONST:
25638    case LABEL_REF:
25639    case SYMBOL_REF:
25640      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
25641	*total = 3;
25642      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
25643	*total = 2;
25644      else if (flag_pic && SYMBOLIC_CONST (x)
25645	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
25647		       && (GET_CODE (x) != SYMBOL_REF
25648		           || !SYMBOL_REF_LOCAL_P (x)))))
25649	*total = 1;
25650      else
25651	*total = 0;
25652      return true;
25653
25654    case CONST_DOUBLE:
25655      if (mode == VOIDmode)
25656	*total = 0;
25657      else
25658	switch (standard_80387_constant_p (x))
25659	  {
25660	  case 1: /* 0.0 */
25661	    *total = 1;
25662	    break;
25663	  default: /* Other constants */
25664	    *total = 2;
25665	    break;
25666	  case 0:
25667	  case -1:
25668	    /* Start with (MEM (SYMBOL_REF)), since that's where
25669	       it'll probably end up.  Add a penalty for size.  */
25670	    *total = (COSTS_N_INSNS (1)
25671		      + (flag_pic != 0 && !TARGET_64BIT)
25672		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
25673	    break;
25674	  }
25675      return true;
25676
25677    case ZERO_EXTEND:
      /* Zero extension is often completely free on x86_64, so make
	 it as cheap as possible.  */
25680      if (TARGET_64BIT && mode == DImode
25681	  && GET_MODE (XEXP (x, 0)) == SImode)
25682	*total = 1;
25683      else if (TARGET_ZERO_EXTEND_WITH_AND)
25684	*total = cost->add;
25685      else
25686	*total = cost->movzx;
25687      return false;
25688
25689    case SIGN_EXTEND:
25690      *total = cost->movsx;
25691      return false;
25692
25693    case ASHIFT:
25694      if (CONST_INT_P (XEXP (x, 1))
25695	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
25696	{
25697	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25698	  if (value == 1)
25699	    {
25700	      *total = cost->add;
25701	      return false;
25702	    }
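	  /* A shift left by 2 or 3 can be done with an lea using scale
	     factor 4 or 8 (e.g. "leal 0(,%eax,4), %eax" for x << 2), so
	     prefer lea when it is no more expensive than a shift.  */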
25703	  if ((value == 2 || value == 3)
25704	      && cost->lea <= cost->shift_const)
25705	    {
25706	      *total = cost->lea;
25707	      return false;
25708	    }
25709	}
25710      /* FALLTHRU */
25711
25712    case ROTATE:
25713    case ASHIFTRT:
25714    case LSHIFTRT:
25715    case ROTATERT:
25716      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
25717	{
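	  /* A DImode shift on a 32-bit target expands to a multi-insn
	     double-word sequence, so charge a multiple of the single-word
	     shift cost.  */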
25718	  if (CONST_INT_P (XEXP (x, 1)))
25719	    {
25720	      if (INTVAL (XEXP (x, 1)) > 32)
25721		*total = cost->shift_const + COSTS_N_INSNS (2);
25722	      else
25723		*total = cost->shift_const * 2;
25724	    }
25725	  else
25726	    {
25727	      if (GET_CODE (XEXP (x, 1)) == AND)
25728		*total = cost->shift_var * 2;
25729	      else
25730		*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
25731	    }
25732	}
25733      else
25734	{
25735	  if (CONST_INT_P (XEXP (x, 1)))
25736	    *total = cost->shift_const;
25737	  else
25738	    *total = cost->shift_var;
25739	}
25740      return false;
25741
25742    case MULT:
25743      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25744	{
25745	  /* ??? SSE scalar cost should be used here.  */
25746	  *total = cost->fmul;
25747	  return false;
25748	}
25749      else if (X87_FLOAT_MODE_P (mode))
25750	{
25751	  *total = cost->fmul;
25752	  return false;
25753	}
25754      else if (FLOAT_MODE_P (mode))
25755	{
25756	  /* ??? SSE vector cost should be used here.  */
25757	  *total = cost->fmul;
25758	  return false;
25759	}
25760      else
25761	{
25762	  rtx op0 = XEXP (x, 0);
25763	  rtx op1 = XEXP (x, 1);
25764	  int nbits;
25765	  if (CONST_INT_P (XEXP (x, 1)))
25766	    {
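	      /* Count the set bits of the constant multiplier; each
		 "value &= value - 1" step clears the lowest set bit.
		 mult_bit charges an extra cost per set bit.  */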
25767	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25768	      for (nbits = 0; value != 0; value &= value - 1)
25769	        nbits++;
25770	    }
25771	  else
25772	    /* This is arbitrary.  */
25773	    nbits = 7;
25774
25775	  /* Compute costs correctly for widening multiplication.  */
25776	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
25777	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
25778	         == GET_MODE_SIZE (mode))
25779	    {
25780	      int is_mulwiden = 0;
25781	      enum machine_mode inner_mode = GET_MODE (op0);
25782
25783	      if (GET_CODE (op0) == GET_CODE (op1))
25784		is_mulwiden = 1, op1 = XEXP (op1, 0);
25785	      else if (CONST_INT_P (op1))
25786		{
25787		  if (GET_CODE (op0) == SIGN_EXTEND)
25788		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
25789			          == INTVAL (op1);
25790		  else
25791		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
25792	        }
25793
25794	      if (is_mulwiden)
25795	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
25796	    }
25797
25798  	  *total = (cost->mult_init[MODE_INDEX (mode)]
25799		    + nbits * cost->mult_bit
25800	            + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
25801
25802          return true;
25803	}
25804
25805    case DIV:
25806    case UDIV:
25807    case MOD:
25808    case UMOD:
25809      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25810	/* ??? SSE cost should be used here.  */
25811	*total = cost->fdiv;
25812      else if (X87_FLOAT_MODE_P (mode))
25813	*total = cost->fdiv;
25814      else if (FLOAT_MODE_P (mode))
25815	/* ??? SSE vector cost should be used here.  */
25816	*total = cost->fdiv;
25817      else
25818	*total = cost->divide[MODE_INDEX (mode)];
25819      return false;
25820
25821    case PLUS:
25822      if (GET_MODE_CLASS (mode) == MODE_INT
25823	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
25824	{
25825	  if (GET_CODE (XEXP (x, 0)) == PLUS
25826	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
25827	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
25828	      && CONSTANT_P (XEXP (x, 1)))
25829	    {
25830	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
25831	      if (val == 2 || val == 4 || val == 8)
25832		{
25833		  *total = cost->lea;
25834		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25835		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
25836				      outer_code, speed);
25837		  *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25838		  return true;
25839		}
25840	    }
25841	  else if (GET_CODE (XEXP (x, 0)) == MULT
25842		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
25843	    {
25844	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
25845	      if (val == 2 || val == 4 || val == 8)
25846		{
25847		  *total = cost->lea;
25848		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25849		  *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25850		  return true;
25851		}
25852	    }
25853	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
25854	    {
25855	      *total = cost->lea;
25856	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25857	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25858	      *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25859	      return true;
25860	    }
25861	}
25862      /* FALLTHRU */
25863
25864    case MINUS:
25865      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25866	{
25867	  /* ??? SSE cost should be used here.  */
25868	  *total = cost->fadd;
25869	  return false;
25870	}
25871      else if (X87_FLOAT_MODE_P (mode))
25872	{
25873	  *total = cost->fadd;
25874	  return false;
25875	}
25876      else if (FLOAT_MODE_P (mode))
25877	{
25878	  /* ??? SSE vector cost should be used here.  */
25879	  *total = cost->fadd;
25880	  return false;
25881	}
25882      /* FALLTHRU */
25883
25884    case AND:
25885    case IOR:
25886    case XOR:
25887      if (!TARGET_64BIT && mode == DImode)
25888	{
25889	  *total = (cost->add * 2
25890		    + (rtx_cost (XEXP (x, 0), outer_code, speed)
25891		       << (GET_MODE (XEXP (x, 0)) != DImode))
25892		    + (rtx_cost (XEXP (x, 1), outer_code, speed)
25893	               << (GET_MODE (XEXP (x, 1)) != DImode)));
25894	  return true;
25895	}
25896      /* FALLTHRU */
25897
25898    case NEG:
25899      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25900	{
25901	  /* ??? SSE cost should be used here.  */
25902	  *total = cost->fchs;
25903	  return false;
25904	}
25905      else if (X87_FLOAT_MODE_P (mode))
25906	{
25907	  *total = cost->fchs;
25908	  return false;
25909	}
25910      else if (FLOAT_MODE_P (mode))
25911	{
25912	  /* ??? SSE vector cost should be used here.  */
25913	  *total = cost->fchs;
25914	  return false;
25915	}
25916      /* FALLTHRU */
25917
25918    case NOT:
25919      if (!TARGET_64BIT && mode == DImode)
25920	*total = cost->add * 2;
25921      else
25922	*total = cost->add;
25923      return false;
25924
25925    case COMPARE:
25926      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
25927	  && XEXP (XEXP (x, 0), 1) == const1_rtx
25928	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
25929	  && XEXP (x, 1) == const0_rtx)
25930	{
25931	  /* This kind of construct is implemented using test[bwl].
25932	     Treat it as if we had an AND.  */
25933	  *total = (cost->add
25934		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
25935		    + rtx_cost (const1_rtx, outer_code, speed));
25936	  return true;
25937	}
25938      return false;
25939
25940    case FLOAT_EXTEND:
25941      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
25942	*total = 0;
25943      return false;
25944
25945    case ABS:
25946      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25947	/* ??? SSE cost should be used here.  */
25948	*total = cost->fabs;
25949      else if (X87_FLOAT_MODE_P (mode))
25950	*total = cost->fabs;
25951      else if (FLOAT_MODE_P (mode))
25952	/* ??? SSE vector cost should be used here.  */
25953	*total = cost->fabs;
25954      return false;
25955
25956    case SQRT:
25957      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25958	/* ??? SSE cost should be used here.  */
25959	*total = cost->fsqrt;
25960      else if (X87_FLOAT_MODE_P (mode))
25961	*total = cost->fsqrt;
25962      else if (FLOAT_MODE_P (mode))
25963	/* ??? SSE vector cost should be used here.  */
25964	*total = cost->fsqrt;
25965      return false;
25966
25967    case UNSPEC:
25968      if (XINT (x, 1) == UNSPEC_TP)
25969	*total = 0;
25970      return false;
25971
25972    case VEC_SELECT:
25973    case VEC_CONCAT:
25974    case VEC_MERGE:
25975    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable, in which case they all pretty much have the
	 same cost.  */
      *total = COSTS_N_INSNS (1);
      return true;
25981
25982    default:
25983      return false;
25984    }
25985}
25986
25987#if TARGET_MACHO
25988
25989static int current_machopic_label_num;
25990
25991/* Given a symbol name and its associated stub, write out the
25992   definition of the stub.  */
25993
25994void
25995machopic_output_stub (FILE *file, const char *symb, const char *stub)
25996{
25997  unsigned int length;
25998  char *binder_name, *symbol_name, lazy_ptr_name[32];
25999  int label = ++current_machopic_label_num;
26000
26001  /* For 64-bit we shouldn't get here.  */
26002  gcc_assert (!TARGET_64BIT);
26003
26004  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
26005  symb = (*targetm.strip_name_encoding) (symb);
26006
26007  length = strlen (stub);
26008  binder_name = XALLOCAVEC (char, length + 32);
26009  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26010
26011  length = strlen (symb);
26012  symbol_name = XALLOCAVEC (char, length + 32);
26013  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26014
26015  sprintf (lazy_ptr_name, "L%d$lz", label);
26016
26017  if (MACHOPIC_PURE)
26018    switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26019  else
26020    switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26021
26022  fprintf (file, "%s:\n", stub);
26023  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26024
26025  if (MACHOPIC_PURE)
26026    {
26027      fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26028      fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26029      fprintf (file, "\tjmp\t*%%edx\n");
26030    }
26031  else
26032    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26033
26034  fprintf (file, "%s:\n", binder_name);
26035
26036  if (MACHOPIC_PURE)
26037    {
26038      fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26039      fputs ("\tpushl\t%eax\n", file);
26040    }
26041  else
26042    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26043
26044  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
26045
26046  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26047  fprintf (file, "%s:\n", lazy_ptr_name);
26048  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26049  fprintf (file, ASM_LONG "%s\n", binder_name);
26050}
26051#endif /* TARGET_MACHO */
26052
26053/* Order the registers for register allocator.  */
26054
26055void
26056x86_order_regs_for_local_alloc (void)
26057{
26058   int pos = 0;
26059   int i;
26060
26061   /* First allocate the local general purpose registers.  */
26062   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26063     if (GENERAL_REGNO_P (i) && call_used_regs[i])
26064	reg_alloc_order [pos++] = i;
26065
26066   /* Global general purpose registers.  */
26067   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26068     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26069	reg_alloc_order [pos++] = i;
26070
26071   /* x87 registers come first in case we are doing FP math
26072      using them.  */
26073   if (!TARGET_SSE_MATH)
26074     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26075       reg_alloc_order [pos++] = i;
26076
26077   /* SSE registers.  */
26078   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26079     reg_alloc_order [pos++] = i;
26080   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26081     reg_alloc_order [pos++] = i;
26082
26083   /* x87 registers.  */
26084   if (TARGET_SSE_MATH)
26085     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26086       reg_alloc_order [pos++] = i;
26087
26088   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26089     reg_alloc_order [pos++] = i;
26090
   /* Initialize the rest of the array, as we do not allocate some
      registers at all.  */
26093   while (pos < FIRST_PSEUDO_REGISTER)
26094     reg_alloc_order [pos++] = 0;
26095}
26096
26097/* Handle a "ms_abi" or "sysv" attribute; arguments as in
26098   struct attribute_spec.handler.  */
26099static tree
26100ix86_handle_abi_attribute (tree *node, tree name,
26101			      tree args ATTRIBUTE_UNUSED,
26102			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26103{
26104  if (TREE_CODE (*node) != FUNCTION_TYPE
26105      && TREE_CODE (*node) != METHOD_TYPE
26106      && TREE_CODE (*node) != FIELD_DECL
26107      && TREE_CODE (*node) != TYPE_DECL)
26108    {
26109      warning (OPT_Wattributes, "%qE attribute only applies to functions",
26110	       name);
26111      *no_add_attrs = true;
26112      return NULL_TREE;
26113    }
26114  if (!TARGET_64BIT)
26115    {
26116      warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
26117	       name);
26118      *no_add_attrs = true;
26119      return NULL_TREE;
26120    }
26121
  /* The ms_abi and sysv_abi attributes are mutually exclusive.  */
26123  if (is_attribute_p ("ms_abi", name))
26124    {
26125      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26126        {
26127	  error ("ms_abi and sysv_abi attributes are not compatible");
26128	}
26129
26130      return NULL_TREE;
26131    }
26132  else if (is_attribute_p ("sysv_abi", name))
26133    {
26134      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26135        {
26136	  error ("ms_abi and sysv_abi attributes are not compatible");
26137	}
26138
26139      return NULL_TREE;
26140    }
26141
26142  return NULL_TREE;
26143}
26144
26145/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26146   struct attribute_spec.handler.  */
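/* For example,

       struct s { char c; double d; } __attribute__ ((ms_struct));

   requests the Microsoft struct and bit-field layout rules for s even
   when the native (gcc_struct) layout is the default.  */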
26147static tree
26148ix86_handle_struct_attribute (tree *node, tree name,
26149			      tree args ATTRIBUTE_UNUSED,
26150			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26151{
26152  tree *type = NULL;
26153  if (DECL_P (*node))
26154    {
26155      if (TREE_CODE (*node) == TYPE_DECL)
26156	type = &TREE_TYPE (*node);
26157    }
26158  else
26159    type = node;
26160
26161  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26162		 || TREE_CODE (*type) == UNION_TYPE)))
26163    {
26164      warning (OPT_Wattributes, "%qE attribute ignored",
26165	       name);
26166      *no_add_attrs = true;
26167    }
26168
26169  else if ((is_attribute_p ("ms_struct", name)
26170	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26171	   || ((is_attribute_p ("gcc_struct", name)
26172		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26173    {
26174      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
26175               name);
26176      *no_add_attrs = true;
26177    }
26178
26179  return NULL_TREE;
26180}
26181
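/* Handle a "ms_hook_prologue" attribute; arguments as in
   struct attribute_spec.handler.  */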
26182static tree
26183ix86_handle_fndecl_attribute (tree *node, tree name,
26184                              tree args ATTRIBUTE_UNUSED,
26185                              int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26186{
26187  if (TREE_CODE (*node) != FUNCTION_DECL)
26188    {
26189      warning (OPT_Wattributes, "%qE attribute only applies to functions",
26190               name);
26191      *no_add_attrs = true;
26192      return NULL_TREE;
26193    }
26194
26195  if (TARGET_64BIT)
26196    {
26197      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
26198               name);
26199      return NULL_TREE;
26200    }
26201
26202#ifndef HAVE_AS_IX86_SWAP
26203  sorry ("ms_hook_prologue attribute needs assembler swap suffix support");
26204#endif
26205
  return NULL_TREE;
26207}
26208
26209static bool
26210ix86_ms_bitfield_layout_p (const_tree record_type)
26211{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
26215}
26216
26217/* Returns an expression indicating where the this parameter is
26218   located on entry to the FUNCTION.  */
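/* In 64-bit mode this is simply the first integer argument register of
   the function's ABI (or the second one when a hidden aggregate return
   pointer occupies the first).  In 32-bit mode it is a register only for
   regparm/fastcall functions; otherwise it is the first stack slot above
   the return address.  */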
26219
26220static rtx
26221x86_this_parameter (tree function)
26222{
26223  tree type = TREE_TYPE (function);
26224  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26225  int nregs;
26226
26227  if (TARGET_64BIT)
26228    {
26229      const int *parm_regs;
26230
26231      if (ix86_function_type_abi (type) == MS_ABI)
26232        parm_regs = x86_64_ms_abi_int_parameter_registers;
26233      else
26234        parm_regs = x86_64_int_parameter_registers;
26235      return gen_rtx_REG (DImode, parm_regs[aggr]);
26236    }
26237
26238  nregs = ix86_function_regparm (type, function);
26239
26240  if (nregs > 0 && !stdarg_p (type))
26241    {
26242      int regno;
26243
26244      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26245	regno = aggr ? DX_REG : CX_REG;
26246      else
26247        {
26248	  regno = AX_REG;
26249	  if (aggr)
26250	    {
26251	      regno = DX_REG;
26252	      if (nregs == 1)
26253		return gen_rtx_MEM (SImode,
26254				    plus_constant (stack_pointer_rtx, 4));
26255	    }
26256	}
26257      return gen_rtx_REG (SImode, regno);
26258    }
26259
26260  return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26261}
26262
26263/* Determine whether x86_output_mi_thunk can succeed.  */
26264
26265static bool
26266x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26267			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26268			 HOST_WIDE_INT vcall_offset, const_tree function)
26269{
26270  /* 64-bit can handle anything.  */
26271  if (TARGET_64BIT)
26272    return true;
26273
26274  /* For 32-bit, everything's fine if we have one free register.  */
26275  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26276    return true;
26277
26278  /* Need a free register for vcall_offset.  */
26279  if (vcall_offset)
26280    return false;
26281
26282  /* Need a free register for GOT references.  */
26283  if (flag_pic && !(*targetm.binds_local_p) (function))
26284    return false;
26285
26286  /* Otherwise ok.  */
26287  return true;
26288}
26289
26290/* Output the assembler code for a thunk function.  THUNK_DECL is the
26291   declaration for the thunk function itself, FUNCTION is the decl for
26292   the target function.  DELTA is an immediate constant offset to be
26293   added to THIS.  If VCALL_OFFSET is nonzero, the word at
26294   *(*this + vcall_offset) should be added to THIS.  */
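/* As an illustration, a 64-bit SysV thunk with a small DELTA and no
   VCALL_OFFSET typically reduces to just

	addq	$DELTA, %rdi
	jmp	function

   while larger adjustments or a nonzero VCALL_OFFSET need the %r10 and
   %r11 scratch registers set up below.  */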
26295
26296static void
26297x86_output_mi_thunk (FILE *file,
26298		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26299		     HOST_WIDE_INT vcall_offset, tree function)
26300{
26301  rtx xops[3];
26302  rtx this_param = x86_this_parameter (function);
26303  rtx this_reg, tmp;
26304
26305  /* Make sure unwind info is emitted for the thunk if needed.  */
26306  final_start_function (emit_barrier (), file, 1);
26307
26308  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
26309     pull it in now and let DELTA benefit.  */
26310  if (REG_P (this_param))
26311    this_reg = this_param;
26312  else if (vcall_offset)
26313    {
26314      /* Put the this parameter into %eax.  */
26315      xops[0] = this_param;
26316      xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26317      output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26318    }
26319  else
26320    this_reg = NULL_RTX;
26321
26322  /* Adjust the this parameter by a fixed constant.  */
26323  if (delta)
26324    {
      /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
         Exception: -128 fits in a sign-extended byte while +128 does not,
         so use the opposite operation and negate DELTA in that case.  */
26327      bool sub = delta < 0 || delta == 128;
26328      xops[0] = GEN_INT (sub ? -delta : delta);
26329      xops[1] = this_reg ? this_reg : this_param;
26330      if (TARGET_64BIT)
26331	{
26332	  if (!x86_64_general_operand (xops[0], DImode))
26333	    {
26334	      tmp = gen_rtx_REG (DImode, R10_REG);
26335	      xops[1] = tmp;
26336	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26337	      xops[0] = tmp;
26338	      xops[1] = this_param;
26339	    }
26340	  if (sub)
26341	    output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
26342	  else
26343	    output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26344	}
26345      else if (sub)
26346	output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
26347      else
26348	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26349    }
26350
26351  /* Adjust the this parameter by a value stored in the vtable.  */
26352  if (vcall_offset)
26353    {
26354      if (TARGET_64BIT)
26355	tmp = gen_rtx_REG (DImode, R10_REG);
26356      else
26357	{
26358	  int tmp_regno = CX_REG;
26359	  if (lookup_attribute ("fastcall",
26360				TYPE_ATTRIBUTES (TREE_TYPE (function))))
26361	    tmp_regno = AX_REG;
26362	  tmp = gen_rtx_REG (SImode, tmp_regno);
26363	}
26364
26365      xops[0] = gen_rtx_MEM (Pmode, this_reg);
26366      xops[1] = tmp;
26367      output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26368
26369      /* Adjust the this parameter.  */
26370      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
26371      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26372	{
26373	  rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26374	  xops[0] = GEN_INT (vcall_offset);
26375	  xops[1] = tmp2;
26376	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26377	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26378	}
26379      xops[1] = this_reg;
26380      output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26381    }
26382
26383  /* If necessary, drop THIS back to its stack slot.  */
26384  if (this_reg && this_reg != this_param)
26385    {
26386      xops[0] = this_reg;
26387      xops[1] = this_param;
26388      output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26389    }
26390
26391  xops[0] = XEXP (DECL_RTL (function), 0);
26392  if (TARGET_64BIT)
26393    {
26394      if (!flag_pic || (*targetm.binds_local_p) (function))
26395	output_asm_insn ("jmp\t%P0", xops);
26396      /* All thunks should be in the same object as their target,
26397	 and thus binds_local_p should be true.  */
      else if (cfun->machine->call_abi == MS_ABI)
26399	gcc_unreachable ();
26400      else
26401	{
26402	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26403	  tmp = gen_rtx_CONST (Pmode, tmp);
26404	  tmp = gen_rtx_MEM (QImode, tmp);
26405	  xops[0] = tmp;
26406	  output_asm_insn ("jmp\t%A0", xops);
26407	}
26408    }
26409  else
26410    {
26411      if (!flag_pic || (*targetm.binds_local_p) (function))
26412	output_asm_insn ("jmp\t%P0", xops);
26413      else
26414#if TARGET_MACHO
26415	if (TARGET_MACHO)
26416	  {
26417	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
26418	    tmp = (gen_rtx_SYMBOL_REF
26419		   (Pmode,
26420		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26421	    tmp = gen_rtx_MEM (QImode, tmp);
26422	    xops[0] = tmp;
26423	    output_asm_insn ("jmp\t%0", xops);
26424	  }
26425	else
26426#endif /* TARGET_MACHO */
26427	{
26428	  tmp = gen_rtx_REG (SImode, CX_REG);
26429	  output_set_got (tmp, NULL_RTX);
26430
26431	  xops[1] = tmp;
26432	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26433	  output_asm_insn ("jmp\t{*}%1", xops);
26434	}
26435    }
26436  final_end_function ();
26437}
26438
26439static void
26440x86_file_start (void)
26441{
26442  default_file_start ();
26443#if TARGET_MACHO
26444  darwin_file_start ();
26445#endif
26446  if (X86_FILE_START_VERSION_DIRECTIVE)
26447    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26448  if (X86_FILE_START_FLTUSED)
26449    fputs ("\t.global\t__fltused\n", asm_out_file);
26450  if (ix86_asm_dialect == ASM_INTEL)
26451    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
26452}
26453
26454int
26455x86_field_alignment (tree field, int computed)
26456{
26457  enum machine_mode mode;
26458  tree type = TREE_TYPE (field);
26459
26460  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
26461    return computed;
26462  mode = TYPE_MODE (strip_array_types (type));
26463  if (mode == DFmode || mode == DCmode
26464      || GET_MODE_CLASS (mode) == MODE_INT
26465      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26466    return MIN (32, computed);
26467  return computed;
26468}
26469
26470/* Output assembler code to FILE to increment profiler label # LABELNO
26471   for profiling a function entry.  */
26472void
26473x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
26474{
26475  if (TARGET_64BIT)
26476    {
26477#ifndef NO_PROFILE_COUNTERS
26478      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
26479#endif
26480
26481      if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26482	fputs ("\tcall\t*" MCOUNT_NAME "@GOTPCREL(%rip)\n", file);
26483      else
26484	fputs ("\tcall\t" MCOUNT_NAME "\n", file);
26485    }
26486  else if (flag_pic)
26487    {
26488#ifndef NO_PROFILE_COUNTERS
26489      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
26490	       LPREFIX, labelno);
26491#endif
26492      fputs ("\tcall\t*" MCOUNT_NAME "@GOT(%ebx)\n", file);
26493    }
26494  else
26495    {
26496#ifndef NO_PROFILE_COUNTERS
26497      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
26498	       LPREFIX, labelno);
26499#endif
26500      fputs ("\tcall\t" MCOUNT_NAME "\n", file);
26501    }
26502}
26503
26504#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we know the sizes of all 1-byte insns and of all
   memory addresses.  This is enough to eliminate unnecessary padding in
   99% of cases.  */
26509
26510static int
26511min_insn_size (rtx insn)
26512{
26513  int l = 0, len;
26514
26515  if (!INSN_P (insn) || !active_insn_p (insn))
26516    return 0;
26517
  /* Discard the alignments we have emitted, and jump table data.  */
26519  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26520      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26521    return 0;
26522  if (JUMP_TABLE_DATA_P (insn))
26523    return 0;
26524
  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
26527  if (CALL_P (insn)
26528      && symbolic_reference_mentioned_p (PATTERN (insn))
26529      && !SIBLING_CALL_P (insn))
26530    return 5;
26531  len = get_attr_length (insn);
26532  if (len <= 1)
26533    return 1;
26534
26535  /* For normal instructions we rely on get_attr_length being exact,
26536     with a few exceptions.  */
26537  if (!JUMP_P (insn))
26538    {
26539      enum attr_type type = get_attr_type (insn);
26540
26541      switch (type)
26542	{
26543	case TYPE_MULTI:
26544	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
26545	      || asm_noperands (PATTERN (insn)) >= 0)
26546	    return 0;
26547	  break;
26548	case TYPE_OTHER:
26549	case TYPE_FCMP:
26550	  break;
26551	default:
26552	  /* Otherwise trust get_attr_length.  */
26553	  return len;
26554	}
26555
26556      l = get_attr_length_address (insn);
26557      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
26558	l = 4;
26559    }
26560  if (l)
26561    return 1+l;
26562  else
26563    return 2;
26564}
26565
/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16-byte window.  */
26568
26569static void
26570ix86_avoid_jump_mispredicts (void)
26571{
26572  rtx insn, start = get_insns ();
26573  int nbytes = 0, njumps = 0;
26574  int isjump = 0;
26575
  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of the instructions in the interval including INSN and not
     including START.  When NBYTES is smaller than 16, it is possible
     that the end of START and INSN end up in the same 16-byte page.

     The smallest offset in the page at which INSN can start is the case
     where START ends at offset 0.  The offset of INSN is then
     NBYTES - sizeof (INSN).  We add a p2align to the 16-byte window with
     maxskip 15 - NBYTES + sizeof (INSN).  */
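  /* For instance, if the fourth jump makes NBYTES equal to 12 and INSN
     itself is 2 bytes, the pad request below is 15 - 12 + 2 = 5 bytes,
     which is enough to keep the four jumps from all landing in a single
     aligned 16-byte window.  */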
26586  for (insn = start; insn; insn = NEXT_INSN (insn))
26587    {
26588      int min_size;
26589
26590      if (LABEL_P (insn))
26591	{
26592	  int align = label_to_alignment (insn);
26593	  int max_skip = label_to_max_skip (insn);
26594
26595	  if (max_skip > 15)
26596	    max_skip = 15;
26597	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
26598	     already in the current 16 byte page, because otherwise
26599	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
26600	     bytes to reach 16 byte boundary.  */
26601	  if (align <= 0
26602	      || (align <= 3 && max_skip != (1 << align) - 1))
26603	    max_skip = 0;
26604	  if (dump_file)
26605	    fprintf (dump_file, "Label %i with max_skip %i\n",
26606		     INSN_UID (insn), max_skip);
26607	  if (max_skip)
26608	    {
26609	      while (nbytes + max_skip >= 16)
26610		{
26611		  start = NEXT_INSN (start);
26612		  if ((JUMP_P (start)
26613		       && GET_CODE (PATTERN (start)) != ADDR_VEC
26614		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26615		      || CALL_P (start))
26616		    njumps--, isjump = 1;
26617		  else
26618		    isjump = 0;
26619		  nbytes -= min_insn_size (start);
26620		}
26621	    }
26622	  continue;
26623	}
26624
26625      min_size = min_insn_size (insn);
26626      nbytes += min_size;
26627      if (dump_file)
26628	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
26629		 INSN_UID (insn), min_size);
26630      if ((JUMP_P (insn)
26631	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
26632	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
26633	  || CALL_P (insn))
26634	njumps++;
26635      else
26636	continue;
26637
26638      while (njumps > 3)
26639	{
26640	  start = NEXT_INSN (start);
26641	  if ((JUMP_P (start)
26642	       && GET_CODE (PATTERN (start)) != ADDR_VEC
26643	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26644	      || CALL_P (start))
26645	    njumps--, isjump = 1;
26646	  else
26647	    isjump = 0;
26648	  nbytes -= min_insn_size (start);
26649	}
26650      gcc_assert (njumps >= 0);
26651      if (dump_file)
26652        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
26653		 INSN_UID (start), INSN_UID (insn), nbytes);
26654
26655      if (njumps == 3 && isjump && nbytes < 16)
26656	{
26657	  int padsize = 15 - nbytes + min_insn_size (insn);
26658
26659	  if (dump_file)
26660	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
26661		     INSN_UID (insn), padsize);
26662          emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
26663	}
26664    }
26665}
26666#endif
26667
/* The AMD Athlon works faster when a RET is neither the destination of a
   conditional jump nor directly preceded by another jump instruction.
   We avoid the penalty by inserting a NOP just before the RET
   instructions in such cases.  */
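/* The replacement emitted below (return_internal_long) is, in effect, a
   `rep ret': one prefix byte of padding ahead of the return, which is
   enough to keep the branch predictor happy.  */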
26672static void
26673ix86_pad_returns (void)
26674{
26675  edge e;
26676  edge_iterator ei;
26677
26678  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
26679    {
26680      basic_block bb = e->src;
26681      rtx ret = BB_END (bb);
26682      rtx prev;
26683      bool replace = false;
26684
26685      if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
26686	  || optimize_bb_for_size_p (bb))
26687	continue;
26688      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
26689	if (active_insn_p (prev) || LABEL_P (prev))
26690	  break;
26691      if (prev && LABEL_P (prev))
26692	{
26693	  edge e;
26694	  edge_iterator ei;
26695
26696	  FOR_EACH_EDGE (e, ei, bb->preds)
26697	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
26698		&& !(e->flags & EDGE_FALLTHRU))
26699	      replace = true;
26700	}
26701      if (!replace)
26702	{
26703	  prev = prev_active_insn (ret);
26704	  if (prev
26705	      && ((JUMP_P (prev) && any_condjump_p (prev))
26706		  || CALL_P (prev)))
26707	    replace = true;
	  /* Empty functions get a branch mispredict even when the jump
	     destination is not visible to us.  */
26710	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
26711	    replace = true;
26712	}
26713      if (replace)
26714	{
26715	  emit_jump_insn_before (gen_return_internal_long (), ret);
26716	  delete_insn (ret);
26717	}
26718    }
26719}
26720
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
26723static void
26724ix86_reorg (void)
26725{
26726  if (optimize && optimize_function_for_speed_p (cfun))
26727    {
26728      if (TARGET_PAD_RETURNS)
26729	ix86_pad_returns ();
26730#ifdef ASM_OUTPUT_MAX_SKIP_PAD
26731      if (TARGET_FOUR_JUMP_LIMIT)
26732	ix86_avoid_jump_mispredicts ();
26733#endif
26734    }
26735}
26736
/* Return nonzero when a QImode register that must be represented via a
   REX prefix is used.  */
26739bool
26740x86_extended_QIreg_mentioned_p (rtx insn)
26741{
26742  int i;
26743  extract_insn_cached (insn);
26744  for (i = 0; i < recog_data.n_operands; i++)
26745    if (REG_P (recog_data.operand[i])
26746	&& REGNO (recog_data.operand[i]) > BX_REG)
26747       return true;
26748  return false;
26749}
26750
/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
26753static int
26754extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
26755{
26756   unsigned int regno;
26757   if (!REG_P (*p))
26758     return 0;
26759   regno = REGNO (*p);
26760   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
26761}
26762
/* Return true when INSN mentions a register that must be encoded using
   a REX prefix.  */
26765bool
26766x86_extended_reg_mentioned_p (rtx insn)
26767{
26768  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
26769		       extended_reg_mentioned_1, NULL);
26770}
26771
26772/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
26773   optabs would emit if we didn't have TFmode patterns.  */
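/* The expansion is roughly equivalent to:

       if ((signed) x >= 0)
	 result = (FP) x;
       else
	 {
	   result = (FP) ((x >> 1) | (x & 1));
	   result = result + result;
	 }

   i.e. for negative (large unsigned) inputs the value is halved with the
   low bit folded back in to keep rounding correct, converted, and then
   doubled.  */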
26774
26775void
26776x86_emit_floatuns (rtx operands[2])
26777{
26778  rtx neglab, donelab, i0, i1, f0, in, out;
26779  enum machine_mode mode, inmode;
26780
26781  inmode = GET_MODE (operands[1]);
26782  gcc_assert (inmode == SImode || inmode == DImode);
26783
26784  out = operands[0];
26785  in = force_reg (inmode, operands[1]);
26786  mode = GET_MODE (out);
26787  neglab = gen_label_rtx ();
26788  donelab = gen_label_rtx ();
26789  f0 = gen_reg_rtx (mode);
26790
26791  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
26792
26793  expand_float (out, in, 0);
26794
26795  emit_jump_insn (gen_jump (donelab));
26796  emit_barrier ();
26797
26798  emit_label (neglab);
26799
26800  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
26801			    1, OPTAB_DIRECT);
26802  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
26803			    1, OPTAB_DIRECT);
26804  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
26805
26806  expand_float (f0, i0, 0);
26807
26808  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
26809
26810  emit_label (donelab);
26811}
26812
26813/* AVX does not support 32-byte integer vector operations,
26814   thus the longest vector we are faced with is V16QImode.  */
26815#define MAX_VECT_LEN	16
26816
26817struct expand_vec_perm_d
26818{
26819  rtx target, op0, op1;
26820  unsigned char perm[MAX_VECT_LEN];
26821  enum machine_mode vmode;
26822  unsigned char nelt;
26823  bool testing_p;
26824};
26825
26826static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
26827static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
26828
26829/* Get a vector mode of the same size as the original but with elements
26830   twice as wide.  This is only guaranteed to apply to integral vectors.  */
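/* For example, V16QImode is widened to V8HImode and V8HImode to
   V4SImode; the vector stays 16 bytes while the element width
   doubles.  */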
26831
26832static inline enum machine_mode
26833get_mode_wider_vector (enum machine_mode o)
26834{
26835  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
26836  enum machine_mode n = GET_MODE_WIDER_MODE (o);
26837  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
26838  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
26839  return n;
26840}
26841
26842/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
26843   with all elements equal to VAR.  Return true if successful.  */
26844
26845static bool
26846ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
26847				   rtx target, rtx val)
26848{
26849  bool ok;
26850
26851  switch (mode)
26852    {
26853    case V2SImode:
26854    case V2SFmode:
26855      if (!mmx_ok)
26856	return false;
26857      /* FALLTHRU */
26858
26859    case V4DFmode:
26860    case V4DImode:
26861    case V8SFmode:
26862    case V8SImode:
26863    case V2DFmode:
26864    case V2DImode:
26865    case V4SFmode:
26866    case V4SImode:
26867      {
26868	rtx insn, dup;
26869
26870	/* First attempt to recognize VAL as-is.  */
26871	dup = gen_rtx_VEC_DUPLICATE (mode, val);
26872	insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
26873	if (recog_memoized (insn) < 0)
26874	  {
26875	    rtx seq;
26876	    /* If that fails, force VAL into a register.  */
26877
26878	    start_sequence ();
26879	    XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
26880	    seq = get_insns ();
26881	    end_sequence ();
26882	    if (seq)
26883	      emit_insn_before (seq, insn);
26884
26885	    ok = recog_memoized (insn) >= 0;
26886	    gcc_assert (ok);
26887	  }
26888      }
26889      return true;
26890
26891    case V4HImode:
26892      if (!mmx_ok)
26893	return false;
26894      if (TARGET_SSE || TARGET_3DNOW_A)
26895	{
26896	  rtx x;
26897
26898	  val = gen_lowpart (SImode, val);
26899	  x = gen_rtx_TRUNCATE (HImode, val);
26900	  x = gen_rtx_VEC_DUPLICATE (mode, x);
26901	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
26902	  return true;
26903	}
26904      goto widen;
26905
26906    case V8QImode:
26907      if (!mmx_ok)
26908	return false;
26909      goto widen;
26910
26911    case V8HImode:
26912      if (TARGET_SSE2)
26913	{
26914	  struct expand_vec_perm_d dperm;
26915	  rtx tmp1, tmp2;
26916
26917	permute:
26918	  memset (&dperm, 0, sizeof (dperm));
26919	  dperm.target = target;
26920	  dperm.vmode = mode;
26921	  dperm.nelt = GET_MODE_NUNITS (mode);
26922	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
26923
26924	  /* Extend to SImode using a paradoxical SUBREG.  */
26925	  tmp1 = gen_reg_rtx (SImode);
26926	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
26927
26928	  /* Insert the SImode value as low element of a V4SImode vector. */
26929	  tmp2 = gen_lowpart (V4SImode, dperm.op0);
26930	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
26931
26932	  ok = (expand_vec_perm_1 (&dperm)
26933		|| expand_vec_perm_broadcast_1 (&dperm));
26934	  gcc_assert (ok);
26935	  return ok;
26936	}
26937      goto widen;
26938
26939    case V16QImode:
26940      if (TARGET_SSE2)
26941	goto permute;
26942      goto widen;
26943
26944    widen:
26945      /* Replicate the value once into the next wider mode and recurse.  */
26946      {
26947	enum machine_mode smode, wsmode, wvmode;
26948	rtx x;
26949
26950	smode = GET_MODE_INNER (mode);
26951	wvmode = get_mode_wider_vector (mode);
26952	wsmode = GET_MODE_INNER (wvmode);
26953
26954	val = convert_modes (wsmode, smode, val, true);
26955	x = expand_simple_binop (wsmode, ASHIFT, val,
26956				 GEN_INT (GET_MODE_BITSIZE (smode)),
26957				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26958	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
26959
26960	x = gen_lowpart (wvmode, target);
26961	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
26962	gcc_assert (ok);
26963	return ok;
26964      }
26965
26966    case V16HImode:
26967    case V32QImode:
26968      {
26969	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
26970	rtx x = gen_reg_rtx (hvmode);
26971
26972	ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
26973	gcc_assert (ok);
26974
26975	x = gen_rtx_VEC_CONCAT (mode, x, x);
26976	emit_insn (gen_rtx_SET (VOIDmode, target, x));
26977      }
26978      return true;
26979
26980    default:
26981      return false;
26982    }
26983}
26984
26985/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
26986   whose ONE_VAR element is VAR, and other elements are zero.  Return true
26987   if successful.  */
26988
26989static bool
26990ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
26991				     rtx target, rtx var, int one_var)
26992{
26993  enum machine_mode vsimode;
26994  rtx new_target;
26995  rtx x, tmp;
26996  bool use_vector_set = false;
26997
26998  switch (mode)
26999    {
27000    case V2DImode:
27001      /* For SSE4.1, we normally use vector set.  But if the second
27002	 element is zero and inter-unit moves are OK, we use movq
27003	 instead.  */
27004      use_vector_set = (TARGET_64BIT
27005			&& TARGET_SSE4_1
27006			&& !(TARGET_INTER_UNIT_MOVES
27007			     && one_var == 0));
27008      break;
27009    case V16QImode:
27010    case V4SImode:
27011    case V4SFmode:
27012      use_vector_set = TARGET_SSE4_1;
27013      break;
27014    case V8HImode:
27015      use_vector_set = TARGET_SSE2;
27016      break;
27017    case V4HImode:
27018      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27019      break;
27020    case V32QImode:
27021    case V16HImode:
27022    case V8SImode:
27023    case V8SFmode:
27024    case V4DFmode:
27025      use_vector_set = TARGET_AVX;
27026      break;
27027    case V4DImode:
27028      /* Use ix86_expand_vector_set in 64bit mode only.  */
27029      use_vector_set = TARGET_AVX && TARGET_64BIT;
27030      break;
27031    default:
27032      break;
27033    }
27034
27035  if (use_vector_set)
27036    {
27037      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27038      var = force_reg (GET_MODE_INNER (mode), var);
27039      ix86_expand_vector_set (mmx_ok, target, var, one_var);
27040      return true;
27041    }
27042
27043  switch (mode)
27044    {
27045    case V2SFmode:
27046    case V2SImode:
27047      if (!mmx_ok)
27048	return false;
27049      /* FALLTHRU */
27050
27051    case V2DFmode:
27052    case V2DImode:
27053      if (one_var != 0)
27054	return false;
27055      var = force_reg (GET_MODE_INNER (mode), var);
27056      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27057      emit_insn (gen_rtx_SET (VOIDmode, target, x));
27058      return true;
27059
27060    case V4SFmode:
27061    case V4SImode:
27062      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27063	new_target = gen_reg_rtx (mode);
27064      else
27065	new_target = target;
27066      var = force_reg (GET_MODE_INNER (mode), var);
27067      x = gen_rtx_VEC_DUPLICATE (mode, var);
27068      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27069      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27070      if (one_var != 0)
27071	{
27072	  /* We need to shuffle the value to the correct position, so
27073	     create a new pseudo to store the intermediate result.  */
27074
27075	  /* With SSE2, we can use the integer shuffle insns.  */
27076	  if (mode != V4SFmode && TARGET_SSE2)
27077	    {
27078	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27079					    const1_rtx,
27080					    GEN_INT (one_var == 1 ? 0 : 1),
27081					    GEN_INT (one_var == 2 ? 0 : 1),
27082					    GEN_INT (one_var == 3 ? 0 : 1)));
27083	      if (target != new_target)
27084		emit_move_insn (target, new_target);
27085	      return true;
27086	    }
27087
27088	  /* Otherwise convert the intermediate result to V4SFmode and
27089	     use the SSE1 shuffle instructions.  */
27090	  if (mode != V4SFmode)
27091	    {
27092	      tmp = gen_reg_rtx (V4SFmode);
27093	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27094	    }
27095	  else
27096	    tmp = new_target;
27097
27098	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27099				       const1_rtx,
27100				       GEN_INT (one_var == 1 ? 0 : 1),
27101				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
27102				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27103
27104	  if (mode != V4SFmode)
27105	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27106	  else if (tmp != target)
27107	    emit_move_insn (target, tmp);
27108	}
27109      else if (target != new_target)
27110	emit_move_insn (target, new_target);
27111      return true;
27112
27113    case V8HImode:
27114    case V16QImode:
27115      vsimode = V4SImode;
27116      goto widen;
27117    case V4HImode:
27118    case V8QImode:
27119      if (!mmx_ok)
27120	return false;
27121      vsimode = V2SImode;
27122      goto widen;
27123    widen:
27124      if (one_var != 0)
27125	return false;
27126
27127      /* Zero extend the variable element to SImode and recurse.  */
27128      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27129
27130      x = gen_reg_rtx (vsimode);
27131      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27132						var, one_var))
27133	gcc_unreachable ();
27134
27135      emit_move_insn (target, gen_lowpart (mode, x));
27136      return true;
27137
27138    default:
27139      return false;
27140    }
27141}
27142
27143/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27144   consisting of the values in VALS.  It is known that all elements
27145   except ONE_VAR are constants.  Return true if successful.  */
27146
27147static bool
27148ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27149				 rtx target, rtx vals, int one_var)
27150{
27151  rtx var = XVECEXP (vals, 0, one_var);
27152  enum machine_mode wmode;
27153  rtx const_vec, x;
27154
27155  const_vec = copy_rtx (vals);
27156  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27157  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27158
27159  switch (mode)
27160    {
27161    case V2DFmode:
27162    case V2DImode:
27163    case V2SFmode:
27164    case V2SImode:
27165      /* For the two element vectors, it's just as easy to use
27166	 the general case.  */
27167      return false;
27168
27169    case V4DImode:
27170      /* Use ix86_expand_vector_set in 64bit mode only.  */
27171      if (!TARGET_64BIT)
27172	return false;
27173    case V4DFmode:
27174    case V8SFmode:
27175    case V8SImode:
27176    case V16HImode:
27177    case V32QImode:
27178    case V4SFmode:
27179    case V4SImode:
27180    case V8HImode:
27181    case V4HImode:
27182      break;
27183
27184    case V16QImode:
27185      if (TARGET_SSE4_1)
27186	break;
27187      wmode = V8HImode;
27188      goto widen;
27189    case V8QImode:
27190      wmode = V4HImode;
27191      goto widen;
27192    widen:
27193      /* There's no way to set one QImode entry easily.  Combine
27194	 the variable value with its adjacent constant value, and
27195	 promote to an HImode set.  */
27196      x = XVECEXP (vals, 0, one_var ^ 1);
27197      if (one_var & 1)
27198	{
27199	  var = convert_modes (HImode, QImode, var, true);
27200	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27201				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
27202	  x = GEN_INT (INTVAL (x) & 0xff);
27203	}
27204      else
27205	{
27206	  var = convert_modes (HImode, QImode, var, true);
27207	  x = gen_int_mode (INTVAL (x) << 8, HImode);
27208	}
27209      if (x != const0_rtx)
27210	var = expand_simple_binop (HImode, IOR, var, x, var,
27211				   1, OPTAB_LIB_WIDEN);
27212
27213      x = gen_reg_rtx (wmode);
27214      emit_move_insn (x, gen_lowpart (wmode, const_vec));
27215      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27216
27217      emit_move_insn (target, gen_lowpart (mode, x));
27218      return true;
27219
27220    default:
27221      return false;
27222    }
27223
27224  emit_move_insn (target, const_vec);
27225  ix86_expand_vector_set (mmx_ok, target, var, one_var);
27226  return true;
27227}
27228
27229/* A subroutine of ix86_expand_vector_init_general.  Use vector
27230   concatenate to handle the most general case: all values variable,
27231   and none identical.  */
27232
27233static void
27234ix86_expand_vector_init_concat (enum machine_mode mode,
27235				rtx target, rtx *ops, int n)
27236{
27237  enum machine_mode cmode, hmode = VOIDmode;
27238  rtx first[8], second[4];
27239  rtvec v;
27240  int i, j;
27241
27242  switch (n)
27243    {
27244    case 2:
27245      switch (mode)
27246	{
27247	case V8SImode:
27248	  cmode = V4SImode;
27249	  break;
27250	case V8SFmode:
27251	  cmode = V4SFmode;
27252	  break;
27253	case V4DImode:
27254	  cmode = V2DImode;
27255	  break;
27256	case V4DFmode:
27257	  cmode = V2DFmode;
27258	  break;
27259	case V4SImode:
27260	  cmode = V2SImode;
27261	  break;
27262	case V4SFmode:
27263	  cmode = V2SFmode;
27264	  break;
27265	case V2DImode:
27266	  cmode = DImode;
27267	  break;
27268	case V2SImode:
27269	  cmode = SImode;
27270	  break;
27271	case V2DFmode:
27272	  cmode = DFmode;
27273	  break;
27274	case V2SFmode:
27275	  cmode = SFmode;
27276	  break;
27277	default:
27278	  gcc_unreachable ();
27279	}
27280
27281      if (!register_operand (ops[1], cmode))
27282	ops[1] = force_reg (cmode, ops[1]);
27283      if (!register_operand (ops[0], cmode))
27284	ops[0] = force_reg (cmode, ops[0]);
27285      emit_insn (gen_rtx_SET (VOIDmode, target,
27286			      gen_rtx_VEC_CONCAT (mode, ops[0],
27287						  ops[1])));
27288      break;
27289
27290    case 4:
27291      switch (mode)
27292	{
27293	case V4DImode:
27294	  cmode = V2DImode;
27295	  break;
27296	case V4DFmode:
27297	  cmode = V2DFmode;
27298	  break;
27299	case V4SImode:
27300	  cmode = V2SImode;
27301	  break;
27302	case V4SFmode:
27303	  cmode = V2SFmode;
27304	  break;
27305	default:
27306	  gcc_unreachable ();
27307	}
27308      goto half;
27309
27310    case 8:
27311      switch (mode)
27312	{
27313	case V8SImode:
27314	  cmode = V2SImode;
27315	  hmode = V4SImode;
27316	  break;
27317	case V8SFmode:
27318	  cmode = V2SFmode;
27319	  hmode = V4SFmode;
27320	  break;
27321	default:
27322	  gcc_unreachable ();
27323	}
27324      goto half;
27325
27326half:
27327      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
27328      i = n - 1;
27329      j = (n >> 1) - 1;
27330      for (; i > 0; i -= 2, j--)
27331	{
27332	  first[j] = gen_reg_rtx (cmode);
27333	  v = gen_rtvec (2, ops[i - 1], ops[i]);
27334	  ix86_expand_vector_init (false, first[j],
27335				   gen_rtx_PARALLEL (cmode, v));
27336	}
27337
27338      n >>= 1;
27339      if (n > 2)
27340	{
27341	  gcc_assert (hmode != VOIDmode);
27342	  for (i = j = 0; i < n; i += 2, j++)
27343	    {
27344	      second[j] = gen_reg_rtx (hmode);
27345	      ix86_expand_vector_init_concat (hmode, second [j],
27346					      &first [i], 2);
27347	    }
27348	  n >>= 1;
27349	  ix86_expand_vector_init_concat (mode, target, second, n);
27350	}
27351      else
27352	ix86_expand_vector_init_concat (mode, target, first, n);
27353      break;
27354
27355    default:
27356      gcc_unreachable ();
27357    }
27358}
27359
27360/* A subroutine of ix86_expand_vector_init_general.  Use vector
27361   interleave to handle the most general case: all values variable,
27362   and none identical.  */
27363
27364static void
27365ix86_expand_vector_init_interleave (enum machine_mode mode,
27366				    rtx target, rtx *ops, int n)
27367{
27368  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27369  int i, j;
27370  rtx op0, op1;
27371  rtx (*gen_load_even) (rtx, rtx, rtx);
27372  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27373  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
27374
27375  switch (mode)
27376    {
27377    case V8HImode:
27378      gen_load_even = gen_vec_setv8hi;
27379      gen_interleave_first_low = gen_vec_interleave_lowv4si;
27380      gen_interleave_second_low = gen_vec_interleave_lowv2di;
27381      inner_mode = HImode;
27382      first_imode = V4SImode;
27383      second_imode = V2DImode;
27384      third_imode = VOIDmode;
27385      break;
27386    case V16QImode:
27387      gen_load_even = gen_vec_setv16qi;
27388      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27389      gen_interleave_second_low = gen_vec_interleave_lowv4si;
27390      inner_mode = QImode;
27391      first_imode = V8HImode;
27392      second_imode = V4SImode;
27393      third_imode = V2DImode;
27394      break;
27395    default:
27396      gcc_unreachable ();
27397    }
27398
27399  for (i = 0; i < n; i++)
27400    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
27402      op0 = gen_reg_rtx (SImode);
27403      emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27404
27405      /* Insert the SImode value as low element of V4SImode vector. */
27406      op1 = gen_reg_rtx (V4SImode);
27407      op0 = gen_rtx_VEC_MERGE (V4SImode,
27408			       gen_rtx_VEC_DUPLICATE (V4SImode,
27409						      op0),
27410			       CONST0_RTX (V4SImode),
27411			       const1_rtx);
27412      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27413
      /* Cast the V4SImode vector back to a vector in the original mode.  */
27415      op0 = gen_reg_rtx (mode);
27416      emit_move_insn (op0, gen_lowpart (mode, op1));
27417
      /* Load even elements into the second position.  */
27419      emit_insn ((*gen_load_even) (op0,
27420				   force_reg (inner_mode,
27421					      ops [i + i + 1]),
27422				   const1_rtx));
27423
27424      /* Cast vector to FIRST_IMODE vector.  */
27425      ops[i] = gen_reg_rtx (first_imode);
27426      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
27427    }
27428
27429  /* Interleave low FIRST_IMODE vectors.  */
27430  for (i = j = 0; i < n; i += 2, j++)
27431    {
27432      op0 = gen_reg_rtx (first_imode);
27433      emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
27434
27435      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
27436      ops[j] = gen_reg_rtx (second_imode);
27437      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
27438    }
27439
27440  /* Interleave low SECOND_IMODE vectors.  */
27441  switch (second_imode)
27442    {
27443    case V4SImode:
27444      for (i = j = 0; i < n / 2; i += 2, j++)
27445	{
27446	  op0 = gen_reg_rtx (second_imode);
27447	  emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27448						   ops[i + 1]));
27449
27450	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
27451	     vector.  */
27452	  ops[j] = gen_reg_rtx (third_imode);
27453	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
27454	}
27455      second_imode = V2DImode;
27456      gen_interleave_second_low = gen_vec_interleave_lowv2di;
27457      /* FALLTHRU */
27458
27459    case V2DImode:
27460      op0 = gen_reg_rtx (second_imode);
27461      emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27462					       ops[1]));
27463
      /* Cast the SECOND_IMODE vector back to a vector in the original
	 mode.  */
27466      emit_insn (gen_rtx_SET (VOIDmode, target,
27467			      gen_lowpart (mode, op0)));
27468      break;
27469
27470    default:
27471      gcc_unreachable ();
27472    }
27473}
27474
27475/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
27476   all values variable, and none identical.  */
27477
27478static void
27479ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27480				 rtx target, rtx vals)
27481{
27482  rtx ops[32], op0, op1;
27483  enum machine_mode half_mode = VOIDmode;
27484  int n, i;
27485
27486  switch (mode)
27487    {
27488    case V2SFmode:
27489    case V2SImode:
27490      if (!mmx_ok && !TARGET_SSE)
27491	break;
27492      /* FALLTHRU */
27493
27494    case V8SFmode:
27495    case V8SImode:
27496    case V4DFmode:
27497    case V4DImode:
27498    case V4SFmode:
27499    case V4SImode:
27500    case V2DFmode:
27501    case V2DImode:
27502      n = GET_MODE_NUNITS (mode);
27503      for (i = 0; i < n; i++)
27504	ops[i] = XVECEXP (vals, 0, i);
27505      ix86_expand_vector_init_concat (mode, target, ops, n);
27506      return;
27507
27508    case V32QImode:
27509      half_mode = V16QImode;
27510      goto half;
27511
27512    case V16HImode:
27513      half_mode = V8HImode;
27514      goto half;
27515
27516half:
27517      n = GET_MODE_NUNITS (mode);
27518      for (i = 0; i < n; i++)
27519	ops[i] = XVECEXP (vals, 0, i);
27520      op0 = gen_reg_rtx (half_mode);
27521      op1 = gen_reg_rtx (half_mode);
27522      ix86_expand_vector_init_interleave (half_mode, op0, ops,
27523					  n >> 2);
27524      ix86_expand_vector_init_interleave (half_mode, op1,
27525					  &ops [n >> 1], n >> 2);
27526      emit_insn (gen_rtx_SET (VOIDmode, target,
27527			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
27528      return;
27529
27530    case V16QImode:
27531      if (!TARGET_SSE4_1)
27532	break;
27533      /* FALLTHRU */
27534
27535    case V8HImode:
27536      if (!TARGET_SSE2)
27537	break;
27538
27539      /* Don't use ix86_expand_vector_init_interleave if we can't
27540	 move from GPR to SSE register directly.  */
27541      if (!TARGET_INTER_UNIT_MOVES)
27542	break;
27543
27544      n = GET_MODE_NUNITS (mode);
27545      for (i = 0; i < n; i++)
27546	ops[i] = XVECEXP (vals, 0, i);
27547      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27548      return;
27549
27550    case V4HImode:
27551    case V8QImode:
27552      break;
27553
27554    default:
27555      gcc_unreachable ();
27556    }
27557
27558    {
27559      int i, j, n_elts, n_words, n_elt_per_word;
27560      enum machine_mode inner_mode;
27561      rtx words[4], shift;
27562
27563      inner_mode = GET_MODE_INNER (mode);
27564      n_elts = GET_MODE_NUNITS (mode);
27565      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27566      n_elt_per_word = n_elts / n_words;
27567      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27568
27569      for (i = 0; i < n_words; ++i)
27570	{
27571	  rtx word = NULL_RTX;
27572
27573	  for (j = 0; j < n_elt_per_word; ++j)
27574	    {
27575	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27576	      elt = convert_modes (word_mode, inner_mode, elt, true);
27577
27578	      if (j == 0)
27579		word = elt;
27580	      else
27581		{
27582		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27583					      word, 1, OPTAB_LIB_WIDEN);
27584		  word = expand_simple_binop (word_mode, IOR, word, elt,
27585					      word, 1, OPTAB_LIB_WIDEN);
27586		}
27587	    }
27588
27589	  words[i] = word;
27590	}
27591
27592      if (n_words == 1)
27593	emit_move_insn (target, gen_lowpart (mode, words[0]));
27594      else if (n_words == 2)
27595	{
27596	  rtx tmp = gen_reg_rtx (mode);
27597	  emit_clobber (tmp);
27598	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27599	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27600	  emit_move_insn (target, tmp);
27601	}
27602      else if (n_words == 4)
27603	{
27604	  rtx tmp = gen_reg_rtx (V4SImode);
27605	  gcc_assert (word_mode == SImode);
27606	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
27607	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
27608	  emit_move_insn (target, gen_lowpart (mode, tmp));
27609	}
27610      else
27611	gcc_unreachable ();
27612    }
27613}
27614
27615/* Initialize vector TARGET via VALS.  Suppress the use of MMX
27616   instructions unless MMX_OK is true.  */
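/* The strategy, in order of preference: an all-constant vector is loaded
   from the constant pool, identical elements are broadcast, a single
   variable element is inserted into an otherwise constant vector, and
   only then do we fall back to the fully general expansion.  */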
27617
27618void
27619ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
27620{
27621  enum machine_mode mode = GET_MODE (target);
27622  enum machine_mode inner_mode = GET_MODE_INNER (mode);
27623  int n_elts = GET_MODE_NUNITS (mode);
27624  int n_var = 0, one_var = -1;
27625  bool all_same = true, all_const_zero = true;
27626  int i;
27627  rtx x;
27628
27629  for (i = 0; i < n_elts; ++i)
27630    {
27631      x = XVECEXP (vals, 0, i);
27632      if (!(CONST_INT_P (x)
27633	    || GET_CODE (x) == CONST_DOUBLE
27634	    || GET_CODE (x) == CONST_FIXED))
27635	n_var++, one_var = i;
27636      else if (x != CONST0_RTX (inner_mode))
27637	all_const_zero = false;
27638      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
27639	all_same = false;
27640    }
27641
27642  /* Constants are best loaded from the constant pool.  */
27643  if (n_var == 0)
27644    {
27645      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
27646      return;
27647    }
27648
27649  /* If all values are identical, broadcast the value.  */
27650  if (all_same
27651      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
27652					    XVECEXP (vals, 0, 0)))
27653    return;
27654
27655  /* Values where only one field is non-constant are best loaded from
27656     the pool and overwritten via move later.  */
27657  if (n_var == 1)
27658    {
27659      if (all_const_zero
27660	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
27661						  XVECEXP (vals, 0, one_var),
27662						  one_var))
27663	return;
27664
27665      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
27666	return;
27667    }
27668
27669  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
27670}
27671
27672void
27673ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
27674{
27675  enum machine_mode mode = GET_MODE (target);
27676  enum machine_mode inner_mode = GET_MODE_INNER (mode);
27677  enum machine_mode half_mode;
27678  bool use_vec_merge = false;
27679  rtx tmp;
27680  static rtx (*gen_extract[6][2]) (rtx, rtx)
27681    = {
27682	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
27683	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
27684	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
27685	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
27686	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
27687	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
27688      };
27689  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
27690    = {
27691	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
27692	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
27693	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
27694	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
27695	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
27696	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
27697      };
27698  int i, j, n;
27699
27700  switch (mode)
27701    {
27702    case V2SFmode:
27703    case V2SImode:
27704      if (mmx_ok)
27705	{
27706	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
27707	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
27708	  if (elt == 0)
27709	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
27710	  else
27711	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
27712	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27713	  return;
27714	}
27715      break;
27716
27717    case V2DImode:
27718      use_vec_merge = TARGET_SSE4_1;
27719      if (use_vec_merge)
27720	break;
27721
27722    case V2DFmode:
27723      {
27724	rtx op0, op1;
27725
27726	/* For the two element vectors, we implement a VEC_CONCAT with
27727	   the extraction of the other element.  */
27728
27729	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
27730	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
27731
27732	if (elt == 0)
27733	  op0 = val, op1 = tmp;
27734	else
27735	  op0 = tmp, op1 = val;
27736
27737	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
27738	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27739      }
27740      return;
27741
27742    case V4SFmode:
27743      use_vec_merge = TARGET_SSE4_1;
27744      if (use_vec_merge)
27745	break;
27746
27747      switch (elt)
27748	{
27749	case 0:
27750	  use_vec_merge = true;
27751	  break;
27752
27753	case 1:
27754	  /* tmp = target = A B C D */
27755	  tmp = copy_to_reg (target);
27756	  /* target = A A B B */
27757	  emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
27758	  /* target = X A B B */
27759	  ix86_expand_vector_set (false, target, val, 0);
27760	  /* target = A X C D  */
27761	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27762					  const1_rtx, const0_rtx,
27763					  GEN_INT (2+4), GEN_INT (3+4)));
27764	  return;
27765
27766	case 2:
27767	  /* tmp = target = A B C D */
27768	  tmp = copy_to_reg (target);
27769	  /* tmp = X B C D */
27770	  ix86_expand_vector_set (false, tmp, val, 0);
27771	  /* target = A B X D */
27772	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27773					  const0_rtx, const1_rtx,
27774					  GEN_INT (0+4), GEN_INT (3+4)));
27775	  return;
27776
27777	case 3:
27778	  /* tmp = target = A B C D */
27779	  tmp = copy_to_reg (target);
27780	  /* tmp = X B C D */
27781	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
27783	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27784					  const0_rtx, const1_rtx,
27785					  GEN_INT (2+4), GEN_INT (0+4)));
27786	  return;
27787
27788	default:
27789	  gcc_unreachable ();
27790	}
27791      break;
27792
27793    case V4SImode:
27794      use_vec_merge = TARGET_SSE4_1;
27795      if (use_vec_merge)
27796	break;
27797
27798      /* Element 0 handled by vec_merge below.  */
27799      if (elt == 0)
27800	{
27801	  use_vec_merge = true;
27802	  break;
27803	}
27804
27805      if (TARGET_SSE2)
27806	{
27807	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
27808	     store into element 0, then shuffle them back.  */
27809
27810	  rtx order[4];
27811
27812	  order[0] = GEN_INT (elt);
27813	  order[1] = const1_rtx;
27814	  order[2] = const2_rtx;
27815	  order[3] = GEN_INT (3);
27816	  order[elt] = const0_rtx;
27817
27818	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27819					order[1], order[2], order[3]));
27820
27821	  ix86_expand_vector_set (false, target, val, 0);
27822
27823	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27824					order[1], order[2], order[3]));
27825	}
27826      else
27827	{
27828	  /* For SSE1, we have to reuse the V4SF code.  */
27829	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
27830				  gen_lowpart (SFmode, val), elt);
27831	}
27832      return;
27833
27834    case V8HImode:
27835      use_vec_merge = TARGET_SSE2;
27836      break;
27837    case V4HImode:
27838      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27839      break;
27840
27841    case V16QImode:
27842      use_vec_merge = TARGET_SSE4_1;
27843      break;
27844
27845    case V8QImode:
27846      break;
27847
27848    case V32QImode:
27849      half_mode = V16QImode;
27850      j = 0;
27851      n = 16;
27852      goto half;
27853
27854    case V16HImode:
27855      half_mode = V8HImode;
27856      j = 1;
27857      n = 8;
27858      goto half;
27859
27860    case V8SImode:
27861      half_mode = V4SImode;
27862      j = 2;
27863      n = 4;
27864      goto half;
27865
27866    case V4DImode:
27867      half_mode = V2DImode;
27868      j = 3;
27869      n = 2;
27870      goto half;
27871
27872    case V8SFmode:
27873      half_mode = V4SFmode;
27874      j = 4;
27875      n = 4;
27876      goto half;
27877
27878    case V4DFmode:
27879      half_mode = V2DFmode;
27880      j = 5;
27881      n = 2;
27882      goto half;
27883
27884half:
27885      /* Compute offset.  */
27886      i = elt / n;
27887      elt %= n;
27888
27889      gcc_assert (i <= 1);
27890
27891      /* Extract the half.  */
27892      tmp = gen_reg_rtx (half_mode);
27893      emit_insn ((*gen_extract[j][i]) (tmp, target));
27894
27895      /* Put val in tmp at elt.  */
27896      ix86_expand_vector_set (false, tmp, val, elt);
27897
27898      /* Put it back.  */
27899      emit_insn ((*gen_insert[j][i]) (target, target, tmp));
27900      return;
27901
27902    default:
27903      break;
27904    }
27905
27906  if (use_vec_merge)
27907    {
27908      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
27909      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
27910      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27911    }
27912  else
27913    {
27914      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27915
27916      emit_move_insn (mem, target);
27917
27918      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27919      emit_move_insn (tmp, val);
27920
27921      emit_move_insn (target, mem);
27922    }
27923}
27924
27925void
27926ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
27927{
27928  enum machine_mode mode = GET_MODE (vec);
27929  enum machine_mode inner_mode = GET_MODE_INNER (mode);
27930  bool use_vec_extr = false;
27931  rtx tmp;
27932
27933  switch (mode)
27934    {
27935    case V2SImode:
27936    case V2SFmode:
27937      if (!mmx_ok)
27938	break;
27939      /* FALLTHRU */
27940
27941    case V2DFmode:
27942    case V2DImode:
27943      use_vec_extr = true;
27944      break;
27945
27946    case V4SFmode:
27947      use_vec_extr = TARGET_SSE4_1;
27948      if (use_vec_extr)
27949	break;
27950
27951      switch (elt)
27952	{
27953	case 0:
27954	  tmp = vec;
27955	  break;
27956
27957	case 1:
27958	case 3:
27959	  tmp = gen_reg_rtx (mode);
27960	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
27961				       GEN_INT (elt), GEN_INT (elt),
27962				       GEN_INT (elt+4), GEN_INT (elt+4)));
27963	  break;
27964
27965	case 2:
27966	  tmp = gen_reg_rtx (mode);
27967	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
27968	  break;
27969
27970	default:
27971	  gcc_unreachable ();
27972	}
27973      vec = tmp;
27974      use_vec_extr = true;
27975      elt = 0;
27976      break;
27977
27978    case V4SImode:
27979      use_vec_extr = TARGET_SSE4_1;
27980      if (use_vec_extr)
27981	break;
27982
27983      if (TARGET_SSE2)
27984	{
27985	  switch (elt)
27986	    {
27987	    case 0:
27988	      tmp = vec;
27989	      break;
27990
27991	    case 1:
27992	    case 3:
27993	      tmp = gen_reg_rtx (mode);
27994	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
27995					    GEN_INT (elt), GEN_INT (elt),
27996					    GEN_INT (elt), GEN_INT (elt)));
27997	      break;
27998
27999	    case 2:
28000	      tmp = gen_reg_rtx (mode);
28001	      emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
28002	      break;
28003
28004	    default:
28005	      gcc_unreachable ();
28006	    }
28007	  vec = tmp;
28008	  use_vec_extr = true;
28009	  elt = 0;
28010	}
28011      else
28012	{
28013	  /* For SSE1, we have to reuse the V4SF code.  */
28014	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28015				      gen_lowpart (V4SFmode, vec), elt);
28016	  return;
28017	}
28018      break;
28019
28020    case V8HImode:
28021      use_vec_extr = TARGET_SSE2;
28022      break;
28023    case V4HImode:
28024      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28025      break;
28026
28027    case V16QImode:
28028      use_vec_extr = TARGET_SSE4_1;
28029      break;
28030
28031    case V8QImode:
28032      /* ??? Could extract the appropriate HImode element and shift.  */
28033    default:
28034      break;
28035    }
28036
28037  if (use_vec_extr)
28038    {
28039      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28040      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28041
28042      /* Let the rtl optimizers know about the zero extension performed.  */
28043      if (inner_mode == QImode || inner_mode == HImode)
28044	{
28045	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28046	  target = gen_lowpart (SImode, target);
28047	}
28048
28049      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28050    }
28051  else
28052    {
28053      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28054
28055      emit_move_insn (mem, vec);
28056
      tmp = adjust_address (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
28058      emit_move_insn (target, tmp);
28059    }
28060}
28061
/* Expand a vector reduction on V4SFmode for SSE1.  FN is the generator
   for the binary pattern used to combine elements; DEST is the
   destination; IN is the input vector.  */
28064
28065void
28066ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28067{
28068  rtx tmp1, tmp2, tmp3;
28069
28070  tmp1 = gen_reg_rtx (V4SFmode);
28071  tmp2 = gen_reg_rtx (V4SFmode);
28072  tmp3 = gen_reg_rtx (V4SFmode);
28073
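  /* tmp1 = { in[2], in[3], in[2], in[3] }
     tmp2 = fn (tmp1, in), i.e. tmp2[0] = fn (in[2], in[0]) and
     tmp2[1] = fn (in[3], in[1])
     tmp3 = { tmp2[1], tmp2[1], tmp2[1], tmp2[1] }
     dest = fn (tmp2, tmp3), leaving the full reduction in dest[0].  */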
28074  emit_insn (gen_sse_movhlps (tmp1, in, in));
28075  emit_insn (fn (tmp2, tmp1, in));
28076
28077  emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28078				  const1_rtx, const1_rtx,
28079				  GEN_INT (1+4), GEN_INT (1+4)));
28080  emit_insn (fn (dest, tmp2, tmp3));
28081}
28082
28083/* Target hook for scalar_mode_supported_p.  */
28084static bool
28085ix86_scalar_mode_supported_p (enum machine_mode mode)
28086{
28087  if (DECIMAL_FLOAT_MODE_P (mode))
28088    return default_decimal_float_supported_p ();
28089  else if (mode == TFmode)
28090    return true;
28091  else
28092    return default_scalar_mode_supported_p (mode);
28093}
28094
28095/* Implements target hook vector_mode_supported_p.  */
28096static bool
28097ix86_vector_mode_supported_p (enum machine_mode mode)
28098{
28099  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28100    return true;
28101  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28102    return true;
28103  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28104    return true;
28105  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28106    return true;
28107  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28108    return true;
28109  return false;
28110}
28111
28112/* Target hook for c_mode_for_suffix.  */
28113static enum machine_mode
28114ix86_c_mode_for_suffix (char suffix)
28115{
28116  if (suffix == 'q')
28117    return TFmode;
28118  if (suffix == 'w')
28119    return XFmode;
28120
28121  return VOIDmode;
28122}
28123
28124/* Worker function for TARGET_MD_ASM_CLOBBERS.
28125
28126   We do this in the new i386 backend to maintain source compatibility
28127   with the old cc0-based compiler.  */
28128
28129static tree
28130ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28131		      tree inputs ATTRIBUTE_UNUSED,
28132		      tree clobbers)
28133{
28134  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28135			clobbers);
28136  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28137			clobbers);
28138  return clobbers;
28139}
28140
/* Implements the targetm.encode_section_info target hook.  This is not
   used by NetWare.  */
28143
28144static void ATTRIBUTE_UNUSED
28145ix86_encode_section_info (tree decl, rtx rtl, int first)
28146{
28147  default_encode_section_info (decl, rtl, first);
28148
28149  if (TREE_CODE (decl) == VAR_DECL
28150      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28151      && ix86_in_large_data_p (decl))
28152    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28153}
28154
28155/* Worker function for REVERSE_CONDITION.  */
28156
28157enum rtx_code
28158ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28159{
28160  return (mode != CCFPmode && mode != CCFPUmode
28161	  ? reverse_condition (code)
28162	  : reverse_condition_maybe_unordered (code));
28163}
28164
28165/* Output code to perform an x87 FP register move, from OPERANDS[1]
28166   to OPERANDS[0].  */
28167
28168const char *
28169output_387_reg_move (rtx insn, rtx *operands)
28170{
28171  if (REG_P (operands[0]))
28172    {
28173      if (REG_P (operands[1])
28174	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28175	{
28176	  if (REGNO (operands[0]) == FIRST_STACK_REG)
28177	    return output_387_ffreep (operands, 0);
28178	  return "fstp\t%y0";
28179	}
28180      if (STACK_TOP_P (operands[0]))
28181	return "fld%Z1\t%y1";
28182      return "fst\t%y0";
28183    }
28184  else if (MEM_P (operands[0]))
28185    {
28186      gcc_assert (REG_P (operands[1]));
28187      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28188	return "fstp%Z0\t%y0";
28189      else
28190	{
28191	  /* There is no non-popping store to memory for XFmode.
28192	     So if we need one, follow the store with a load.  */
28193	  if (GET_MODE (operands[0]) == XFmode)
28194	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
28195	  else
28196	    return "fst%Z0\t%y0";
28197	}
28198    }
28199  else
28200    gcc_unreachable();
28201}
28202
/* Output code to perform a conditional jump to LABEL if the C2 flag in
   the FP status register is set.  */
28205
28206void
28207ix86_emit_fp_unordered_jump (rtx label)
28208{
28209  rtx reg = gen_reg_rtx (HImode);
28210  rtx temp;
28211
28212  emit_insn (gen_x86_fnstsw_1 (reg));
28213
28214  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28215    {
28216      emit_insn (gen_x86_sahf_1 (reg));
28217
28218      temp = gen_rtx_REG (CCmode, FLAGS_REG);
28219      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
28220    }
28221  else
28222    {
28223      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28224
28225      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28226      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28227    }
28228
28229  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28230			      gen_rtx_LABEL_REF (VOIDmode, label),
28231			      pc_rtx);
28232  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28233
28234  emit_jump_insn (temp);
28235  predict_jump (REG_BR_PROB_BASE * 10 / 100);
28236}
28237
28238/* Output code to perform a log1p XFmode calculation.  */
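/* fyl2xp1 computes Y * log2 (X + 1) but is only specified for
   |X| < 1 - sqrt (2) / 2 (about 0.2929), hence the branch below: larger
   inputs go through fyl2x on 1 + X instead.  Both paths use Y = ln (2)
   (fldln2), turning the base-2 logarithm into a natural logarithm.  */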
28239
void
ix86_emit_i387_log1p (rtx op0, rtx op1)
28241{
28242  rtx label1 = gen_label_rtx ();
28243  rtx label2 = gen_label_rtx ();
28244
28245  rtx tmp = gen_reg_rtx (XFmode);
28246  rtx tmp2 = gen_reg_rtx (XFmode);
28247  rtx test;
28248
28249  emit_insn (gen_absxf2 (tmp, op1));
28250  test = gen_rtx_GE (VOIDmode, tmp,
28251    CONST_DOUBLE_FROM_REAL_VALUE (
28252       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28253       XFmode));
28254  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
28255
28256  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28257  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28258  emit_jump (label2);
28259
28260  emit_label (label1);
28261  emit_move_insn (tmp, CONST1_RTX (XFmode));
28262  emit_insn (gen_addxf3 (tmp, op1, tmp));
28263  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28264  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28265
28266  emit_label (label2);
28267}
28268
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
28271
void
ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28273{
28274  rtx x0, x1, e0, e1, two;
28275
28276  x0 = gen_reg_rtx (mode);
28277  e0 = gen_reg_rtx (mode);
28278  e1 = gen_reg_rtx (mode);
28279  x1 = gen_reg_rtx (mode);
28280
28281  two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28282
28283  if (VECTOR_MODE_P (mode))
28284    two = ix86_build_const_vector (SFmode, true, two);
28285
28286  two = force_reg (mode, two);
28287
28288  /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
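  /* If rcp (b) = (1 + e) / b, the product above equals a * (1 - e*e) / b,
     so one Newton-Raphson step roughly doubles the number of correct bits
     of the hardware's roughly 12-bit reciprocal estimate.  */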
28289
28290  /* x0 = rcp(b) estimate */
28291  emit_insn (gen_rtx_SET (VOIDmode, x0,
28292			  gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
28293					  UNSPEC_RCP)));
28294  /* e0 = x0 * a */
28295  emit_insn (gen_rtx_SET (VOIDmode, e0,
28296			  gen_rtx_MULT (mode, x0, a)));
28297  /* e1 = x0 * b */
28298  emit_insn (gen_rtx_SET (VOIDmode, e1,
28299			  gen_rtx_MULT (mode, x0, b)));
28300  /* x1 = 2. - e1 */
28301  emit_insn (gen_rtx_SET (VOIDmode, x1,
28302			  gen_rtx_MINUS (mode, two, e1)));
28303  /* res = e0 * x1 */
28304  emit_insn (gen_rtx_SET (VOIDmode, res,
28305			  gen_rtx_MULT (mode, e0, x1)));
28306}
28307
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */
28310
void
ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode, bool recip)
28313{
28314  rtx x0, e0, e1, e2, e3, mthree, mhalf;
28315  REAL_VALUE_TYPE r;
28316
28317  x0 = gen_reg_rtx (mode);
28318  e0 = gen_reg_rtx (mode);
28319  e1 = gen_reg_rtx (mode);
28320  e2 = gen_reg_rtx (mode);
28321  e3 = gen_reg_rtx (mode);
28322
28323  real_from_integer (&r, VOIDmode, -3, -1, 0);
28324  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28325
28326  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28327  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28328
28329  if (VECTOR_MODE_P (mode))
28330    {
28331      mthree = ix86_build_const_vector (SFmode, true, mthree);
28332      mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28333    }
28334
28335  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28336     rsqrt(a) = -0.5     * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
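  /* This is the Newton-Raphson step x1 = 0.5 * x0 * (3 - a * x0 * x0),
     written with -3.0 and -0.5; using a * x0 instead of x0 in the final
     product turns the refined rsqrt estimate into sqrt.  */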
28337
28338  /* x0 = rsqrt(a) estimate */
28339  emit_insn (gen_rtx_SET (VOIDmode, x0,
28340			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28341					  UNSPEC_RSQRT)));
28342
  /* If a == 0.0, filter out the infinite rsqrt (a) estimate to prevent
     NaN for sqrt (0.0).  */
28344  if (!recip)
28345    {
28346      rtx zero, mask;
28347
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX (mode));
28352      emit_insn (gen_rtx_SET (VOIDmode, mask,
28353			      gen_rtx_NE (mode, zero, a)));
28354
28355      emit_insn (gen_rtx_SET (VOIDmode, x0,
28356			      gen_rtx_AND (mode, x0, mask)));
28357    }
28358
28359  /* e0 = x0 * a */
28360  emit_insn (gen_rtx_SET (VOIDmode, e0,
28361			  gen_rtx_MULT (mode, x0, a)));
28362  /* e1 = e0 * x0 */
28363  emit_insn (gen_rtx_SET (VOIDmode, e1,
28364			  gen_rtx_MULT (mode, e0, x0)));
28365
28366  /* e2 = e1 - 3. */
28367  mthree = force_reg (mode, mthree);
28368  emit_insn (gen_rtx_SET (VOIDmode, e2,
28369			  gen_rtx_PLUS (mode, e1, mthree)));
28370
28371  mhalf = force_reg (mode, mhalf);
28372  if (recip)
28373    /* e3 = -.5 * x0 */
28374    emit_insn (gen_rtx_SET (VOIDmode, e3,
28375			    gen_rtx_MULT (mode, x0, mhalf)));
28376  else
28377    /* e3 = -.5 * e0 */
28378    emit_insn (gen_rtx_SET (VOIDmode, e3,
28379			    gen_rtx_MULT (mode, e0, mhalf)));
28380  /* ret = e2 * e3 */
28381  emit_insn (gen_rtx_SET (VOIDmode, res,
28382			  gen_rtx_MULT (mode, e2, e3)));
28383}
28384
28385/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
28386
28387static void ATTRIBUTE_UNUSED
28388i386_solaris_elf_named_section (const char *name, unsigned int flags,
28389				tree decl)
28390{
28391  /* With Binutils 2.15, the "@unwind" marker must be specified on
28392     every occurrence of the ".eh_frame" section, not just the first
28393     one.  */
28394  if (TARGET_64BIT
28395      && strcmp (name, ".eh_frame") == 0)
28396    {
28397      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
28398	       flags & SECTION_WRITE ? "aw" : "a");
28399      return;
28400    }
28401  default_elf_asm_named_section (name, flags, decl);
28402}
28403
28404/* Return the mangling of TYPE if it is an extended fundamental type.  */
28405
28406static const char *
28407ix86_mangle_type (const_tree type)
28408{
28409  type = TYPE_MAIN_VARIANT (type);
28410
28411  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28412      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28413    return NULL;
28414
28415  switch (TYPE_MODE (type))
28416    {
28417    case TFmode:
28418      /* __float128 is "g".  */
28419      return "g";
28420    case XFmode:
28421      /* "long double" or __float80 is "e".  */
28422      return "e";
28423    default:
28424      return NULL;
28425    }
28426}
28427
/* For 32-bit code we can save PIC register setup by using the hidden
   __stack_chk_fail_local function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
   register, so it is better to call __stack_chk_fail directly.  */
28432
28433static tree
28434ix86_stack_protect_fail (void)
28435{
28436  return TARGET_64BIT
28437	 ? default_external_stack_protect_fail ()
28438	 : default_hidden_stack_protect_fail ();
28439}
28440
28441/* Select a format to encode pointers in exception handling data.  CODE
28442   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
28443   true if the symbol may be affected by dynamic relocations.
28444
28445   ??? All x86 object file formats are capable of representing this.
28446   After all, the relocation needed is the same as for the call insn.
28447   Whether or not a particular assembler allows us to enter such, I
28448   guess we'll have to see.  */
28449int
28450asm_preferred_eh_data_format (int code, int global)
28451{
28452  if (flag_pic)
28453    {
28454      int type = DW_EH_PE_sdata8;
28455      if (!TARGET_64BIT
28456	  || ix86_cmodel == CM_SMALL_PIC
28457	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
28458	type = DW_EH_PE_sdata4;
28459      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
28460    }
28461  if (ix86_cmodel == CM_SMALL
28462      || (ix86_cmodel == CM_MEDIUM && code))
28463    return DW_EH_PE_udata4;
28464  return DW_EH_PE_absptr;
28465}
28466
/* Expand copysign from SIGN to the positive value ABS_VALUE, storing the
   result in RESULT.  If MASK is non-null, it is a mask used to mask out
   the sign bit.  */
28470static void
28471ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
28472{
28473  enum machine_mode mode = GET_MODE (sign);
28474  rtx sgn = gen_reg_rtx (mode);
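  /* Compute RESULT = ABS_VALUE | (SIGN & sign-bit mask).  */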
28475  if (mask == NULL_RTX)
28476    {
28477      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
28478      if (!VECTOR_MODE_P (mode))
28479	{
28480	  /* We need to generate a scalar mode mask in this case.  */
28481	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28482	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28483	  mask = gen_reg_rtx (mode);
28484	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28485	}
28486    }
28487  else
28488    mask = gen_rtx_NOT (mode, mask);
28489  emit_insn (gen_rtx_SET (VOIDmode, sgn,
28490			  gen_rtx_AND (mode, mask, sign)));
28491  emit_insn (gen_rtx_SET (VOIDmode, result,
28492			  gen_rtx_IOR (mode, abs_value, sgn)));
28493}
28494
28495/* Expand fabs (OP0) and return a new rtx that holds the result.  The
28496   mask for masking out the sign-bit is stored in *SMASK, if that is
28497   non-null.  */
28498static rtx
28499ix86_expand_sse_fabs (rtx op0, rtx *smask)
28500{
28501  enum machine_mode mode = GET_MODE (op0);
28502  rtx xa, mask;
28503
28504  xa = gen_reg_rtx (mode);
28505  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28506  if (!VECTOR_MODE_P (mode))
28507    {
28508      /* We need to generate a scalar mode mask in this case.  */
28509      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28510      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28511      mask = gen_reg_rtx (mode);
28512      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28513    }
28514  emit_insn (gen_rtx_SET (VOIDmode, xa,
28515			  gen_rtx_AND (mode, op0, mask)));
28516
28517  if (smask)
28518    *smask = mask;
28519
28520  return xa;
28521}
28522
28523/* Expands a comparison of OP0 with OP1 using comparison code CODE,
28524   swapping the operands if SWAP_OPERANDS is true.  The expanded
28525   code is a forward jump to a newly created label in case the
28526   comparison is true.  The generated label rtx is returned.  */
28527static rtx
28528ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28529                                  bool swap_operands)
28530{
28531  rtx label, tmp;
28532
28533  if (swap_operands)
28534    {
28535      tmp = op0;
28536      op0 = op1;
28537      op1 = tmp;
28538    }
28539
28540  label = gen_label_rtx ();
28541  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28542  emit_insn (gen_rtx_SET (VOIDmode, tmp,
28543			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
28544  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28545  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28546			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28547  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
28548  JUMP_LABEL (tmp) = label;
28549
28550  return label;
28551}
28552
28553/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
28554   using comparison code CODE.  Operands are swapped for the comparison if
28555   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
28556static rtx
28557ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28558			      bool swap_operands)
28559{
28560  enum machine_mode mode = GET_MODE (op0);
28561  rtx mask = gen_reg_rtx (mode);
28562
28563  if (swap_operands)
28564    {
28565      rtx tmp = op0;
28566      op0 = op1;
28567      op1 = tmp;
28568    }
28569
28570  if (mode == DFmode)
28571    emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28572				    gen_rtx_fmt_ee (code, mode, op0, op1)));
28573  else
28574    emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28575				   gen_rtx_fmt_ee (code, mode, op0, op1)));
28576
28577  return mask;
28578}
28579
/* Generate and return an rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
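/* Any value of MODE whose magnitude is at least 2**n is already an
   integer, so the same constant doubles as the "already exact" cutoff
   used by the rounding expanders below.  */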
28582static rtx
28583ix86_gen_TWO52 (enum machine_mode mode)
28584{
28585  REAL_VALUE_TYPE TWO52r;
28586  rtx TWO52;
28587
28588  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28589  TWO52 = const_double_from_real_value (TWO52r, mode);
28590  TWO52 = force_reg (mode, TWO52);
28591
28592  return TWO52;
28593}
28594
28595/* Expand SSE sequence for computing lround from OP1 storing
28596   into OP0.  */
28597void
28598ix86_expand_lround (rtx op0, rtx op1)
28599{
28600  /* C code for the stuff we're doing below:
28601       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
28602       return (long)tmp;
28603   */
28604  enum machine_mode mode = GET_MODE (op1);
28605  const struct real_format *fmt;
28606  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28607  rtx adj;
28608
28609  /* load nextafter (0.5, 0.0) */
28610  fmt = REAL_MODE_FORMAT (mode);
28611  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28612  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
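
  /* Adding the largest value below 0.5 instead of 0.5 itself avoids the
     off-by-one that can occur when OP1 + 0.5 rounds up across an integer
     boundary, while exact halfway cases still round away from zero.  */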
28613
28614  /* adj = copysign (0.5, op1) */
28615  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
28616  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
28617
28618  /* adj = op1 + adj */
28619  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
28620
28621  /* op0 = (imode)adj */
28622  expand_fix (op0, adj, 0);
28623}
28624
/* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
   into OP0.  */
28627void
28628ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
28629{
28630  /* C code for the stuff we're doing below (for do_floor):
28631	xi = (long)op1;
28632        xi -= (double)xi > op1 ? 1 : 0;
28633        return xi;
28634   */
28635  enum machine_mode fmode = GET_MODE (op1);
28636  enum machine_mode imode = GET_MODE (op0);
28637  rtx ireg, freg, label, tmp;
28638
28639  /* reg = (long)op1 */
28640  ireg = gen_reg_rtx (imode);
28641  expand_fix (ireg, op1, 0);
28642
28643  /* freg = (double)reg */
28644  freg = gen_reg_rtx (fmode);
28645  expand_float (freg, ireg, 0);
28646
28647  /* ireg = (freg > op1) ? ireg - 1 : ireg */
28648  label = ix86_expand_sse_compare_and_jump (UNLE,
28649					    freg, op1, !do_floor);
28650  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
28651			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
28652  emit_move_insn (ireg, tmp);
28653
28654  emit_label (label);
28655  LABEL_NUSES (label) = 1;
28656
28657  emit_move_insn (op0, ireg);
28658}
28659
28660/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
28661   result in OPERAND0.  */
28662void
28663ix86_expand_rint (rtx operand0, rtx operand1)
28664{
28665  /* C code for the stuff we're doing below:
28666	xa = fabs (operand1);
28667        if (!isless (xa, 2**52))
28668	  return operand1;
28669        xa = xa + 2**52 - 2**52;
28670        return copysign (xa, operand1);
28671   */
28672  enum machine_mode mode = GET_MODE (operand0);
28673  rtx res, xa, label, TWO52, mask;
28674
28675  res = gen_reg_rtx (mode);
28676  emit_move_insn (res, operand1);
28677
28678  /* xa = abs (operand1) */
28679  xa = ix86_expand_sse_fabs (res, &mask);
28680
28681  /* if (!isless (xa, TWO52)) goto label; */
28682  TWO52 = ix86_gen_TWO52 (mode);
28683  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28684
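  /* For |xa| < TWO52 the sum xa + TWO52 has no fraction bits, so adding and
     subtracting TWO52 rounds xa to an integer in the current rounding mode,
     which is exactly the behavior rint requires.  */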
28685  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28686  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28687
28688  ix86_sse_copysign_to_positive (res, xa, res, mask);
28689
28690  emit_label (label);
28691  LABEL_NUSES (label) = 1;
28692
28693  emit_move_insn (operand0, res);
28694}
28695
28696/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
28697   into OPERAND0.  */
28698void
28699ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
28700{
28701  /* C code for the stuff we expand below.
28702        double xa = fabs (x), x2;
28703        if (!isless (xa, TWO52))
28704          return x;
28705        xa = xa + TWO52 - TWO52;
28706        x2 = copysign (xa, x);
28707     Compensate.  Floor:
28708        if (x2 > x)
28709          x2 -= 1;
28710     Compensate.  Ceil:
28711        if (x2 < x)
28712          x2 -= -1;
28713        return x2;
28714   */
28715  enum machine_mode mode = GET_MODE (operand0);
28716  rtx xa, TWO52, tmp, label, one, res, mask;
28717
28718  TWO52 = ix86_gen_TWO52 (mode);
28719
28720  /* Temporary for holding the result, initialized to the input
28721     operand to ease control flow.  */
28722  res = gen_reg_rtx (mode);
28723  emit_move_insn (res, operand1);
28724
28725  /* xa = abs (operand1) */
28726  xa = ix86_expand_sse_fabs (res, &mask);
28727
28728  /* if (!isless (xa, TWO52)) goto label; */
28729  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28730
28731  /* xa = xa + TWO52 - TWO52; */
28732  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28733  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28734
28735  /* xa = copysign (xa, operand1) */
28736  ix86_sse_copysign_to_positive (xa, xa, res, mask);
28737
28738  /* generate 1.0 or -1.0 */
28739  one = force_reg (mode,
28740	           const_double_from_real_value (do_floor
28741						 ? dconst1 : dconstm1, mode));
28742
28743  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28744  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28745  emit_insn (gen_rtx_SET (VOIDmode, tmp,
28746                          gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero: when no
     adjustment is needed, xa - 0.0 keeps a -0.0 input as -0.0, whereas
     xa + 0.0 would turn it into +0.0.  */
28748  tmp = expand_simple_binop (mode, MINUS,
28749			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28750  emit_move_insn (res, tmp);
28751
28752  emit_label (label);
28753  LABEL_NUSES (label) = 1;
28754
28755  emit_move_insn (operand0, res);
28756}
28757
28758/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
28759   into OPERAND0.  */
28760void
28761ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
28762{
28763  /* C code for the stuff we expand below.
28764	double xa = fabs (x), x2;
28765        if (!isless (xa, TWO52))
28766          return x;
28767	x2 = (double)(long)x;
28768     Compensate.  Floor:
28769	if (x2 > x)
28770	  x2 -= 1;
28771     Compensate.  Ceil:
28772	if (x2 < x)
28773	  x2 += 1;
28774	if (HONOR_SIGNED_ZEROS (mode))
28775	  return copysign (x2, x);
28776	return x2;
28777   */
28778  enum machine_mode mode = GET_MODE (operand0);
28779  rtx xa, xi, TWO52, tmp, label, one, res, mask;
28780
28781  TWO52 = ix86_gen_TWO52 (mode);
28782
28783  /* Temporary for holding the result, initialized to the input
28784     operand to ease control flow.  */
28785  res = gen_reg_rtx (mode);
28786  emit_move_insn (res, operand1);
28787
28788  /* xa = abs (operand1) */
28789  xa = ix86_expand_sse_fabs (res, &mask);
28790
28791  /* if (!isless (xa, TWO52)) goto label; */
28792  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28793
28794  /* xa = (double)(long)x */
28795  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28796  expand_fix (xi, res, 0);
28797  expand_float (xa, xi, 0);
28798
28799  /* generate 1.0 */
28800  one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28801
28802  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28803  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28804  emit_insn (gen_rtx_SET (VOIDmode, tmp,
28805                          gen_rtx_AND (mode, one, tmp)));
28806  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
28807			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28808  emit_move_insn (res, tmp);
28809
28810  if (HONOR_SIGNED_ZEROS (mode))
28811    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28812
28813  emit_label (label);
28814  LABEL_NUSES (label) = 1;
28815
28816  emit_move_insn (operand0, res);
28817}
28818
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq, which is only available on 64-bit targets.  */
28822void
28823ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
28824{
28825  /* C code for the stuff we expand below.
28826        double xa = fabs (x), xa2, x2;
28827        if (!isless (xa, TWO52))
28828          return x;
28829     Using the absolute value and copying back sign makes
28830     -0.0 -> -0.0 correct.
28831        xa2 = xa + TWO52 - TWO52;
28832     Compensate.
28833	dxa = xa2 - xa;
28834        if (dxa <= -0.5)
28835          xa2 += 1;
28836        else if (dxa > 0.5)
28837          xa2 -= 1;
28838        x2 = copysign (xa2, x);
28839        return x2;
28840   */
28841  enum machine_mode mode = GET_MODE (operand0);
28842  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
28843
28844  TWO52 = ix86_gen_TWO52 (mode);
28845
28846  /* Temporary for holding the result, initialized to the input
28847     operand to ease control flow.  */
28848  res = gen_reg_rtx (mode);
28849  emit_move_insn (res, operand1);
28850
28851  /* xa = abs (operand1) */
28852  xa = ix86_expand_sse_fabs (res, &mask);
28853
28854  /* if (!isless (xa, TWO52)) goto label; */
28855  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28856
28857  /* xa2 = xa + TWO52 - TWO52; */
28858  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28859  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
28860
28861  /* dxa = xa2 - xa; */
28862  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
28863
28864  /* generate 0.5, 1.0 and -0.5 */
28865  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
28866  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
28867  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
28868			       0, OPTAB_DIRECT);
28869
28870  /* Compensate.  */
28871  tmp = gen_reg_rtx (mode);
28872  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
28873  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
28874  emit_insn (gen_rtx_SET (VOIDmode, tmp,
28875                          gen_rtx_AND (mode, one, tmp)));
28876  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28877  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
28878  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
28879  emit_insn (gen_rtx_SET (VOIDmode, tmp,
28880                          gen_rtx_AND (mode, one, tmp)));
28881  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28882
28883  /* res = copysign (xa2, operand1) */
28884  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
28885
28886  emit_label (label);
28887  LABEL_NUSES (label) = 1;
28888
28889  emit_move_insn (operand0, res);
28890}
28891
28892/* Expand SSE sequence for computing trunc from OPERAND1 storing
28893   into OPERAND0.  */
28894void
28895ix86_expand_trunc (rtx operand0, rtx operand1)
28896{
28897  /* C code for SSE variant we expand below.
28898        double xa = fabs (x), x2;
28899        if (!isless (xa, TWO52))
28900          return x;
28901        x2 = (double)(long)x;
28902	if (HONOR_SIGNED_ZEROS (mode))
28903	  return copysign (x2, x);
28904	return x2;
28905   */
28906  enum machine_mode mode = GET_MODE (operand0);
28907  rtx xa, xi, TWO52, label, res, mask;
28908
28909  TWO52 = ix86_gen_TWO52 (mode);
28910
28911  /* Temporary for holding the result, initialized to the input
28912     operand to ease control flow.  */
28913  res = gen_reg_rtx (mode);
28914  emit_move_insn (res, operand1);
28915
28916  /* xa = abs (operand1) */
28917  xa = ix86_expand_sse_fabs (res, &mask);
28918
28919  /* if (!isless (xa, TWO52)) goto label; */
28920  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28921
28922  /* x = (double)(long)x */
28923  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28924  expand_fix (xi, res, 0);
28925  expand_float (res, xi, 0);
28926
28927  if (HONOR_SIGNED_ZEROS (mode))
28928    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28929
28930  emit_label (label);
28931  LABEL_NUSES (label) = 1;
28932
28933  emit_move_insn (operand0, res);
28934}
28935
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq, which is only available on 64-bit targets.  */
28938void
28939ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
28940{
28941  enum machine_mode mode = GET_MODE (operand0);
28942  rtx xa, mask, TWO52, label, one, res, smask, tmp;
28943
28944  /* C code for SSE variant we expand below.
        double xa = fabs (x), xa2, x2;
28946        if (!isless (xa, TWO52))
28947          return x;
28948        xa2 = xa + TWO52 - TWO52;
28949     Compensate:
28950        if (xa2 > xa)
28951          xa2 -= 1.0;
28952        x2 = copysign (xa2, x);
28953        return x2;
28954   */
28955
28956  TWO52 = ix86_gen_TWO52 (mode);
28957
28958  /* Temporary for holding the result, initialized to the input
28959     operand to ease control flow.  */
28960  res = gen_reg_rtx (mode);
28961  emit_move_insn (res, operand1);
28962
28963  /* xa = abs (operand1) */
28964  xa = ix86_expand_sse_fabs (res, &smask);
28965
28966  /* if (!isless (xa, TWO52)) goto label; */
28967  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28968
28969  /* res = xa + TWO52 - TWO52; */
28970  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28971  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
28972  emit_move_insn (res, tmp);
28973
28974  /* generate 1.0 */
28975  one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28976
28977  /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
28978  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
28979  emit_insn (gen_rtx_SET (VOIDmode, mask,
28980                          gen_rtx_AND (mode, mask, one)));
28981  tmp = expand_simple_binop (mode, MINUS,
28982			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
28983  emit_move_insn (res, tmp);
28984
28985  /* res = copysign (res, operand1) */
28986  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
28987
28988  emit_label (label);
28989  LABEL_NUSES (label) = 1;
28990
28991  emit_move_insn (operand0, res);
28992}
28993
28994/* Expand SSE sequence for computing round from OPERAND1 storing
28995   into OPERAND0.  */
28996void
28997ix86_expand_round (rtx operand0, rtx operand1)
28998{
28999  /* C code for the stuff we're doing below:
29000        double xa = fabs (x);
29001        if (!isless (xa, TWO52))
29002          return x;
29003        xa = (double)(long)(xa + nextafter (0.5, 0.0));
29004        return copysign (xa, x);
29005   */
29006  enum machine_mode mode = GET_MODE (operand0);
29007  rtx res, TWO52, xa, label, xi, half, mask;
29008  const struct real_format *fmt;
29009  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29010
29011  /* Temporary for holding the result, initialized to the input
29012     operand to ease control flow.  */
29013  res = gen_reg_rtx (mode);
29014  emit_move_insn (res, operand1);
29015
29016  TWO52 = ix86_gen_TWO52 (mode);
29017  xa = ix86_expand_sse_fabs (res, &mask);
29018  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29019
29020  /* load nextafter (0.5, 0.0) */
29021  fmt = REAL_MODE_FORMAT (mode);
29022  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29023  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29024
29025  /* xa = xa + 0.5 */
29026  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29027  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29028
29029  /* xa = (double)(int64_t)xa */
29030  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29031  expand_fix (xi, xa, 0);
29032  expand_float (xa, xi, 0);
29033
29034  /* res = copysign (xa, operand1) */
29035  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29036
29037  emit_label (label);
29038  LABEL_NUSES (label) = 1;
29039
29040  emit_move_insn (operand0, res);
29041}
29042
29043
29044/* Table of valid machine attributes.  */
29045static const struct attribute_spec ix86_attribute_table[] =
29046{
29047  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29048  /* Stdcall attribute says callee is responsible for popping arguments
29049     if they are not variable.  */
29050  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
29051  /* Fastcall attribute says callee is responsible for popping arguments
29052     if they are not variable.  */
29053  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
29054  /* Cdecl attribute says the callee is a normal C declaration */
29055  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
29056  /* Regparm attribute specifies how many integer arguments are to be
29057     passed in registers.  */
29058  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
29059  /* Sseregparm attribute says we are using x86_64 calling conventions
29060     for FP arguments.  */
29061  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29062  /* force_align_arg_pointer says this function realigns the stack at entry.  */
29063  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29064    false, true,  true, ix86_handle_cconv_attribute },
29065#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29066  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29067  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29068  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
29069#endif
29070  { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
29071  { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
29072#ifdef SUBTARGET_ATTRIBUTE_TABLE
29073  SUBTARGET_ATTRIBUTE_TABLE,
29074#endif
29075  /* ms_abi and sysv_abi calling convention function attributes.  */
29076  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29077  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29078  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
29079  /* End element.  */
29080  { NULL,        0, 0, false, false, false, NULL }
29081};
29082
29083/* Implement targetm.vectorize.builtin_vectorization_cost.  */
29084static int
29085ix86_builtin_vectorization_cost (bool runtime_test)
29086{
  /* If the branch of the runtime test is taken, i.e. the vectorized
     version is skipped, this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we subtract
     the latency of a mispredicted branch from the costs that are incurred
     when the vectorized version is executed.

     TODO: The values in individual target tables have to be tuned or new
     fields may be needed.  E.g. on K8, the default branch path is the
     not-taken path.  If the taken path is predicted correctly, the minimum
     penalty of going down the taken path is 1 cycle.  If the taken path is
     not predicted correctly, then the minimum penalty is 10 cycles.  */
29098
  if (runtime_test)
    return -ix86_cost->cond_taken_branch_cost;

  return 0;
29105}
29106
29107/* Implement targetm.vectorize.builtin_vec_perm.  */
29108
29109static tree
29110ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
29111{
29112  tree itype = TREE_TYPE (vec_type);
29113  bool u = TYPE_UNSIGNED (itype);
29114  enum machine_mode vmode = TYPE_MODE (vec_type);
29115  enum ix86_builtins fcode = fcode; /* Silence bogus warning.  */
29116  bool ok = TARGET_SSE2;
29117
29118  switch (vmode)
29119    {
29120    case V4DFmode:
29121      ok = TARGET_AVX;
29122      fcode = IX86_BUILTIN_VEC_PERM_V4DF;
29123      goto get_di;
29124    case V2DFmode:
29125      fcode = IX86_BUILTIN_VEC_PERM_V2DF;
29126    get_di:
29127      itype = ix86_get_builtin_type (IX86_BT_DI);
29128      break;
29129
29130    case V8SFmode:
29131      ok = TARGET_AVX;
29132      fcode = IX86_BUILTIN_VEC_PERM_V8SF;
29133      goto get_si;
29134    case V4SFmode:
29135      ok = TARGET_SSE;
29136      fcode = IX86_BUILTIN_VEC_PERM_V4SF;
29137    get_si:
29138      itype = ix86_get_builtin_type (IX86_BT_SI);
29139      break;
29140
29141    case V2DImode:
29142      fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
29143      break;
29144    case V4SImode:
29145      fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
29146      break;
29147    case V8HImode:
29148      fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
29149      break;
29150    case V16QImode:
29151      fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
29152      break;
29153    default:
29154      ok = false;
29155      break;
29156    }
29157
29158  if (!ok)
29159    return NULL_TREE;
29160
29161  *mask_type = itype;
29162  return ix86_builtins[(int) fcode];
29163}
29164
29165/* Return a vector mode with twice as many elements as VMODE.  */
29166/* ??? Consider moving this to a table generated by genmodes.c.  */
29167
29168static enum machine_mode
29169doublesize_vector_mode (enum machine_mode vmode)
29170{
29171  switch (vmode)
29172    {
29173    case V2SFmode:	return V4SFmode;
29174    case V1DImode:	return V2DImode;
29175    case V2SImode:	return V4SImode;
29176    case V4HImode:	return V8HImode;
29177    case V8QImode:	return V16QImode;
29178
29179    case V2DFmode:	return V4DFmode;
29180    case V4SFmode:	return V8SFmode;
29181    case V2DImode:	return V4DImode;
29182    case V4SImode:	return V8SImode;
29183    case V8HImode:	return V16HImode;
29184    case V16QImode:	return V32QImode;
29185
29186    case V4DFmode:	return V8DFmode;
29187    case V8SFmode:	return V16SFmode;
29188    case V4DImode:	return V8DImode;
29189    case V8SImode:	return V16SImode;
29190    case V16HImode:	return V32HImode;
29191    case V32QImode:	return V64QImode;
29192
29193    default:
29194      gcc_unreachable ();
29195    }
29196}
29197
29198/* Construct (set target (vec_select op0 (parallel perm))) and
29199   return true if that's a valid instruction in the active ISA.  */
29200
29201static bool
29202expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
29203{
29204  rtx rperm[MAX_VECT_LEN], x;
29205  unsigned i;
29206
29207  for (i = 0; i < nelt; ++i)
29208    rperm[i] = GEN_INT (perm[i]);
29209
29210  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
29211  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
29212  x = gen_rtx_SET (VOIDmode, target, x);
29213
29214  x = emit_insn (x);
29215  if (recog_memoized (x) < 0)
29216    {
29217      remove_insn (x);
29218      return false;
29219    }
29220  return true;
29221}
29222
29223/* Similar, but generate a vec_concat from op0 and op1 as well.  */
29224
29225static bool
29226expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
29227			const unsigned char *perm, unsigned nelt)
29228{
29229  enum machine_mode v2mode;
29230  rtx x;
29231
29232  v2mode = doublesize_vector_mode (GET_MODE (op0));
29233  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
29234  return expand_vselect (target, x, perm, nelt);
29235}
29236
29237/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
29238   in terms of blendp[sd] / pblendw / pblendvb.  */
29239
29240static bool
29241expand_vec_perm_blend (struct expand_vec_perm_d *d)
29242{
29243  enum machine_mode vmode = d->vmode;
29244  unsigned i, mask, nelt = d->nelt;
29245  rtx target, op0, op1, x;
29246
29247  if (!TARGET_SSE4_1 || d->op0 == d->op1)
29248    return false;
29249  if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
29250    return false;
29251
29252  /* This is a blend, not a permute.  Elements must stay in their
29253     respective lanes.  */
29254  for (i = 0; i < nelt; ++i)
29255    {
29256      unsigned e = d->perm[i];
29257      if (!(e == i || e == i + nelt))
29258	return false;
29259    }
29260
29261  if (d->testing_p)
29262    return true;
29263
29264  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
29265     decision should be extracted elsewhere, so that we only try that
29266     sequence once all budget==3 options have been tried.  */
29267
29268  /* For bytes, see if bytes move in pairs so we can use pblendw with
29269     an immediate argument, rather than pblendvb with a vector argument.  */
29270  if (vmode == V16QImode)
29271    {
29272      bool pblendw_ok = true;
29273      for (i = 0; i < 16 && pblendw_ok; i += 2)
29274	pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
29275
29276      if (!pblendw_ok)
29277	{
29278	  rtx rperm[16], vperm;
29279
29280	  for (i = 0; i < nelt; ++i)
29281	    rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
29282
29283	  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
29284	  vperm = force_reg (V16QImode, vperm);
29285
29286	  emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
29287	  return true;
29288	}
29289    }
29290
29291  target = d->target;
29292  op0 = d->op0;
29293  op1 = d->op1;
29294  mask = 0;
29295
29296  switch (vmode)
29297    {
29298    case V4DFmode:
29299    case V8SFmode:
29300    case V2DFmode:
29301    case V4SFmode:
29302    case V8HImode:
29303      for (i = 0; i < nelt; ++i)
29304	mask |= (d->perm[i] >= nelt) << i;
29305      break;
29306
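    /* The integer cases below are rewritten as a V8HImode pblendw, so each
       DImode element contributes four bits to the mask and each SImode
       element two; V16QImode only reaches here when bytes move in pairs.  */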
29307    case V2DImode:
29308      for (i = 0; i < 2; ++i)
29309	mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
29310      goto do_subreg;
29311
29312    case V4SImode:
29313      for (i = 0; i < 4; ++i)
29314	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
29315      goto do_subreg;
29316
29317    case V16QImode:
29318      for (i = 0; i < 8; ++i)
29319	mask |= (d->perm[i * 2] >= 16) << i;
29320
29321    do_subreg:
29322      vmode = V8HImode;
29323      target = gen_lowpart (vmode, target);
29324      op0 = gen_lowpart (vmode, op0);
29325      op1 = gen_lowpart (vmode, op1);
29326      break;
29327
29328    default:
29329      gcc_unreachable ();
29330    }
29331
29332  /* This matches five different patterns with the different modes.  */
29333  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
29334  x = gen_rtx_SET (VOIDmode, target, x);
29335  emit_insn (x);
29336
29337  return true;
29338}
29339
29340/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
29341   in terms of the variable form of vpermilps.
29342
29343   Note that we will have already failed the immediate input vpermilps,
29344   which requires that the high and low part shuffle be identical; the
29345   variable form doesn't require that.  */
29346
29347static bool
29348expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
29349{
29350  rtx rperm[8], vperm;
29351  unsigned i;
29352
29353  if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
29354    return false;
29355
29356  /* We can only permute within the 128-bit lane.  */
29357  for (i = 0; i < 8; ++i)
29358    {
29359      unsigned e = d->perm[i];
29360      if (i < 4 ? e >= 4 : e < 4)
29361	return false;
29362    }
29363
29364  if (d->testing_p)
29365    return true;
29366
29367  for (i = 0; i < 8; ++i)
29368    {
29369      unsigned e = d->perm[i];
29370
29371      /* Within each 128-bit lane, the elements of op0 are numbered
29372	 from 0 and the elements of op1 are numbered from 4.  */
29373      if (e >= 8 + 4)
29374	e -= 8;
29375      else if (e >= 4)
29376	e -= 4;
29377
29378      rperm[i] = GEN_INT (e);
29379    }
29380
29381  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
29382  vperm = force_reg (V8SImode, vperm);
29383  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
29384
29385  return true;
29386}
29387
29388/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
29389   in terms of pshufb or vpperm.  */
29390
29391static bool
29392expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
29393{
29394  unsigned i, nelt, eltsz;
29395  rtx rperm[16], vperm, target, op0, op1;
29396
29397  if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
29398    return false;
29399  if (GET_MODE_SIZE (d->vmode) != 16)
29400    return false;
29401
29402  if (d->testing_p)
29403    return true;
29404
29405  nelt = d->nelt;
29406  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
29407
29408  for (i = 0; i < nelt; ++i)
29409    {
29410      unsigned j, e = d->perm[i];
29411      for (j = 0; j < eltsz; ++j)
29412	rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
29413    }
29414
29415  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
29416  vperm = force_reg (V16QImode, vperm);
29417
29418  target = gen_lowpart (V16QImode, d->target);
29419  op0 = gen_lowpart (V16QImode, d->op0);
29420  if (d->op0 == d->op1)
29421    emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
29422  else
29423    {
29424      op1 = gen_lowpart (V16QImode, d->op1);
29425      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
29426    }
29427
29428  return true;
29429}
29430
29431/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
29432   in a single instruction.  */
29433
29434static bool
29435expand_vec_perm_1 (struct expand_vec_perm_d *d)
29436{
29437  unsigned i, nelt = d->nelt;
29438  unsigned char perm2[MAX_VECT_LEN];
29439
29440  /* Check plain VEC_SELECT first, because AVX has instructions that could
29441     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
29442     input where SEL+CONCAT may not.  */
29443  if (d->op0 == d->op1)
29444    {
29445      int mask = nelt - 1;
29446
29447      for (i = 0; i < nelt; i++)
29448	perm2[i] = d->perm[i] & mask;
29449
29450      if (expand_vselect (d->target, d->op0, perm2, nelt))
29451	return true;
29452
29453      /* There are plenty of patterns in sse.md that are written for
29454	 SEL+CONCAT and are not replicated for a single op.  Perhaps
29455	 that should be changed, to avoid the nastiness here.  */
29456
29457      /* Recognize interleave style patterns, which means incrementing
29458	 every other permutation operand.  */
29459      for (i = 0; i < nelt; i += 2)
29460	{
29461	  perm2[i] = d->perm[i] & mask;
29462	  perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
29463	}
29464      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
29465	return true;
29466
29467      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
29468      if (nelt >= 4)
29469	{
29470	  for (i = 0; i < nelt; i += 4)
29471	    {
29472	      perm2[i + 0] = d->perm[i + 0] & mask;
29473	      perm2[i + 1] = d->perm[i + 1] & mask;
29474	      perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
29475	      perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
29476	    }
29477
29478	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
29479	    return true;
29480	}
29481    }
29482
29483  /* Finally, try the fully general two operand permute.  */
29484  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
29485    return true;
29486
29487  /* Recognize interleave style patterns with reversed operands.  */
29488  if (d->op0 != d->op1)
29489    {
29490      for (i = 0; i < nelt; ++i)
29491	{
29492	  unsigned e = d->perm[i];
29493	  if (e >= nelt)
29494	    e -= nelt;
29495	  else
29496	    e += nelt;
29497	  perm2[i] = e;
29498	}
29499
29500      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
29501	return true;
29502    }
29503
29504  /* Try the SSE4.1 blend variable merge instructions.  */
29505  if (expand_vec_perm_blend (d))
29506    return true;
29507
29508  /* Try one of the AVX vpermil variable permutations.  */
29509  if (expand_vec_perm_vpermil (d))
29510    return true;
29511
29512  /* Try the SSSE3 pshufb or XOP vpperm variable permutation.  */
29513  if (expand_vec_perm_pshufb (d))
29514    return true;
29515
29516  return false;
29517}
29518
29519/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
29520   in terms of a pair of pshuflw + pshufhw instructions.  */
29521
29522static bool
29523expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
29524{
29525  unsigned char perm2[MAX_VECT_LEN];
29526  unsigned i;
29527  bool ok;
29528
29529  if (d->vmode != V8HImode || d->op0 != d->op1)
29530    return false;
29531
29532  /* The two permutations only operate in 64-bit lanes.  */
29533  for (i = 0; i < 4; ++i)
29534    if (d->perm[i] >= 4)
29535      return false;
29536  for (i = 4; i < 8; ++i)
29537    if (d->perm[i] < 4)
29538      return false;
29539
29540  if (d->testing_p)
29541    return true;
29542
29543  /* Emit the pshuflw.  */
29544  memcpy (perm2, d->perm, 4);
29545  for (i = 4; i < 8; ++i)
29546    perm2[i] = i;
29547  ok = expand_vselect (d->target, d->op0, perm2, 8);
29548  gcc_assert (ok);
29549
29550  /* Emit the pshufhw.  */
29551  memcpy (perm2 + 4, d->perm + 4, 4);
29552  for (i = 0; i < 4; ++i)
29553    perm2[i] = i;
29554  ok = expand_vselect (d->target, d->target, perm2, 8);
29555  gcc_assert (ok);
29556
29557  return true;
29558}
29559
29560/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
29561   the permutation using the SSSE3 palignr instruction.  This succeeds
29562   when all of the elements in PERM fit within one vector and we merely
29563   need to shift them down so that a single vector permutation has a
29564   chance to succeed.  */
29565
29566static bool
29567expand_vec_perm_palignr (struct expand_vec_perm_d *d)
29568{
29569  unsigned i, nelt = d->nelt;
29570  unsigned min, max;
29571  bool in_order, ok;
29572  rtx shift;
29573
29574  /* Even with AVX, palignr only operates on 128-bit vectors.  */
29575  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
29576    return false;
29577
29578  min = nelt, max = 0;
29579  for (i = 0; i < nelt; ++i)
29580    {
29581      unsigned e = d->perm[i];
29582      if (e < min)
29583	min = e;
29584      if (e > max)
29585	max = e;
29586    }
29587  if (min == 0 || max - min >= nelt)
29588    return false;
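
  /* All referenced elements lie in a window of NELT consecutive positions
     of the op1:op0 concatenation starting at MIN, so shifting right by MIN
     elements reduces this to a single-operand permutation.  */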
29589
29590  /* Given that we have SSSE3, we know we'll be able to implement the
29591     single operand permutation after the palignr with pshufb.  */
29592  if (d->testing_p)
29593    return true;
29594
29595  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
29596  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
29597				  gen_lowpart (TImode, d->op1),
29598				  gen_lowpart (TImode, d->op0), shift));
29599
29600  d->op0 = d->op1 = d->target;
29601
29602  in_order = true;
29603  for (i = 0; i < nelt; ++i)
29604    {
29605      unsigned e = d->perm[i] - min;
29606      if (e != i)
29607	in_order = false;
29608      d->perm[i] = e;
29609    }
29610
29611  /* Test for the degenerate case where the alignment by itself
29612     produces the desired permutation.  */
29613  if (in_order)
29614    return true;
29615
29616  ok = expand_vec_perm_1 (d);
29617  gcc_assert (ok);
29618
29619  return ok;
29620}
29621
29622/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
29623   a two vector permutation into a single vector permutation by using
29624   an interleave operation to merge the vectors.  */
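/* For example, the V4SImode permutation { 0, 5, 1, 4 } draws only on the
   low halves of both inputs, so an interleave-low of op0 and op1 gathers
   every required element and the remaining single-operand permutation of
   that result is { 0, 3, 2, 1 }.  */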
29625
29626static bool
29627expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
29628{
29629  struct expand_vec_perm_d dremap, dfinal;
29630  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
29631  unsigned contents, h1, h2, h3, h4;
29632  unsigned char remap[2 * MAX_VECT_LEN];
29633  rtx seq;
29634  bool ok;
29635
29636  if (d->op0 == d->op1)
29637    return false;
29638
29639  /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
29640     lanes.  We can use similar techniques with the vperm2f128 instruction,
29641     but it requires slightly different logic.  */
29642  if (GET_MODE_SIZE (d->vmode) != 16)
29643    return false;
29644
29645  /* Examine from whence the elements come.  */
29646  contents = 0;
29647  for (i = 0; i < nelt; ++i)
29648    contents |= 1u << d->perm[i];
29649
29650  /* Split the two input vectors into 4 halves.  */
29651  h1 = (1u << nelt2) - 1;
29652  h2 = h1 << nelt2;
29653  h3 = h2 << nelt2;
29654  h4 = h3 << nelt2;
29655
29656  memset (remap, 0xff, sizeof (remap));
29657  dremap = *d;
29658
  /* If all the elements come from the low halves, use interleave low, and
     similarly for interleave high.  If the elements are from mis-matched
     halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
29662  if ((contents & (h1 | h3)) == contents)
29663    {
29664      for (i = 0; i < nelt2; ++i)
29665	{
29666	  remap[i] = i * 2;
29667	  remap[i + nelt] = i * 2 + 1;
29668	  dremap.perm[i * 2] = i;
29669	  dremap.perm[i * 2 + 1] = i + nelt;
29670	}
29671    }
29672  else if ((contents & (h2 | h4)) == contents)
29673    {
29674      for (i = 0; i < nelt2; ++i)
29675	{
29676	  remap[i + nelt2] = i * 2;
29677	  remap[i + nelt + nelt2] = i * 2 + 1;
29678	  dremap.perm[i * 2] = i + nelt2;
29679	  dremap.perm[i * 2 + 1] = i + nelt + nelt2;
29680	}
29681    }
29682  else if ((contents & (h1 | h4)) == contents)
29683    {
29684      for (i = 0; i < nelt2; ++i)
29685	{
29686	  remap[i] = i;
29687	  remap[i + nelt + nelt2] = i + nelt2;
29688	  dremap.perm[i] = i;
29689	  dremap.perm[i + nelt2] = i + nelt + nelt2;
29690	}
29691      if (nelt != 4)
29692	{
29693	  dremap.vmode = V2DImode;
29694	  dremap.nelt = 2;
29695	  dremap.perm[0] = 0;
29696	  dremap.perm[1] = 3;
29697	}
29698    }
29699  else if ((contents & (h2 | h3)) == contents)
29700    {
29701      for (i = 0; i < nelt2; ++i)
29702	{
29703	  remap[i + nelt2] = i;
29704	  remap[i + nelt] = i + nelt2;
29705	  dremap.perm[i] = i + nelt2;
29706	  dremap.perm[i + nelt2] = i + nelt;
29707	}
29708      if (nelt != 4)
29709	{
29710	  dremap.vmode = V2DImode;
29711	  dremap.nelt = 2;
29712	  dremap.perm[0] = 1;
29713	  dremap.perm[1] = 2;
29714	}
29715    }
29716  else
29717    return false;
29718
29719  /* Use the remapping array set up above to move the elements from their
29720     swizzled locations into their final destinations.  */
29721  dfinal = *d;
29722  for (i = 0; i < nelt; ++i)
29723    {
29724      unsigned e = remap[d->perm[i]];
29725      gcc_assert (e < nelt);
29726      dfinal.perm[i] = e;
29727    }
29728  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
29729  dfinal.op1 = dfinal.op0;
29730  dremap.target = dfinal.op0;
29731
29732  /* Test if the final remap can be done with a single insn.  For V4SFmode or
29733     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
29734  start_sequence ();
29735  ok = expand_vec_perm_1 (&dfinal);
29736  seq = get_insns ();
29737  end_sequence ();
29738
29739  if (!ok)
29740    return false;
29741
29742  if (dremap.vmode != dfinal.vmode)
29743    {
29744      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
29745      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
29746      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
29747    }
29748
29749  ok = expand_vec_perm_1 (&dremap);
29750  gcc_assert (ok);
29751
29752  emit_insn (seq);
29753  return true;
29754}
29755
29756/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
29757   permutation with two pshufb insns and an ior.  We should have already
   failed all two-instruction sequences.  */
29759
29760static bool
29761expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
29762{
29763  rtx rperm[2][16], vperm, l, h, op, m128;
29764  unsigned int i, nelt, eltsz;
29765
29766  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
29767    return false;
29768  gcc_assert (d->op0 != d->op1);
29769
29770  nelt = d->nelt;
29771  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
29772
29773  /* Generate two permutation masks.  If the required element is within
29774     the given vector it is shuffled into the proper lane.  If the required
29775     element is in the other vector, force a zero into the lane by setting
29776     bit 7 in the permutation mask.  */
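  /* As a purely illustrative example: for a V8HImode even extraction,
     PERM = {0 2 4 6 8 10 12 14} and ELTSZ = 2, so the selector applied
     to OP0 has bytes {0 1 4 5 8 9 12 13} followed by eight copies of
     -128, while the selector applied to OP1 has eight copies of -128
     followed by {0 1 4 5 8 9 12 13}; the final ior merges the two
     surviving halves.  */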
29777  m128 = GEN_INT (-128);
29778  for (i = 0; i < nelt; ++i)
29779    {
29780      unsigned j, e = d->perm[i];
29781      unsigned which = (e >= nelt);
29782      if (e >= nelt)
29783	e -= nelt;
29784
29785      for (j = 0; j < eltsz; ++j)
29786	{
29787	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
29788	  rperm[1-which][i*eltsz + j] = m128;
29789	}
29790    }
29791
29792  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
29793  vperm = force_reg (V16QImode, vperm);
29794
29795  l = gen_reg_rtx (V16QImode);
29796  op = gen_lowpart (V16QImode, d->op0);
29797  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
29798
29799  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
29800  vperm = force_reg (V16QImode, vperm);
29801
29802  h = gen_reg_rtx (V16QImode);
29803  op = gen_lowpart (V16QImode, d->op1);
29804  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
29805
29806  op = gen_lowpart (V16QImode, d->target);
29807  emit_insn (gen_iorv16qi3 (op, l, h));
29808
29809  return true;
29810}
29811
29812/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
29813   and extract-odd permutations.  */
29814
29815static bool
29816expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
29817{
29818  rtx t1, t2, t3, t4;
29819
29820  switch (d->vmode)
29821    {
29822    case V4DFmode:
29823      t1 = gen_reg_rtx (V4DFmode);
29824      t2 = gen_reg_rtx (V4DFmode);
29825
29826      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
29827      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
29828      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
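      /* (With the standard vperm2f128 immediate encoding, 0x20 selects the
         low 128-bit lane of each source and 0x31 the high lane of each,
         which produces exactly the two orderings named above.)  */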
29829
29830      /* Now an unpck[lh]pd will produce the result required.  */
29831      if (odd)
29832	t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
29833      else
29834	t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
29835      emit_insn (t3);
29836      break;
29837
29838    case V8SFmode:
29839      {
	static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
29841	static const unsigned char perme[8] = { 0, 1,  8,  9, 4, 5, 12, 13 };
29842	static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
29843
29844	t1 = gen_reg_rtx (V8SFmode);
29845	t2 = gen_reg_rtx (V8SFmode);
29846	t3 = gen_reg_rtx (V8SFmode);
29847	t4 = gen_reg_rtx (V8SFmode);
29848
29849	/* Shuffle within the 128-bit lanes to produce:
29850	   { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }.  */
29851	expand_vselect (t1, d->op0, perm1, 8);
29852	expand_vselect (t2, d->op1, perm1, 8);
29853
29854	/* Shuffle the lanes around to produce:
29855	   { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
29856	emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
29857	emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
29858
29859	/* Now a vpermil2p will produce the result required.  */
29860	/* ??? The vpermil2p requires a vector constant.  Another option
29861	   is a unpck[lh]ps to merge the two vectors to produce
29862	   { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }.  Then use another
29863	   vpermilps to get the elements into the final order.  */
29864	d->op0 = t3;
29865	d->op1 = t4;
	memcpy (d->perm, odd ? permo : perme, 8);
29867	expand_vec_perm_vpermil (d);
29868      }
29869      break;
29870
29871    case V2DFmode:
29872    case V4SFmode:
29873    case V2DImode:
29874    case V4SImode:
29875      /* These are always directly implementable by expand_vec_perm_1.  */
29876      gcc_unreachable ();
29877
29878    case V8HImode:
29879      if (TARGET_SSSE3)
29880	return expand_vec_perm_pshufb2 (d);
29881      else
29882	{
29883	  /* We need 2*log2(N)-1 operations to achieve odd/even
29884	     with interleave. */
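	  /* Purely as an illustration (elements written a0..a7 and b0..b7):
	       t1     = { a4 b4 a5 b5 a6 b6 a7 b7 }
	       target = { a0 b0 a1 b1 a2 b2 a3 b3 }
	       t2     = { a2 a6 b2 b6 a3 a7 b3 b7 }
	       target = { a0 a4 b0 b4 a1 a5 b1 b5 }
	     after which a final interleave low/high of target with t2 yields
	     { a0 a2 a4 a6 b0 b2 b4 b6 } or { a1 a3 a5 a7 b1 b3 b5 b7 }.  */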
29885	  t1 = gen_reg_rtx (V8HImode);
29886	  t2 = gen_reg_rtx (V8HImode);
29887	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
29888	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
29889	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
29890	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
29891	  if (odd)
29892	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
29893	  else
29894	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
29895	  emit_insn (t3);
29896	}
29897      break;
29898
29899    case V16QImode:
29900      if (TARGET_SSSE3)
29901	return expand_vec_perm_pshufb2 (d);
29902      else
29903	{
29904	  t1 = gen_reg_rtx (V16QImode);
29905	  t2 = gen_reg_rtx (V16QImode);
29906	  t3 = gen_reg_rtx (V16QImode);
29907	  emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
29908	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
29909	  emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
29910	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
29911	  emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
29912	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
29913	  if (odd)
29914	    t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
29915	  else
29916	    t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
29917	  emit_insn (t3);
29918	}
29919      break;
29920
29921    default:
29922      gcc_unreachable ();
29923    }
29924
29925  return true;
29926}
29927
29928/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
29929   extract-even and extract-odd permutations.  */
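
/* For instance, a V4SImode selector of {1 3 5 7} is matched here with
   ODD == 1, and {0 2 4 6} with ODD == 0.  */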
29930
29931static bool
29932expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
29933{
29934  unsigned i, odd, nelt = d->nelt;
29935
29936  odd = d->perm[0];
29937  if (odd != 0 && odd != 1)
29938    return false;
29939
29940  for (i = 1; i < nelt; ++i)
29941    if (d->perm[i] != 2 * i + odd)
29942      return false;
29943
29944  return expand_vec_perm_even_odd_1 (d, odd);
29945}
29946
29947/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
29948   permutations.  We assume that expand_vec_perm_1 has already failed.  */
29949
29950static bool
29951expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
29952{
29953  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
29954  enum machine_mode vmode = d->vmode;
29955  unsigned char perm2[4];
29956  rtx op0 = d->op0;
29957  bool ok;
29958
29959  switch (vmode)
29960    {
29961    case V4DFmode:
29962    case V8SFmode:
29963      /* These are special-cased in sse.md so that we can optionally
29964	 use the vbroadcast instruction.  They expand to two insns
29965	 if the input happens to be in a register.  */
29966      gcc_unreachable ();
29967
29968    case V2DFmode:
29969    case V2DImode:
29970    case V4SFmode:
29971    case V4SImode:
29972      /* These are always implementable using standard shuffle patterns.  */
29973      gcc_unreachable ();
29974
29975    case V8HImode:
29976    case V16QImode:
29977      /* These can be implemented via interleave.  We save one insn by
	 stopping once we have promoted to V4SImode and then using pshufd.  */
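      /* For example (an invented case): broadcasting element 5 of a
	 V8HImode vector interleaves the high halves once, giving
	 { h4 h4 h5 h5 h6 h6 h7 h7 }; viewed as V4SImode, a pshufd that
	 replicates dword 1 then leaves h5 in every position.  */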
29979      do
29980	{
29981	  optab otab = vec_interleave_low_optab;
29982
29983	  if (elt >= nelt2)
29984	    {
29985	      otab = vec_interleave_high_optab;
29986	      elt -= nelt2;
29987	    }
29988	  nelt2 /= 2;
29989
29990	  op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
29991	  vmode = get_mode_wider_vector (vmode);
29992	  op0 = gen_lowpart (vmode, op0);
29993	}
29994      while (vmode != V4SImode);
29995
29996      memset (perm2, elt, 4);
29997      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
29998      gcc_assert (ok);
29999      return true;
30000
30001    default:
30002      gcc_unreachable ();
30003    }
30004}
30005
30006/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
30007   broadcast permutations.  */
30008
30009static bool
30010expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
30011{
30012  unsigned i, elt, nelt = d->nelt;
30013
30014  if (d->op0 != d->op1)
30015    return false;
30016
30017  elt = d->perm[0];
30018  for (i = 1; i < nelt; ++i)
30019    if (d->perm[i] != elt)
30020      return false;
30021
30022  return expand_vec_perm_broadcast_1 (d);
30023}
30024
30025/* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
30026   With all of the interface bits taken care of, perform the expansion
30027   in D and return true on success.  */
30028
30029static bool
30030ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
30031{
30032  /* Try a single instruction expansion.  */
30033  if (expand_vec_perm_1 (d))
30034    return true;
30035
30036  /* Try sequences of two instructions.  */
30037
30038  if (expand_vec_perm_pshuflw_pshufhw (d))
30039    return true;
30040
30041  if (expand_vec_perm_palignr (d))
30042    return true;
30043
30044  if (expand_vec_perm_interleave2 (d))
30045    return true;
30046
30047  if (expand_vec_perm_broadcast (d))
30048    return true;
30049
30050  /* Try sequences of three instructions.  */
30051
30052  if (expand_vec_perm_pshufb2 (d))
30053    return true;
30054
30055  /* ??? Look for narrow permutations whose element orderings would
30056     allow the promotion to a wider mode.  */
30057
30058  /* ??? Look for sequences of interleave or a wider permute that place
30059     the data into the correct lanes for a half-vector shuffle like
30060     pshuf[lh]w or vpermilps.  */
30061
30062  /* ??? Look for sequences of interleave that produce the desired results.
30063     The combinatorics of punpck[lh] get pretty ugly... */
30064
30065  if (expand_vec_perm_even_odd (d))
30066    return true;
30067
30068  return false;
30069}
30070
30071/* Extract the values from the vector CST into the permutation array in D.
30072   Return 0 on error, 1 if all values from the permutation come from the
30073   first vector, 2 if all values from the second vector, and 3 otherwise.  */
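
/* For instance (illustrative values only): a V4SImode mask of {4 5 6 7}
   yields 2 and is folded to {0 1 2 3}, while {0 5 1 4} yields 3 and is
   left untouched.  */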
30074
30075static int
30076extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
30077{
30078  tree list = TREE_VECTOR_CST_ELTS (cst);
30079  unsigned i, nelt = d->nelt;
30080  int ret = 0;
30081
30082  for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
30083    {
30084      unsigned HOST_WIDE_INT e;
30085
30086      if (!host_integerp (TREE_VALUE (list), 1))
30087	return 0;
30088      e = tree_low_cst (TREE_VALUE (list), 1);
30089      if (e >= 2 * nelt)
30090	return 0;
30091
30092      ret |= (e < nelt ? 1 : 2);
30093      d->perm[i] = e;
30094    }
30095  gcc_assert (list == NULL);
30096
  /* If all of the elements came from the second vector, remap them
     onto the first vector.  */
30098  if (ret == 2)
30099    for (i = 0; i < nelt; ++i)
30100      d->perm[i] -= nelt;
30101
30102  return ret;
30103}
30104
30105static rtx
30106ix86_expand_vec_perm_builtin (tree exp)
30107{
30108  struct expand_vec_perm_d d;
30109  tree arg0, arg1, arg2;
30110
30111  arg0 = CALL_EXPR_ARG (exp, 0);
30112  arg1 = CALL_EXPR_ARG (exp, 1);
30113  arg2 = CALL_EXPR_ARG (exp, 2);
30114
30115  d.vmode = TYPE_MODE (TREE_TYPE (arg0));
30116  d.nelt = GET_MODE_NUNITS (d.vmode);
30117  d.testing_p = false;
30118  gcc_assert (VECTOR_MODE_P (d.vmode));
30119
30120  if (TREE_CODE (arg2) != VECTOR_CST)
30121    {
30122      error_at (EXPR_LOCATION (exp),
30123		"vector permutation requires vector constant");
30124      goto exit_error;
30125    }
30126
30127  switch (extract_vec_perm_cst (&d, arg2))
30128    {
30129    default:
30130      gcc_unreachable();
30131
30132    case 0:
30133      error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
30134      goto exit_error;
30135
30136    case 3:
30137      if (!operand_equal_p (arg0, arg1, 0))
30138	{
30139	  d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
30140	  d.op0 = force_reg (d.vmode, d.op0);
30141	  d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
30142	  d.op1 = force_reg (d.vmode, d.op1);
30143	  break;
30144	}
30145
      /* The elements of PERM reference both operands, but the operands
	 are in fact identical.  Make the permutation easier to match by
	 folding it onto the single input vector.  */
30150      {
30151	unsigned i, nelt = d.nelt;
30152	for (i = 0; i < nelt; ++i)
30153	  if (d.perm[i] >= nelt)
30154	    d.perm[i] -= nelt;
30155      }
30156      /* FALLTHRU */
30157
30158    case 1:
30159      d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
30160      d.op0 = force_reg (d.vmode, d.op0);
30161      d.op1 = d.op0;
30162      break;
30163
30164    case 2:
30165      d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
30166      d.op0 = force_reg (d.vmode, d.op0);
30167      d.op1 = d.op0;
30168      break;
30169    }
30170
30171  d.target = gen_reg_rtx (d.vmode);
30172  if (ix86_expand_vec_perm_builtin_1 (&d))
30173    return d.target;
30174
  /* For compiler-generated permutations, we should never get here, because
     the compiler should also be checking the ok hook.  But since this is
     a builtin to which the user has access, don't abort; issue a sorry
     instead.  */
30178  switch (d.nelt)
30179    {
30180    case 2:
30181      sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
30182      break;
30183    case 4:
30184      sorry ("vector permutation (%d %d %d %d)",
30185	     d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
30186      break;
30187    case 8:
30188      sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
30189	     d.perm[0], d.perm[1], d.perm[2], d.perm[3],
30190	     d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
30191      break;
30192    case 16:
30193      sorry ("vector permutation "
30194	     "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
30195	     d.perm[0], d.perm[1], d.perm[2], d.perm[3],
30196	     d.perm[4], d.perm[5], d.perm[6], d.perm[7],
30197	     d.perm[8], d.perm[9], d.perm[10], d.perm[11],
30198	     d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
30199      break;
30200    default:
30201      gcc_unreachable ();
30202    }
30203 exit_error:
30204  return CONST0_RTX (d.vmode);
30205}
30206
30207/* Implement targetm.vectorize.builtin_vec_perm_ok.  */
30208
30209static bool
30210ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
30211{
30212  struct expand_vec_perm_d d;
30213  int vec_mask;
30214  bool ret, one_vec;
30215
30216  d.vmode = TYPE_MODE (vec_type);
30217  d.nelt = GET_MODE_NUNITS (d.vmode);
30218  d.testing_p = true;
30219
30220  /* Given sufficient ISA support we can just return true here
30221     for selected vector modes.  */
30222  if (GET_MODE_SIZE (d.vmode) == 16)
30223    {
30224      /* All implementable with a single vpperm insn.  */
30225      if (TARGET_XOP)
30226	return true;
30227      /* All implementable with 2 pshufb + 1 ior.  */
30228      if (TARGET_SSSE3)
30229	return true;
30230      /* All implementable with shufpd or unpck[lh]pd.  */
30231      if (d.nelt == 2)
30232	return true;
30233    }
30234
30235  vec_mask = extract_vec_perm_cst (&d, mask);
30236
  /* This hook cannot be called in response to something that the user
     does (unlike the builtin expander), so we should never see an error
     generated from the extraction.  */
30240  gcc_assert (vec_mask > 0 && vec_mask <= 3);
30241  one_vec = (vec_mask != 3);
30242
30243  /* Implementable with shufps or pshufd.  */
30244  if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
30245    return true;
30246
30247  /* Otherwise we have to go through the motions and see if we can
30248     figure out how to generate the requested permutation.  */
30249  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30250  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
30251  if (!one_vec)
30252    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
30253
30254  start_sequence ();
30255  ret = ix86_expand_vec_perm_builtin_1 (&d);
30256  end_sequence ();
30257
30258  return ret;
30259}
30260
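/* Expand an extract-even (ODD == 0) or extract-odd (ODD == 1) permutation
   of operands OP0 and OP1 into TARG.  */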
30261void
30262ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
30263{
30264  struct expand_vec_perm_d d;
30265  unsigned i, nelt;
30266
30267  d.target = targ;
30268  d.op0 = op0;
30269  d.op1 = op1;
30270  d.vmode = GET_MODE (targ);
30271  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30272  d.testing_p = false;
30273
30274  for (i = 0; i < nelt; ++i)
30275    d.perm[i] = i * 2 + odd;
30276
30277  /* We'll either be able to implement the permutation directly...  */
30278  if (expand_vec_perm_1 (&d))
30279    return;
30280
30281  /* ... or we use the special-case patterns.  */
30282  expand_vec_perm_even_odd_1 (&d, odd);
30283}
30284
/* Return the calling-ABI-specific va_list type node appropriate
   for FNDECL.  */
30287
30288tree
30289ix86_fn_abi_va_list (tree fndecl)
30290{
30291  if (!TARGET_64BIT)
30292    return va_list_type_node;
30293  gcc_assert (fndecl != NULL_TREE);
30294
30295  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
30296    return ms_va_list_type_node;
30297  else
30298    return sysv_va_list_type_node;
30299}
30300
/* Return the canonical va_list type specified by TYPE.  If TYPE is not
   a valid va_list type, return NULL_TREE.  */
30303
30304tree
30305ix86_canonical_va_list_type (tree type)
30306{
30307  tree wtype, htype;
30308
30309  /* Resolve references and pointers to va_list type.  */
30310  if (INDIRECT_REF_P (type))
30311    type = TREE_TYPE (type);
30312  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
30313    type = TREE_TYPE (type);
30314
30315  if (TARGET_64BIT)
30316    {
30317      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
30319      htype = type;
30320      if (TREE_CODE (wtype) == ARRAY_TYPE)
30321	{
30322	  /* If va_list is an array type, the argument may have decayed
30323	     to a pointer type, e.g. by being passed to another function.
30324	     In that case, unwrap both types so that we can compare the
30325	     underlying records.  */
30326	  if (TREE_CODE (htype) == ARRAY_TYPE
30327	      || POINTER_TYPE_P (htype))
30328	    {
30329	      wtype = TREE_TYPE (wtype);
30330	      htype = TREE_TYPE (htype);
30331	    }
30332	}
30333      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30334	return va_list_type_node;
30335      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
30337      htype = type;
30338      if (TREE_CODE (wtype) == ARRAY_TYPE)
30339	{
30340	  /* If va_list is an array type, the argument may have decayed
30341	     to a pointer type, e.g. by being passed to another function.
30342	     In that case, unwrap both types so that we can compare the
30343	     underlying records.  */
30344	  if (TREE_CODE (htype) == ARRAY_TYPE
30345	      || POINTER_TYPE_P (htype))
30346	    {
30347	      wtype = TREE_TYPE (wtype);
30348	      htype = TREE_TYPE (htype);
30349	    }
30350	}
30351      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30352	return sysv_va_list_type_node;
30353      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
30355      htype = type;
30356      if (TREE_CODE (wtype) == ARRAY_TYPE)
30357	{
30358	  /* If va_list is an array type, the argument may have decayed
30359	     to a pointer type, e.g. by being passed to another function.
30360	     In that case, unwrap both types so that we can compare the
30361	     underlying records.  */
30362	  if (TREE_CODE (htype) == ARRAY_TYPE
30363	      || POINTER_TYPE_P (htype))
30364	    {
30365	      wtype = TREE_TYPE (wtype);
30366	      htype = TREE_TYPE (htype);
30367	    }
30368	}
30369      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30370	return ms_va_list_type_node;
30371      return NULL_TREE;
30372    }
30373  return std_canonical_va_list_type (type);
30374}
30375
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal name.
   Returns zero if there is no element for this IDX; otherwise
   IDX should be incremented for the next call.
   Note that the base builtin name __builtin_va_list is not enumerated.
   Used from c_common_nodes_and_builtins.  */
30383
30384int
30385ix86_enum_va_list (int idx, const char **pname, tree *ptree)
30386{
30387  if (!TARGET_64BIT)
30388    return 0;
30389  switch (idx) {
30390  case 0:
30391    *ptree = ms_va_list_type_node;
30392    *pname = "__builtin_ms_va_list";
30393    break;
30394  case 1:
30395    *ptree = sysv_va_list_type_node;
30396    *pname = "__builtin_sysv_va_list";
30397    break;
30398  default:
30399    return 0;
30400  }
30401  return 1;
30402}
30403
30404/* Initialize the GCC target structure.  */
30405#undef TARGET_RETURN_IN_MEMORY
30406#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30407
30408#undef TARGET_LEGITIMIZE_ADDRESS
30409#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30410
30411#undef TARGET_ATTRIBUTE_TABLE
30412#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30413#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30414#  undef TARGET_MERGE_DECL_ATTRIBUTES
30415#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30416#endif
30417
30418#undef TARGET_COMP_TYPE_ATTRIBUTES
30419#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30420
30421#undef TARGET_INIT_BUILTINS
30422#define TARGET_INIT_BUILTINS ix86_init_builtins
30423#undef TARGET_BUILTIN_DECL
30424#define TARGET_BUILTIN_DECL ix86_builtin_decl
30425#undef TARGET_EXPAND_BUILTIN
30426#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30427
30428#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30429#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30430  ix86_builtin_vectorized_function
30431
30432#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30433#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30434
30435#undef TARGET_BUILTIN_RECIPROCAL
30436#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30437
30438#undef TARGET_ASM_FUNCTION_EPILOGUE
30439#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30440
30441#undef TARGET_ENCODE_SECTION_INFO
30442#ifndef SUBTARGET_ENCODE_SECTION_INFO
30443#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30444#else
30445#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30446#endif
30447
30448#undef TARGET_ASM_OPEN_PAREN
30449#define TARGET_ASM_OPEN_PAREN ""
30450#undef TARGET_ASM_CLOSE_PAREN
30451#define TARGET_ASM_CLOSE_PAREN ""
30452
30453#undef TARGET_ASM_BYTE_OP
30454#define TARGET_ASM_BYTE_OP ASM_BYTE
30455
30456#undef TARGET_ASM_ALIGNED_HI_OP
30457#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30458#undef TARGET_ASM_ALIGNED_SI_OP
30459#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30460#ifdef ASM_QUAD
30461#undef TARGET_ASM_ALIGNED_DI_OP
30462#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30463#endif
30464
30465#undef TARGET_ASM_UNALIGNED_HI_OP
30466#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30467#undef TARGET_ASM_UNALIGNED_SI_OP
30468#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30469#undef TARGET_ASM_UNALIGNED_DI_OP
30470#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30471
30472#undef TARGET_SCHED_ADJUST_COST
30473#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30474#undef TARGET_SCHED_ISSUE_RATE
30475#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30476#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30477#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30478  ia32_multipass_dfa_lookahead
30479
30480#undef TARGET_FUNCTION_OK_FOR_SIBCALL
30481#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30482
30483#ifdef HAVE_AS_TLS
30484#undef TARGET_HAVE_TLS
30485#define TARGET_HAVE_TLS true
30486#endif
30487#undef TARGET_CANNOT_FORCE_CONST_MEM
30488#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30489#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30490#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30491
30492#undef TARGET_DELEGITIMIZE_ADDRESS
30493#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30494
30495#undef TARGET_MS_BITFIELD_LAYOUT_P
30496#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
30497
30498#if TARGET_MACHO
30499#undef TARGET_BINDS_LOCAL_P
30500#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30501#endif
30502#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30503#undef TARGET_BINDS_LOCAL_P
30504#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30505#endif
30506
30507#undef TARGET_ASM_OUTPUT_MI_THUNK
30508#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30509#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30510#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30511
30512#undef TARGET_ASM_FILE_START
30513#define TARGET_ASM_FILE_START x86_file_start
30514
30515#undef TARGET_DEFAULT_TARGET_FLAGS
30516#define TARGET_DEFAULT_TARGET_FLAGS	\
30517  (TARGET_DEFAULT			\
30518   | TARGET_SUBTARGET_DEFAULT		\
30519   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
30520   | MASK_FUSED_MADD)
30521
30522#undef TARGET_HANDLE_OPTION
30523#define TARGET_HANDLE_OPTION ix86_handle_option
30524
30525#undef TARGET_RTX_COSTS
30526#define TARGET_RTX_COSTS ix86_rtx_costs
30527#undef TARGET_ADDRESS_COST
30528#define TARGET_ADDRESS_COST ix86_address_cost
30529
30530#undef TARGET_FIXED_CONDITION_CODE_REGS
30531#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30532#undef TARGET_CC_MODES_COMPATIBLE
30533#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30534
30535#undef TARGET_MACHINE_DEPENDENT_REORG
30536#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30537
30538#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30539#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
30540
30541#undef TARGET_BUILD_BUILTIN_VA_LIST
30542#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30543
30544#undef TARGET_FN_ABI_VA_LIST
30545#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30546
30547#undef TARGET_CANONICAL_VA_LIST_TYPE
30548#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30549
30550#undef TARGET_EXPAND_BUILTIN_VA_START
30551#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30552
30553#undef TARGET_MD_ASM_CLOBBERS
30554#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
30555
30556#undef TARGET_PROMOTE_PROTOTYPES
30557#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30558#undef TARGET_STRUCT_VALUE_RTX
30559#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30560#undef TARGET_SETUP_INCOMING_VARARGS
30561#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30562#undef TARGET_MUST_PASS_IN_STACK
30563#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30564#undef TARGET_PASS_BY_REFERENCE
30565#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30566#undef TARGET_INTERNAL_ARG_POINTER
30567#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
30568#undef TARGET_UPDATE_STACK_BOUNDARY
30569#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30570#undef TARGET_GET_DRAP_RTX
30571#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30572#undef TARGET_STRICT_ARGUMENT_NAMING
30573#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30574#undef TARGET_STATIC_CHAIN
30575#define TARGET_STATIC_CHAIN ix86_static_chain
30576#undef TARGET_TRAMPOLINE_INIT
30577#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
30578
30579#undef TARGET_GIMPLIFY_VA_ARG_EXPR
30580#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
30581
30582#undef TARGET_SCALAR_MODE_SUPPORTED_P
30583#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30584
30585#undef TARGET_VECTOR_MODE_SUPPORTED_P
30586#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30587
30588#undef TARGET_C_MODE_FOR_SUFFIX
30589#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
30590
30591#ifdef HAVE_AS_TLS
30592#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30593#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30594#endif
30595
30596#ifdef SUBTARGET_INSERT_ATTRIBUTES
30597#undef TARGET_INSERT_ATTRIBUTES
30598#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30599#endif
30600
30601#undef TARGET_MANGLE_TYPE
30602#define TARGET_MANGLE_TYPE ix86_mangle_type
30603
30604#undef TARGET_STACK_PROTECT_FAIL
30605#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30606
30607#undef TARGET_FUNCTION_VALUE
30608#define TARGET_FUNCTION_VALUE ix86_function_value
30609
30610#undef TARGET_SECONDARY_RELOAD
30611#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30612
30613#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30614#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
30615  ix86_builtin_vectorization_cost
30616#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
30617#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
30618  ix86_vectorize_builtin_vec_perm
30619#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
30620#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
30621  ix86_vectorize_builtin_vec_perm_ok
30622
30623#undef TARGET_SET_CURRENT_FUNCTION
30624#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30625
30626#undef TARGET_OPTION_VALID_ATTRIBUTE_P
30627#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30628
30629#undef TARGET_OPTION_SAVE
30630#define TARGET_OPTION_SAVE ix86_function_specific_save
30631
30632#undef TARGET_OPTION_RESTORE
30633#define TARGET_OPTION_RESTORE ix86_function_specific_restore
30634
30635#undef TARGET_OPTION_PRINT
30636#define TARGET_OPTION_PRINT ix86_function_specific_print
30637
30638#undef TARGET_CAN_INLINE_P
30639#define TARGET_CAN_INLINE_P ix86_can_inline_p
30640
30641#undef TARGET_EXPAND_TO_RTL_HOOK
30642#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30643
30644#undef TARGET_LEGITIMATE_ADDRESS_P
30645#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
30646
30647#undef TARGET_IRA_COVER_CLASSES
30648#define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
30649
30650#undef TARGET_FRAME_POINTER_REQUIRED
30651#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
30652
30653#undef TARGET_CAN_ELIMINATE
30654#define TARGET_CAN_ELIMINATE ix86_can_eliminate
30655
30656#undef TARGET_ASM_CODE_END
30657#define TARGET_ASM_CODE_END ix86_code_end
30658
30659struct gcc_target targetm = TARGET_INITIALIZER;
30660
30661#include "gt-i386.h"
30662