expmed.c revision 261188
/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "toplev.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "real.h"
#include "recog.h"
#include "langhooks.h"

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT, rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT, rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, rtx, int);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
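
/* The test above relies on x & (x - 1) clearing the lowest set bit:
   the result is zero exactly when at most one bit is set, i.e. when x
   is zero or a power of two.  For example, 8 & 7 == 0, while 6 & 5 == 4.  */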

/* Nonzero means divides or modulus operations are relatively cheap for
   powers of two, so don't use branches; emit the operation instead.
   Usually, this will mean that the MD file will emit non-branch
   sequences.  */

static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
static bool smod_pow2_cheap[NUM_MACHINE_MODES];

#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* For compilers that support multiple targets with different word sizes,
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
   is the H8/300(H) compiler.  */

#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif

/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv	0
#define CODE_FOR_insv	CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv	0
#define CODE_FOR_extv	CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv	0
#define CODE_FOR_extzv	CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif

/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
static int zero_cost;
static int add_cost[NUM_MACHINE_MODES];
static int neg_cost[NUM_MACHINE_MODES];
static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int sdiv_cost[NUM_MACHINE_MODES];
static int udiv_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];

void
init_expmed (void)
{
  struct
  {
    struct rtx_def reg;		rtunion reg_fld[2];
    struct rtx_def plus;	rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def mult;	rtunion mult_fld1;
    struct rtx_def sdiv;	rtunion sdiv_fld1;
    struct rtx_def udiv;	rtunion udiv_fld1;
    struct rtx_def zext;
    struct rtx_def sdiv_32;	rtunion sdiv_32_fld1;
    struct rtx_def smod_32;	rtunion smod_32_fld1;
    struct rtx_def wide_mult;	rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;	rtunion shift_fld1;
    struct rtx_def shift_mult;	rtunion shift_mult_fld1;
    struct rtx_def shift_add;	rtunion shift_add_fld1;
    struct rtx_def shift_sub;	rtunion shift_sub_fld1;
  } all;
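
  /* Each rtx_def member above is immediately followed by enough rtunion
     slots to hold its operands, so the members of ALL can be handed to
     rtx_cost below as if they were ordinary heap-allocated rtxes.  */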

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;

  zero_cost = rtx_cost (const0_rtx, 0);

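  /* Precompute the constants 1 << m (as multiplier operands) and m
     itself (as shift counts) for every shift amount costed below.  */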
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }

  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  REGNO (&all.reg) = LAST_VIRTUAL_REGISTER + 1;

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub, MINUS);
  XEXP (&all.shift_sub, 0) = &all.shift_mult;
  XEXP (&all.shift_sub, 1) = &all.reg;

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
       mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      PUT_MODE (&all.reg, mode);
      PUT_MODE (&all.plus, mode);
      PUT_MODE (&all.neg, mode);
      PUT_MODE (&all.mult, mode);
      PUT_MODE (&all.sdiv, mode);
      PUT_MODE (&all.udiv, mode);
      PUT_MODE (&all.sdiv_32, mode);
      PUT_MODE (&all.smod_32, mode);
      PUT_MODE (&all.wide_trunc, mode);
      PUT_MODE (&all.shift, mode);
      PUT_MODE (&all.shift_mult, mode);
      PUT_MODE (&all.shift_add, mode);
      PUT_MODE (&all.shift_sub, mode);

      add_cost[mode] = rtx_cost (&all.plus, SET);
      neg_cost[mode] = rtx_cost (&all.neg, SET);
      mul_cost[mode] = rtx_cost (&all.mult, SET);
      sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
      udiv_cost[mode] = rtx_cost (&all.udiv, SET);

      sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
			       <= 2 * add_cost[mode]);
      smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
			       <= 4 * add_cost[mode]);
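      /* The constant 32 above stands in for an arbitrary power of two.
	 When the target's own divide or modulus insn costs no more than
	 a few additions, it is emitted directly instead of the branchy
	 shift-and-mask expansion.  */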

      wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	{
	  PUT_MODE (&all.zext, wider_mode);
	  PUT_MODE (&all.wide_mult, wider_mode);
	  PUT_MODE (&all.wide_lshr, wider_mode);
	  XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

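	  /* wide_trunc models a full multiply-highpart sequence:
	     zero-extend both operands, multiply in the wider mode,
	     shift the product right by the narrow bitsize, truncate.  */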
	  mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
	  mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
	}

      shift_cost[mode][0] = 0;
      shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];

      n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
      for (m = 1; m < n; m++)
	{
	  XEXP (&all.shift, 1) = cint[m];
	  XEXP (&all.shift_mult, 1) = pow2[m];

	  shift_cost[mode][m] = rtx_cost (&all.shift, SET);
	  shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
	  shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
	}
    }
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */
enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
	{
	  data = &insn_data[CODE_FOR_insv];
	  break;
	}
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
	{
	  data = &insn_data[CODE_FOR_extv];
	  break;
	}
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
	{
	  data = &insn_data[CODE_FOR_extzv];
	  break;
	}
      return MAX_MACHINE_MODE;

    default:
      gcc_unreachable ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}


/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

/* ??? Note that there are two different ideas here for how
   to determine the size to count bits within, for a register.
   One is BITS_PER_WORD, and the other is the size of operand 3
   of the insv pattern.

   If operand 3 of the insv pattern is VOIDmode, then we will use
   BITS_PER_WORD; otherwise, we use the mode of operand 3.  */

rtx
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
		 rtx value)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  int byte_offset;
  rtx orig_value;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));

      byte_offset = 0;

      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
	{
	  int difference = inner_mode_size - outer_mode_size;

	  if (WORDS_BIG_ENDIAN)
	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
	  if (BYTES_BIG_ENDIAN)
	    byte_offset += difference % UNITS_PER_WORD;
	}
      else
	byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return value;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
	  != CODE_FOR_nothing)
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) vec_set_optab->handlers[outermode].insn_code;
      int pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      rtx src = value;
      rtx dest = op0;
      rtx pat, seq;
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;

      start_sequence ();

      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
	src = copy_to_mode_reg (mode1, src);

      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
	rtxpos = copy_to_mode_reg (mode2, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
	 for our targets and all insns should take them as outputs.  */
      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
		  && (*insn_data[icode].operand[1].predicate) (src, mode1)
		  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
	{
	  emit_insn (seq);
	  emit_insn (pat);
	  return dest;
	}
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  offset = bitnum / unit;
  bitpos = bitnum % unit;
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                + (offset * UNITS_PER_WORD);
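  /* For example, on a 32-bit target a register access with BITNUM == 70
     yields OFFSET == 2, BITPOS == 6 and BYTE_OFFSET == 8.  */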

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
	  ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
	     || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
	     && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
	     || (offset * BITS_PER_UNIT % bitsize == 0
		 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (MEM_P (op0))
	op0 = adjust_address (op0, fieldmode, offset);
      else if (GET_MODE (op0) != fieldmode)
	op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
				   byte_offset);
      emit_move_insn (op0, value);
      return value;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_address (op0, imode, 0);
	else
	  {
	    gcc_assert (imode != BLKmode);
	    op0 = gen_lowpart (imode, op0);
	  }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (movstrict_optab->handlers[fieldmode].insn_code
	  != CODE_FOR_nothing))
    {
      int icode = movstrict_optab->handlers[fieldmode].insn_code;

      /* Get appropriate low part of the value being stored.  */
      if (GET_CODE (value) == CONST_INT || REG_P (value))
	value = gen_lowpart (fieldmode, value);
      else if (!(GET_CODE (value) == SYMBOL_REF
		 || GET_CODE (value) == LABEL_REF
		 || GET_CODE (value) == CONST))
	value = convert_to_mode (fieldmode, value, 0);

      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
	value = copy_to_mode_reg (fieldmode, value);

      if (GET_CODE (op0) == SUBREG)
	{
	  /* Else we've got some float mode source being extracted into
	     a different float mode destination -- this combination of
	     subregs results in Severe Tire Damage.  */
	  gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
		      || GET_MODE_CLASS (fieldmode) == MODE_INT
		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
	  op0 = SUBREG_REG (op0);
	}

      emit_insn (GEN_FCN (icode)
		 (gen_rtx_SUBREG (fieldmode, op0,
				  (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
				  + (offset * UNITS_PER_WORD)),
				  value));

      return value;
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force
	 is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards ? nwords - i - 1 : i);
	  unsigned int bit_offset = (backwards
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);

	  store_bit_field (op0, MIN (BITS_PER_WORD,
				     bitsize - i * BITS_PER_WORD),
			   bitnum + bit_offset, word_mode,
			   operand_subword_force (value, wordnum, fieldmode));
	}
      return value;
    }

  /* From here on we can assume that the field to be stored in fits
     within a single word; any wider field was handled above.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (!REG_P (op0))
	    {
	      /* Since this is a destination (lvalue), we can't copy
		 it to a pseudo.  We can remove a SUBREG that does not
		 change the size of the operand.  Such a SUBREG may
		 have been added above.  */
	      gcc_assert (GET_CODE (op0) == SUBREG
			  && (GET_MODE_SIZE (GET_MODE (op0))
			      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
	      op0 = SUBREG_REG (op0);
	    }
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
		                op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && bitsize > 0
      && GET_MODE_BITSIZE (op_mode) >= bitsize
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
	    && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
      && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
							VOIDmode))
    {
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      rtx pat;
      enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
      int save_volatile_ok = volatile_ok;

      volatile_ok = 1;

      /* If this machine's insv can only insert into a register, copy OP0
	 into a register and save it back later.  */
      if (MEM_P (op0)
	  && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
		(op0, VOIDmode)))
	{
	  rtx tempreg;
	  enum machine_mode bestmode;

	  /* Get the mode to use for inserting into this field.  If OP0 is
	     BLKmode, get the smallest mode consistent with the alignment. If
	     OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
	     mode. Otherwise, use the smallest mode containing the field.  */

	  if (GET_MODE (op0) == BLKmode
	      || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
	    bestmode
	      = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
			       MEM_VOLATILE_P (op0));
	  else
	    bestmode = GET_MODE (op0);

	  if (bestmode == VOIDmode
	      || GET_MODE_SIZE (bestmode) < GET_MODE_SIZE (fieldmode)
	      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
		  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
	    goto insv_loses;

	  /* Adjust address to point to the containing unit of that mode.
	     Compute offset as multiple of this unit, counting in bytes.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  bitpos = bitnum % unit;
	  op0 = adjust_address (op0, bestmode, offset);

	  /* Fetch that unit, store the bitfield in it, then store
	     the unit.  */
	  tempreg = copy_to_reg (op0);
	  store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
	  emit_move_insn (op0, tempreg);
	  return value;
	}
      volatile_ok = save_volatile_ok;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
	xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in MAXMODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
	xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

      /* On big-endian machines, we count bits from the most significant.
	 If the bit field insn does not, we must invert.  */
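      /* For example, with UNIT == 32, BITSIZE == 8 and a bit position
	 of 4 counted from the msb, the same field starts at bit
	 32 - 8 - 4 == 20 counted from the lsb.  */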

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;

      /* We have been counting XBITPOS within UNIT.
	 Count instead within the size of the register.  */
      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
	xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

      unit = GET_MODE_BITSIZE (maxmode);

      /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != maxmode)
	{
	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	    {
	      /* Optimization: Don't bother really extending VALUE
		 if it has all the bits we will actually use.  However,
		 if we must narrow it, be sure we do it correctly.  */

	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
		{
		  rtx tmp;

		  tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
		  if (! tmp)
		    tmp = simplify_gen_subreg (maxmode,
					       force_reg (GET_MODE (value),
							  value1),
					       GET_MODE (value), 0);
		  value1 = tmp;
		}
	      else
		value1 = gen_lowpart (maxmode, value1);
	    }
	  else if (GET_CODE (value) == CONST_INT)
	    value1 = gen_int_mode (INTVAL (value), maxmode);
	  else
	    /* Parse phase is supposed to make VALUE's data type
	       match that of the component reference, which is a type
	       at least as wide as the field; so VALUE should have
	       a mode that corresponds to that type.  */
	    gcc_assert (CONSTANT_P (value));
	}

      /* If this machine's insv insists on a register,
	 get VALUE1 into a register.  */
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
	     (value1, maxmode)))
	value1 = force_reg (maxmode, value1);

      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
      if (pat)
	emit_insn (pat);
      else
	{
	  delete_insns_since (last);
	  store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
	}
    }
  else
    insv_loses:
    /* Insv is not available; store using shifts and boolean ops.  */
    store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
  return value;
}

/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
     (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
    (If OP0 is a register, it may be a full word or a narrower mode,
     but BITPOS still counts within a full word,
     which is significant on bigendian machines.)  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      gcc_assert (!offset);
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	{
	  store_split_bit_field (op0, bitsize, bitpos, value);
	  return;
	}
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.
	 We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
	mode = word_mode;
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
				 value);
	  return;
	}

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
      /* BITPOS is the distance between our msb
	 and that of the containing datum.
	 Convert it to the distance from the lsb.  */
      bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (GET_CODE (value) == CONST_INT)
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
	all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	{
	  if ((REG_P (value) || GET_CODE (value) == SUBREG)
	      && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
	    value = gen_lowpart (mode, value);
	  else
	    value = convert_to_mode (mode, value, 1);
	}

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
    }

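  /* The net effect of the code below is
     OP0 = (OP0 & ~(MASK << BITPOS)) | (VALUE << BITPOS),
     where MASK is BITSIZE one bits.  */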
  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
			   mask_rtx (mode, bitpos, bitsize, 1),
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    emit_move_insn (op0, temp);
}

/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos, rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
     that much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;
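      /* For example, with UNIT == 8 and BITPOS == 6, the first piece
	 starts at bit 6 of byte 0; once two bits are done, the next
	 piece starts at bit 0 of byte 1.  */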

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
	{
	  int total_bits;

	  /* We must do an endian conversion exactly the same way as it is
	     done in extract_bit_field, so that the two calls to
	     extract_fixed_bit_field will have comparable arguments.  */
	  if (!MEM_P (value) || GET_MODE (value) == BLKmode)
	    total_bits = BITS_PER_WORD;
	  else
	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));

	  /* Fetch successively less significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.  */
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    total_bits - bitsize + bitsdone,
					    NULL_RTX, 1);
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    bitsdone, NULL_RTX, 1);
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (REG_P (op0))
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
         store_fixed_bit_field wants offset in bytes.  */
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
			     thispos, part);
      bitsdone += thissize;
    }
}

/* Generate code to extract a bit-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

1084   we do so, and return TARGET.
1085   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1086   if they are equally easy.  */
1087
1088rtx
1089extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1090		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1091		   enum machine_mode mode, enum machine_mode tmode)
1092{
1093  unsigned int unit
1094    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1095  unsigned HOST_WIDE_INT offset, bitpos;
1096  rtx op0 = str_rtx;
1097  rtx spec_target = target;
1098  rtx spec_target_subreg = 0;
1099  enum machine_mode int_mode;
1100  enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1101  enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1102  enum machine_mode mode1;
1103  int byte_offset;
1104
1105  if (tmode == VOIDmode)
1106    tmode = mode;
1107
1108  while (GET_CODE (op0) == SUBREG)
1109    {
1110      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1111      op0 = SUBREG_REG (op0);
1112    }
1113
1114  /* If we have an out-of-bounds access to a register, just return an
1115     uninitialized register of the required mode.  This can occur if the
1116     source code contains an out-of-bounds access to a small array.  */
1117  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1118    return gen_reg_rtx (tmode);
1119
1120  if (REG_P (op0)
1121      && mode == GET_MODE (op0)
1122      && bitnum == 0
1123      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1124    {
1125      /* We're trying to extract a full register from itself.  */
1126      return op0;
1127    }
1128
1129  /* Use vec_extract patterns for extracting parts of vectors whenever
1130     available.  */
1131  if (VECTOR_MODE_P (GET_MODE (op0))
1132      && !MEM_P (op0)
1133      && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1134	  != CODE_FOR_nothing)
1135      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1136	  == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1137    {
1138      enum machine_mode outermode = GET_MODE (op0);
1139      enum machine_mode innermode = GET_MODE_INNER (outermode);
1140      int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1141      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1142      rtx rtxpos = GEN_INT (pos);
1143      rtx src = op0;
1144      rtx dest = NULL, pat, seq;
1145      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1146      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1147      enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1148
1149      if (innermode == tmode || innermode == mode)
1150	dest = target;
1151
1152      if (!dest)
1153	dest = gen_reg_rtx (innermode);
1154
1155      start_sequence ();
1156
1157      if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1158	dest = copy_to_mode_reg (mode0, dest);
1159
1160      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1161	src = copy_to_mode_reg (mode1, src);
1162
      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
	rtxpos = copy_to_mode_reg (mode2, rtxpos);

      /* We could handle this, but we should always be called with a pseudo
	 for our targets and all insns should take them as outputs.  */
      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
		  && (*insn_data[icode].operand[1].predicate) (src, mode1)
		  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));

      pat = GEN_FCN (icode) (dest, src, rtxpos);
      seq = get_insns ();
      end_sequence ();
      if (pat)
	{
	  emit_insn (seq);
	  emit_insn (pat);
	  return dest;
	}
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_address (op0, imode, 0);
	else
	  {
	    gcc_assert (imode != BLKmode);
	    op0 = gen_lowpart (imode, op0);

	    /* If we got a SUBREG, force it into a register since we
	       aren't going to be able to do another SUBREG on it.  */
	    if (GET_CODE (op0) == SUBREG)
	      op0 = force_reg (imode, op0);
	  }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  bitpos = bitnum % unit;
  offset = bitnum / unit;
  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1  = (SCALAR_INT_MODE_P (tmode)
	    ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
	    : mode);

  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
	&& bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
	   /* ??? The big endian test here is wrong.  This is correct
	      if the value is in a register, and if mode_for_size is not
	      the same mode as op0.  This causes us to get unnecessarily
	      inefficient code from the Thumb port when -mbig-endian.  */
	   && (BYTES_BIG_ENDIAN
	       ? bitpos + bitsize == BITS_PER_WORD
	       : bitpos == 0)))
      && ((!MEM_P (op0)
	   && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
				     GET_MODE_BITSIZE (GET_MODE (op0)))
	   && GET_MODE_SIZE (mode1) != 0
	   && byte_offset % GET_MODE_SIZE (mode1) == 0)
	  || (MEM_P (op0)
	      && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
		  || (offset * BITS_PER_UNIT % bitsize == 0
		      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (mode1 != GET_MODE (op0))
	{
	  if (MEM_P (op0))
	    op0 = adjust_address (op0, mode1, offset);
	  else
	    {
	      rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
					     byte_offset);
	      if (sub == NULL)
		goto no_subreg_mode_swap;
	      op0 = sub;
	    }
	}
      if (mode1 != mode)
	return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target))
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_insn (gen_rtx_CLOBBER (VOIDmode, target));

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  unsigned int wordnum
	    = (WORDS_BIG_ENDIAN
	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
	       : i);
	  /* Offset from start of field in OP0.  */
	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
				     ? MAX (0, ((int) bitsize - ((int) i + 1)
						* (int) BITS_PER_WORD))
				     : (int) i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
					   bitsize - i * BITS_PER_WORD),
				 bitnum + bit_offset, 1, target_part, mode,
				 word_mode);

	  gcc_assert (target_part);

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	    {
	      unsigned int i, total_words;

	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		emit_move_insn
		  (operand_subword (target,
				    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
				    1, VOIDmode),
		   const0_rtx);
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     build_int_cst (NULL_TREE,
					    GET_MODE_BITSIZE (mode) - bitsize),
			     NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   build_int_cst (NULL_TREE,
					  GET_MODE_BITSIZE (mode) - bitsize),
			   NULL_RTX, 0);
    }

  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  /* Should probably push op0 out to memory and then do a load.  */
  gcc_assert (int_mode != BLKmode);

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */
  if (!MEM_P (op0))
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (!REG_P (op0))
	    op0 = copy_to_reg (op0);
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
		                op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }

  /* Now OFFSET is nonzero only for memory operands.  */

  if (unsignedp)
    {
      if (HAVE_extzv
	  && bitsize > 0
	  && GET_MODE_BITSIZE (extzv_mode) >= bitsize
	  && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
	{
	  unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0;
	  rtx xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);

	  if (MEM_P (xop0))
	    {
	      int save_volatile_ok = volatile_ok;
	      volatile_ok = 1;

	      /* Is the memory operand acceptable?  */
	      if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
		     (xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment. If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode. Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum,
					      MEM_ALIGN (xop0), maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
			  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
		    goto extzv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = adjust_address (xop0, bestmode, xoffset);

		  /* Make sure register is big enough for the whole field.  */
		  if (xoffset * BITS_PER_UNIT + unit
		      < offset * BITS_PER_UNIT + bitsize)
		    goto extzv_loses;

		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = adjust_address (xop0, byte_mode, xoffset);

	      volatile_ok = save_volatile_ok;
	    }

	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode) to make it acceptable to the format of extzv.  */
1464	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1465	    goto extzv_loses;
1466	  if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1467	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1468
1469	  /* On big-endian machines, we count bits from the most significant.
1470	     If the bit field insn does not, we must invert.  */
1471	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1472	    xbitpos = unit - bitsize - xbitpos;
1473
1474	  /* Now convert from counting within UNIT to counting in MAXMODE.  */
1475	  if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1476	    xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1477
1478	  unit = GET_MODE_BITSIZE (maxmode);
1479
1480	  if (xtarget == 0)
1481	    xtarget = xspec_target = gen_reg_rtx (tmode);
1482
1483	  if (GET_MODE (xtarget) != maxmode)
1484	    {
1485	      if (REG_P (xtarget))
1486		{
1487		  int wider = (GET_MODE_SIZE (maxmode)
1488			       > GET_MODE_SIZE (GET_MODE (xtarget)));
1489		  xtarget = gen_lowpart (maxmode, xtarget);
1490		  if (wider)
1491		    xspec_target_subreg = xtarget;
1492		}
1493	      else
1494		xtarget = gen_reg_rtx (maxmode);
1495	    }
1496
1497	  /* If this machine's extzv insists on a register target,
1498	     make sure we have one.  */
1499	  if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1500		 (xtarget, maxmode)))
1501	    xtarget = gen_reg_rtx (maxmode);
1502
1503	  bitsize_rtx = GEN_INT (bitsize);
1504	  bitpos_rtx = GEN_INT (xbitpos);
1505
1506	  pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1507	  if (pat)
1508	    {
1509	      emit_insn (pat);
1510	      target = xtarget;
1511	      spec_target = xspec_target;
1512	      spec_target_subreg = xspec_target_subreg;
1513	    }
1514	  else
1515	    {
1516	      delete_insns_since (last);
1517	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1518						bitpos, target, 1);
1519	    }
1520	}
1521      else
1522      extzv_loses:
1523	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1524					  bitpos, target, 1);
1525    }
1526  else
1527    {
1528      if (HAVE_extv
1529	  && bitsize > 0
1530	  && GET_MODE_BITSIZE (extv_mode) >= bitsize
1531	  && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1532		&& (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1533	{
1534	  int xbitpos = bitpos, xoffset = offset;
1535	  rtx bitsize_rtx, bitpos_rtx;
1536	  rtx last = get_last_insn ();
1537	  rtx xop0 = op0, xtarget = target;
1538	  rtx xspec_target = spec_target;
1539	  rtx xspec_target_subreg = spec_target_subreg;
1540	  rtx pat;
1541	  enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1542
1543	  if (MEM_P (xop0))
1544	    {
1545	      /* Is the memory operand acceptable?  */
1546	      if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1547		     (xop0, GET_MODE (xop0))))
1548		{
1549		  /* No, load into a reg and extract from there.  */
1550		  enum machine_mode bestmode;
1551
1552		  /* Get the mode to use for extracting from this field.  If
1553		     OP0 is BLKmode, get the smallest mode consistent with the
1554		     alignment.  If OP0 is a non-BLKmode object that is no
1555		     wider than MAXMODE, use its mode.  Otherwise, use the
1556		     smallest mode containing the field.  */
1557
1558		  if (GET_MODE (xop0) == BLKmode
1559		      || (GET_MODE_SIZE (GET_MODE (op0))
1560			  > GET_MODE_SIZE (maxmode)))
1561		    bestmode = get_best_mode (bitsize, bitnum,
1562					      MEM_ALIGN (xop0), maxmode,
1563					      MEM_VOLATILE_P (xop0));
1564		  else
1565		    bestmode = GET_MODE (xop0);
1566
1567		  if (bestmode == VOIDmode
1568		      || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1569			  && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1570		    goto extv_loses;
1571
1572		  /* Compute offset as multiple of this unit,
1573		     counting in bytes.  */
1574		  unit = GET_MODE_BITSIZE (bestmode);
1575		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1576		  xbitpos = bitnum % unit;
1577		  xop0 = adjust_address (xop0, bestmode, xoffset);
1578
1579		  /* Make sure register is big enough for the whole field. */
1580		  if (xoffset * BITS_PER_UNIT + unit
1581		      < offset * BITS_PER_UNIT + bitsize)
1582		    goto extv_loses;
1583
1584		  /* Fetch it to a register in that size.  */
1585		  xop0 = force_reg (bestmode, xop0);
1586
1587		  /* XBITPOS counts within UNIT, which is what is expected.  */
1588		}
1589	      else
1590		/* Get ref to first byte containing part of the field.  */
1591		xop0 = adjust_address (xop0, byte_mode, xoffset);
1592	    }
1593
1594	  /* If op0 is a register, we need it in MAXMODE (which is usually
1595	     SImode) to make it acceptable to the format of extv.  */
1596	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1597	    goto extv_loses;
1598	  if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1599	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1600
1601	  /* On big-endian machines, we count bits from the most significant.
1602	     If the bit field insn does not, we must invert.  */
1603	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1604	    xbitpos = unit - bitsize - xbitpos;
1605
1606	  /* XBITPOS counts within a size of UNIT.
1607	     Adjust to count within a size of MAXMODE.  */
1608	  if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1609	    xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1610
1611	  unit = GET_MODE_BITSIZE (maxmode);
1612
1613	  if (xtarget == 0)
1614	    xtarget = xspec_target = gen_reg_rtx (tmode);
1615
1616	  if (GET_MODE (xtarget) != maxmode)
1617	    {
1618	      if (REG_P (xtarget))
1619		{
1620		  int wider = (GET_MODE_SIZE (maxmode)
1621			       > GET_MODE_SIZE (GET_MODE (xtarget)));
1622		  xtarget = gen_lowpart (maxmode, xtarget);
1623		  if (wider)
1624		    xspec_target_subreg = xtarget;
1625		}
1626	      else
1627		xtarget = gen_reg_rtx (maxmode);
1628	    }
1629
1630	  /* If this machine's extv insists on a register target,
1631	     make sure we have one.  */
1632	  if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1633		 (xtarget, maxmode)))
1634	    xtarget = gen_reg_rtx (maxmode);
1635
1636	  bitsize_rtx = GEN_INT (bitsize);
1637	  bitpos_rtx = GEN_INT (xbitpos);
1638
1639	  pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1640	  if (pat)
1641	    {
1642	      emit_insn (pat);
1643	      target = xtarget;
1644	      spec_target = xspec_target;
1645	      spec_target_subreg = xspec_target_subreg;
1646	    }
1647	  else
1648	    {
1649	      delete_insns_since (last);
1650	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1651						bitpos, target, 0);
1652	    }
1653	}
1654      else
1655      extv_loses:
1656	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1657					  bitpos, target, 0);
1658    }
1659  if (target == spec_target)
1660    return target;
1661  if (target == spec_target_subreg)
1662    return spec_target;
1663  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1664    {
1665      /* If the target mode is not a scalar integral, first convert to the
1666	 integer mode of that size and then access it as a floating-point
1667	 value via a SUBREG.  */
1668      if (!SCALAR_INT_MODE_P (tmode))
1669	{
1670	  enum machine_mode smode
1671	    = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1672	  target = convert_to_mode (smode, target, unsignedp);
1673	  target = force_reg (smode, target);
1674	  return gen_lowpart (tmode, target);
1675	}
1676
1677      return convert_to_mode (tmode, target, unsignedp);
1678    }
1679  return target;
1680}
1681
1682/* Extract a bit field using shifts and boolean operations.
1683   Returns an rtx to represent the value.
1684   OP0 addresses a register (word) or memory (byte).
1685   BITPOS says which bit within the word or byte the bit field starts in.
1686   OFFSET says how many bytes farther the bit field starts;
1687    it is 0 if OP0 is a register.
1688   BITSIZE says how many bits long the bit field is.
1689    (If OP0 is a register, it may be narrower than a full word,
1690     but BITPOS still counts within a full word,
1691     which is significant on big-endian machines.)
1692
1693   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1694   If TARGET is nonzero, attempts to store the value there
1695   and return TARGET, but this is not guaranteed.
1696   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1697
1698static rtx
1699extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1700			 unsigned HOST_WIDE_INT offset,
1701			 unsigned HOST_WIDE_INT bitsize,
1702			 unsigned HOST_WIDE_INT bitpos, rtx target,
1703			 int unsignedp)
1704{
1705  unsigned int total_bits = BITS_PER_WORD;
1706  enum machine_mode mode;
1707
1708  if (GET_CODE (op0) == SUBREG || REG_P (op0))
1709    {
1710      /* Special treatment for a bit field split across two registers.  */
1711      if (bitsize + bitpos > BITS_PER_WORD)
1712	return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1713    }
1714  else
1715    {
1716      /* Get the proper mode to use for this field.  We want a mode that
1717	 includes the entire field.  If such a mode would be larger than
1718	 a word, we won't be doing the extraction the normal way.  */
1719
1720      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1721			    MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1722
1723      if (mode == VOIDmode)
1724	/* The only way this should occur is if the field spans word
1725	   boundaries.  */
1726	return extract_split_bit_field (op0, bitsize,
1727					bitpos + offset * BITS_PER_UNIT,
1728					unsignedp);
1729
1730      total_bits = GET_MODE_BITSIZE (mode);
1731
1732      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1733	 be in the range 0 to total_bits-1, and put any excess bytes in
1734	 OFFSET.  */
1735      if (bitpos >= total_bits)
1736	{
1737	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1738	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1739		     * BITS_PER_UNIT);
1740	}
1741
1742      /* Get ref to an aligned byte, halfword, or word containing the field.
1743	 Adjust BITPOS to be position within a word,
1744	 and OFFSET to be the offset of that word.
1745	 Then alter OP0 to refer to that word.  */
1746      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1747      offset -= (offset % (total_bits / BITS_PER_UNIT));
1748      op0 = adjust_address (op0, mode, offset);
1749    }
1750
1751  mode = GET_MODE (op0);
1752
1753  if (BYTES_BIG_ENDIAN)
1754    /* BITPOS is the distance between our msb and that of OP0.
1755       Convert it to the distance from the lsb.  */
1756    bitpos = total_bits - bitsize - bitpos;
1757
1758  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1759     We have reduced the big-endian case to the little-endian case.  */
1760
1761  if (unsignedp)
1762    {
1763      if (bitpos)
1764	{
1765	  /* If the field does not already start at the lsb,
1766	     shift it so it does.  */
1767	  tree amount = build_int_cst (NULL_TREE, bitpos);
1768	  /* Maybe propagate the target for the shift.  */
1769	  /* But not if we will return it--could confuse integrate.c.  */
1770	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1771	  if (tmode != mode) subtarget = 0;
1772	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1773	}
1774      /* Convert the value to the desired mode.  */
1775      if (mode != tmode)
1776	op0 = convert_to_mode (tmode, op0, 1);
1777
1778      /* Unless the msb of the field used to be the msb when we shifted,
1779	 mask out the upper bits.  */
1780
1781      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1782	return expand_binop (GET_MODE (op0), and_optab, op0,
1783			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1784			     target, 1, OPTAB_LIB_WIDEN);
1785      return op0;
1786    }
1787
1788  /* To extract a signed bit-field, first shift its msb to the msb of the word,
1789     then arithmetic-shift its lsb to the lsb of the word.  */
1790  op0 = force_reg (mode, op0);
1791  if (mode != tmode)
1792    target = 0;
1793
1794  /* Find the narrowest integer mode that contains the field.  */
1795
1796  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1797       mode = GET_MODE_WIDER_MODE (mode))
1798    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1799      {
1800	op0 = convert_to_mode (mode, op0, 0);
1801	break;
1802      }
1803
1804  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1805    {
1806      tree amount
1807	= build_int_cst (NULL_TREE,
1808			 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1809      /* Maybe propagate the target for the shift.  */
1810      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1811      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1812    }
1813
1814  return expand_shift (RSHIFT_EXPR, mode, op0,
1815		       build_int_cst (NULL_TREE,
1816				      GET_MODE_BITSIZE (mode) - bitsize),
1817		       target, 0);
1818}
1819
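/* A worked example of extract_fixed_bit_field above, assuming SImode is
   the narrowest integer mode covering the field: for an unsigned 6-bit
   field whose lsb sits at bit 20 of x, the code computes
   (x >> 20) & 0x3f; in the signed case it instead shifts left by
   32 - (6 + 20) == 6 so the field's msb becomes the word's msb, then
   arithmetic-shifts right by 32 - 6 == 26 to sign-extend.  */
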
1820/* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1821   of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1822   complement of that if COMPLEMENT.  The mask is truncated if
1823   necessary to the width of mode MODE.  The mask is zero-extended if
1824   BITSIZE+BITPOS is too small for MODE.  */
1825
1826static rtx
1827mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1828{
1829  HOST_WIDE_INT masklow, maskhigh;
1830
1831  if (bitsize == 0)
1832    masklow = 0;
1833  else if (bitpos < HOST_BITS_PER_WIDE_INT)
1834    masklow = (HOST_WIDE_INT) -1 << bitpos;
1835  else
1836    masklow = 0;
1837
1838  if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1839    masklow &= ((unsigned HOST_WIDE_INT) -1
1840		>> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1841
1842  if (bitpos <= HOST_BITS_PER_WIDE_INT)
1843    maskhigh = -1;
1844  else
1845    maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1846
1847  if (bitsize == 0)
1848    maskhigh = 0;
1849  else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1850    maskhigh &= ((unsigned HOST_WIDE_INT) -1
1851		 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1852  else
1853    maskhigh = 0;
1854
1855  if (complement)
1856    {
1857      maskhigh = ~maskhigh;
1858      masklow = ~masklow;
1859    }
1860
1861  return immed_double_const (masklow, maskhigh, mode);
1862}
1863
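/* For instance, with a 64-bit HOST_WIDE_INT, mask_rtx (SImode, 8, 4, 0)
   computes masklow == ((HOST_WIDE_INT) -1 << 8) masked down to the low
   twelve bits, i.e. 0xf00: four ones followed by eight zeros, with
   maskhigh == 0.  With COMPLEMENT nonzero the result is instead ~0xf00,
   truncated to SImode.  */
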
1864/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1865   VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1866
1867static rtx
1868lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1869{
1870  unsigned HOST_WIDE_INT v = INTVAL (value);
1871  HOST_WIDE_INT low, high;
1872
1873  if (bitsize < HOST_BITS_PER_WIDE_INT)
1874    v &= ~((HOST_WIDE_INT) -1 << bitsize);
1875
1876  if (bitpos < HOST_BITS_PER_WIDE_INT)
1877    {
1878      low = v << bitpos;
1879      high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1880    }
1881  else
1882    {
1883      low = 0;
1884      high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1885    }
1886
1887  return immed_double_const (low, high, mode);
1888}
1889
1890/* Extract a bit field from a memory by forcing the alignment of the
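/* E.g. lshift_value (SImode, GEN_INT (0x2a), 4, 8) truncates the value
   to 8 bits (still 0x2a) and shifts it left by 4, giving
   (const_int 0x2a0).  */
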
1891   memory.  This efficient only if the field spans at least 4 boundaries.
1892
1893   OP0 is the MEM.
1894   BITSIZE is the field width; BITPOS is the position of the first bit.
1895   UNSIGNEDP is true if the result should be zero-extended.  */
1896
1897static rtx
1898extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1899				   unsigned HOST_WIDE_INT bitpos,
1900				   int unsignedp)
1901{
1902  enum machine_mode mode, dmode;
1903  unsigned int m_bitsize, m_size;
1904  unsigned int sign_shift_up, sign_shift_dn;
1905  rtx base, a1, a2, v1, v2, comb, shift, result, start;
1906
1907  /* Choose a mode that will fit BITSIZE.  */
1908  mode = smallest_mode_for_size (bitsize, MODE_INT);
1909  m_size = GET_MODE_SIZE (mode);
1910  m_bitsize = GET_MODE_BITSIZE (mode);
1911
1912  /* Choose a mode twice as wide.  Fail if no such mode exists.  */
1913  dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1914  if (dmode == BLKmode)
1915    return NULL;
1916
1917  do_pending_stack_adjust ();
1918  start = get_last_insn ();
1919
1920  /* At the end, we'll need an additional shift to deal with sign/zero
1921     extension.  By default this will be a left+right shift of the
1922     appropriate size.  But we may be able to eliminate one of them.  */
1923  sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1924
1925  if (STRICT_ALIGNMENT)
1926    {
1927      base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1928      bitpos %= BITS_PER_UNIT;
1929
1930      /* We load two values to be concatenated.  There's an edge condition
1931	 that bears notice -- an aligned value at the end of a page can
1932	 only load one value lest we segfault.  So the two values we load
1933	 are at "base & -size" and "(base + size - 1) & -size".  If base
1934	 is unaligned, the addresses will be aligned and sequential; if
1935	 base is aligned, the addresses will both be equal to base.  */
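      /* For instance, with m_size == 4 and base == 0x1003, the two loads
	 come from 0x1003 & -4 == 0x1000 and (0x1003 + 3) & -4 == 0x1004,
	 aligned and sequential; with base == 0x1000, both addresses equal
	 0x1000 and only that one value is read.  */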
1936
1937      a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1938				GEN_INT (-(HOST_WIDE_INT)m_size),
1939				NULL, true, OPTAB_LIB_WIDEN);
1940      mark_reg_pointer (a1, m_bitsize);
1941      v1 = gen_rtx_MEM (mode, a1);
1942      set_mem_align (v1, m_bitsize);
1943      v1 = force_reg (mode, validize_mem (v1));
1944
1945      a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1946      a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1947				GEN_INT (-(HOST_WIDE_INT)m_size),
1948				NULL, true, OPTAB_LIB_WIDEN);
1949      v2 = gen_rtx_MEM (mode, a2);
1950      set_mem_align (v2, m_bitsize);
1951      v2 = force_reg (mode, validize_mem (v2));
1952
1953      /* Combine these two values into a double-word value.  */
1954      if (m_bitsize == BITS_PER_WORD)
1955	{
1956	  comb = gen_reg_rtx (dmode);
1957	  emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1958	  emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1959	  emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1960	}
1961      else
1962	{
1963	  if (BYTES_BIG_ENDIAN)
1964	    comb = v1, v1 = v2, v2 = comb;
1965	  v1 = convert_modes (dmode, mode, v1, true);
1966	  if (v1 == NULL)
1967	    goto fail;
1968	  v2 = convert_modes (dmode, mode, v2, true);
1969	  v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1970				    NULL, true, OPTAB_LIB_WIDEN);
1971	  if (v2 == NULL)
1972	    goto fail;
1973	  comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1974				      true, OPTAB_LIB_WIDEN);
1975	  if (comb == NULL)
1976	    goto fail;
1977	}
1978
1979      shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1980				   NULL, true, OPTAB_LIB_WIDEN);
1981      shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1982
1983      if (bitpos != 0)
1984	{
1985	  if (sign_shift_up <= bitpos)
1986	    bitpos -= sign_shift_up, sign_shift_up = 0;
1987	  shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1988				       NULL, true, OPTAB_LIB_WIDEN);
1989	}
1990    }
1991  else
1992    {
1993      unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1994      bitpos %= BITS_PER_UNIT;
1995
1996      /* When strict alignment is not required, we can just load directly
1997	 from memory without masking.  If the remaining BITPOS offset is
1998	 small enough, we may be able to do all operations in MODE as
1999	 opposed to DMODE.  */
2000      if (bitpos + bitsize <= m_bitsize)
2001	dmode = mode;
2002      comb = adjust_address (op0, dmode, offset);
2003
2004      if (sign_shift_up <= bitpos)
2005	bitpos -= sign_shift_up, sign_shift_up = 0;
2006      shift = GEN_INT (bitpos);
2007    }
2008
2009  /* Shift down the double-word such that the requested value is at bit 0.  */
2010  if (shift != const0_rtx)
2011    comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
2012				comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
2013  if (comb == NULL)
2014    goto fail;
2015
2016  /* If the field exactly matches MODE, then all we need to do is return the
2017     lowpart.  Otherwise, shift to get the sign bits set properly.  */
2018  result = force_reg (mode, gen_lowpart (mode, comb));
2019
2020  if (sign_shift_up)
2021    result = expand_simple_binop (mode, ASHIFT, result,
2022				  GEN_INT (sign_shift_up),
2023				  NULL_RTX, 0, OPTAB_LIB_WIDEN);
2024  if (sign_shift_dn)
2025    result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
2026				  result, GEN_INT (sign_shift_dn),
2027				  NULL_RTX, 0, OPTAB_LIB_WIDEN);
2028
2029  return result;
2030
2031 fail:
2032  delete_insns_since (start);
2033  return NULL;
2034}
2035
2036/* Extract a bit field that is split across two words
2037   and return an RTX for the result.
2038
2039   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2040   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2041   UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend.  */
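
/* For example, on a little-endian 32-bit target, a 16-bit field starting
   at bit 24 is assembled from two parts: bits 24..31 of the first word
   supply the low 8 bits of the result, and bits 0..7 of the second word
   are shifted left by 8 and IORed in.  */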
2042
2043static rtx
2044extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2045			 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2046{
2047  unsigned int unit;
2048  unsigned int bitsdone = 0;
2049  rtx result = NULL_RTX;
2050  int first = 1;
2051
2052  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2053     much at a time.  */
2054  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2055    unit = BITS_PER_WORD;
2056  else
2057    {
2058      unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
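      /* The force-align path below is deliberately disabled by the
	 "0 &&" in its condition, so extract_force_align_mem_bit_field
	 is currently unreachable from here.  */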
2059      if (0 && bitsize / unit > 2)
2060	{
2061	  rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2062						       unsignedp);
2063	  if (tmp)
2064	    return tmp;
2065	}
2066    }
2067
2068  while (bitsdone < bitsize)
2069    {
2070      unsigned HOST_WIDE_INT thissize;
2071      rtx part, word;
2072      unsigned HOST_WIDE_INT thispos;
2073      unsigned HOST_WIDE_INT offset;
2074
2075      offset = (bitpos + bitsdone) / unit;
2076      thispos = (bitpos + bitsdone) % unit;
2077
2078      /* THISSIZE must not overrun a word boundary.  Otherwise,
2079	 extract_fixed_bit_field will call us again, and we will mutually
2080	 recurse forever.  */
2081      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2082      thissize = MIN (thissize, unit - thispos);
2083
2084      /* If OP0 is a register, then handle OFFSET here.
2085
2086	 When handling multiword bitfields, extract_bit_field may pass
2087	 down a word_mode SUBREG of a larger REG for a bitfield that actually
2088	 crosses a word boundary.  Thus, for a SUBREG, we must find
2089	 the current word starting from the base register.  */
2090      if (GET_CODE (op0) == SUBREG)
2091	{
2092	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2093	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
2094					GET_MODE (SUBREG_REG (op0)));
2095	  offset = 0;
2096	}
2097      else if (REG_P (op0))
2098	{
2099	  word = operand_subword_force (op0, offset, GET_MODE (op0));
2100	  offset = 0;
2101	}
2102      else
2103	word = op0;
2104
2105      /* Extract the parts in bit-counting order,
2106	 whose meaning is determined by BYTES_BIG_ENDIAN.
2107	 OFFSET is in UNITs, and UNIT is in bits.
2108	 extract_fixed_bit_field wants offset in bytes.  */
2109      part = extract_fixed_bit_field (word_mode, word,
2110				      offset * unit / BITS_PER_UNIT,
2111				      thissize, thispos, 0, 1);
2112      bitsdone += thissize;
2113
2114      /* Shift this part into place for the result.  */
2115      if (BYTES_BIG_ENDIAN)
2116	{
2117	  if (bitsize != bitsdone)
2118	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2119				 build_int_cst (NULL_TREE, bitsize - bitsdone),
2120				 0, 1);
2121	}
2122      else
2123	{
2124	  if (bitsdone != thissize)
2125	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2126				 build_int_cst (NULL_TREE,
2127						bitsdone - thissize), 0, 1);
2128	}
2129
2130      if (first)
2131	result = part;
2132      else
2133	/* Combine the parts with bitwise or.  This works
2134	   because we extracted each part as an unsigned bit field.  */
2135	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2136			       OPTAB_LIB_WIDEN);
2137
2138      first = 0;
2139    }
2140
2141  /* Unsigned bit field: we are done.  */
2142  if (unsignedp)
2143    return result;
2144  /* Signed bit field: sign-extend with two arithmetic shifts.  */
2145  result = expand_shift (LSHIFT_EXPR, word_mode, result,
2146			 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2147			 NULL_RTX, 0);
2148  return expand_shift (RSHIFT_EXPR, word_mode, result,
2149		       build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2150		       NULL_RTX, 0);
2151}
2152
2153/* Add INC into TARGET.  */
2154
2155void
2156expand_inc (rtx target, rtx inc)
2157{
2158  rtx value = expand_binop (GET_MODE (target), add_optab,
2159			    target, inc,
2160			    target, 0, OPTAB_LIB_WIDEN);
2161  if (value != target)
2162    emit_move_insn (target, value);
2163}
2164
2165/* Subtract DEC from TARGET.  */
2166
2167void
2168expand_dec (rtx target, rtx dec)
2169{
2170  rtx value = expand_binop (GET_MODE (target), sub_optab,
2171			    target, dec,
2172			    target, 0, OPTAB_LIB_WIDEN);
2173  if (value != target)
2174    emit_move_insn (target, value);
2175}
2176
2177/* Output a shift instruction for expression code CODE,
2178   with SHIFTED being the rtx for the value to shift,
2179   and AMOUNT the tree for the amount to shift by.
2180   Store the result in the rtx TARGET, if that is convenient.
2181   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2182   Return the rtx for where the value is.  */
2183
2184rtx
2185expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2186	      tree amount, rtx target, int unsignedp)
2187{
2188  rtx op1, temp = 0;
2189  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2190  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2191  int try;
2192
2193  /* We used to detect shift counts computed by NEGATE_EXPR
2194     and shift in the other direction, but that does not work
2195     on all machines.  */
2196
2197  op1 = expand_normal (amount);
2198
2199  if (SHIFT_COUNT_TRUNCATED)
2200    {
2201      if (GET_CODE (op1) == CONST_INT
2202	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2203	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2204	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2205		       % GET_MODE_BITSIZE (mode));
2206      else if (GET_CODE (op1) == SUBREG
2207	       && subreg_lowpart_p (op1))
2208	op1 = SUBREG_REG (op1);
2209    }
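
  /* E.g. with SHIFT_COUNT_TRUNCATED and a 32-bit mode, a constant shift
     count of 37 is reduced above to 37 % 32 == 5.  */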
2210
2211  if (op1 == const0_rtx)
2212    return shifted;
2213
2214  /* Check whether it's cheaper to implement a left shift by a constant
2215     bit count by a sequence of additions.  */
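  /* E.g. where two additions are cheaper than a shift by 2, x << 2
     becomes t = x + x; t = t + t.  */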
2216  if (code == LSHIFT_EXPR
2217      && GET_CODE (op1) == CONST_INT
2218      && INTVAL (op1) > 0
2219      && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2220      && INTVAL (op1) < MAX_BITS_PER_WORD
2221      && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode]
2222      && shift_cost[mode][INTVAL (op1)] != MAX_COST)
2223    {
2224      int i;
2225      for (i = 0; i < INTVAL (op1); i++)
2226	{
2227	  temp = force_reg (mode, shifted);
2228	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2229				  unsignedp, OPTAB_LIB_WIDEN);
2230	}
2231      return shifted;
2232    }
2233
2234  for (try = 0; temp == 0 && try < 3; try++)
2235    {
2236      enum optab_methods methods;
2237
2238      if (try == 0)
2239	methods = OPTAB_DIRECT;
2240      else if (try == 1)
2241	methods = OPTAB_WIDEN;
2242      else
2243	methods = OPTAB_LIB_WIDEN;
2244
2245      if (rotate)
2246	{
2247	  /* Widening does not work for rotation.  */
2248	  if (methods == OPTAB_WIDEN)
2249	    continue;
2250	  else if (methods == OPTAB_LIB_WIDEN)
2251	    {
2252	      /* If we have been unable to open-code this by a rotation,
2253		 do it as the IOR of two shifts.  I.e., to rotate A
2254		 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2255		 where C is the bitsize of A.
2256
2257		 It is theoretically possible that the target machine might
2258		 not be able to perform either shift and hence we would
2259		 be making two libcalls rather than just the one for the
2260		 shift (similarly if IOR could not be done).  We will allow
2261		 this extremely unlikely lossage to avoid complicating the
2262		 code below.  */
2263
2264	      rtx subtarget = target == shifted ? 0 : target;
2265	      tree new_amount, other_amount;
2266	      rtx temp1;
2267	      tree type = TREE_TYPE (amount);
2268	      if (GET_MODE (op1) != TYPE_MODE (type)
2269		  && GET_MODE (op1) != VOIDmode)
2270		op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2271	      new_amount = make_tree (type, op1);
2272	      other_amount
2273		= fold_build2 (MINUS_EXPR, type,
2274			       build_int_cst (type, GET_MODE_BITSIZE (mode)),
2275			       new_amount);
2276
2277	      shifted = force_reg (mode, shifted);
2278
2279	      temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2280				   mode, shifted, new_amount, 0, 1);
2281	      temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2282				    mode, shifted, other_amount, subtarget, 1);
2283	      return expand_binop (mode, ior_optab, temp, temp1, target,
2284				   unsignedp, methods);
2285	    }
2286
2287	  temp = expand_binop (mode,
2288			       left ? rotl_optab : rotr_optab,
2289			       shifted, op1, target, unsignedp, methods);
2290	}
2291      else if (unsignedp)
2292	temp = expand_binop (mode,
2293			     left ? ashl_optab : lshr_optab,
2294			     shifted, op1, target, unsignedp, methods);
2295
2296      /* Do arithmetic shifts.
2297	 Also, if we are going to widen the operand, we can just as well
2298	 use an arithmetic right-shift instead of a logical one.  */
2299      if (temp == 0 && ! rotate
2300	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2301	{
2302	  enum optab_methods methods1 = methods;
2303
2304	  /* If trying to widen a log shift to an arithmetic shift,
2305	     don't accept an arithmetic shift of the same size.  */
2306	  if (unsignedp)
2307	    methods1 = OPTAB_MUST_WIDEN;
2308
2309	  /* Arithmetic shift */
2310
2311	  temp = expand_binop (mode,
2312			       left ? ashl_optab : ashr_optab,
2313			       shifted, op1, target, unsignedp, methods1);
2314	}
2315
2316      /* We used to try extzv here for logical right shifts, but that was
2317	 only useful for one machine, the VAX, and caused poor code
2318	 generation there for lshrdi3, so the code was deleted and a
2319	 define_expand for lshrsi3 was added to vax.md.  */
2320    }
2321
2322  gcc_assert (temp);
2323  return temp;
2324}
2325
2326enum alg_code {
2327  alg_unknown,
2328  alg_zero,
2329  alg_m, alg_shift,
2330  alg_add_t_m2,
2331  alg_sub_t_m2,
2332  alg_add_factor,
2333  alg_sub_factor,
2334  alg_add_t2_m,
2335  alg_sub_t2_m,
2336  alg_impossible
2337};
2338
2339/* This structure holds the "cost" of a multiply sequence.  The
2340   "cost" field holds the total rtx_cost of every operator in the
2341   synthetic multiplication sequence, hence cost(a op b) is defined
2342   as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2343   The "latency" field holds the minimum possible latency of the
2344   synthetic multiply, on a hypothetical infinitely parallel CPU.
2345   This is the critical path, or the maximum height, of the expression
2346   tree which is the sum of rtx_costs on the most expensive path from
2347   any leaf to the root.  Hence latency(a op b) is defined as zero for
2348   leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2349
2350struct mult_cost {
2351  short cost;     /* Total rtx_cost of the multiplication sequence.  */
2352  short latency;  /* The latency of the multiplication sequence.  */
2353};
2354
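/* For example, under the definitions above the expression
   (a + b) + (c + d) has a cost of three additions but a latency of only
   two, because (a + b) and (c + d) are independent and can execute in
   parallel.  */
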
2355/* This macro is used to compare a pointer to a mult_cost against a
2356   single integer "rtx_cost" value.  This is equivalent to the macro
2357   CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2358#define MULT_COST_LESS(X,Y) ((X)->cost < (Y)	\
2359			     || ((X)->cost == (Y) && (X)->latency < (Y)))
2360
2361/* This macro is used to compare two pointers to mult_costs against
2362   each other.  The macro returns true if X is cheaper than Y.
2363   Currently, the cheaper of two mult_costs is the one with the
2364   lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2365#define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost		\
2366				 || ((X)->cost == (Y)->cost	\
2367				     && (X)->latency < (Y)->latency))
2368
2369/* This structure records a sequence of operations.
2370   `ops' is the number of operations recorded.
2371   `cost' is their total cost.
2372   The operations are stored in `op' and the corresponding
2373   logarithms of the integer coefficients in `log'.
2374
2375   These are the operations:
2376   alg_zero		total := 0;
2377   alg_m		total := multiplicand;
2378   alg_shift		total := total * coeff
2379   alg_add_t_m2		total := total + multiplicand * coeff;
2380   alg_sub_t_m2		total := total - multiplicand * coeff;
2381   alg_add_factor	total := total * coeff + total;
2382   alg_sub_factor	total := total * coeff - total;
2383   alg_add_t2_m		total := total * coeff + multiplicand;
2384   alg_sub_t2_m		total := total * coeff - multiplicand;
2385
2386   The first operand must be either alg_zero or alg_m.  */
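
/* As an illustration, one sequence this structure can record for
   multiplying by 10 (whether it is the one chosen depends on the
   target's costs) is:

   alg_m		total := x;
   alg_shift		total := total * 4;	(log == 2)
   alg_add_t_m2		total := total + x;	(log == 0)
   alg_shift		total := total * 2;	(log == 1)

   i.e. ((x << 2) + x) << 1 == 10*x.  */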
2387
2388struct algorithm
2389{
2390  struct mult_cost cost;
2391  short ops;
2392  /* The sizes of the OP and LOG fields are not directly related to the
2393     word size, but the worst-case algorithm arises when we have few
2394     consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2395     In that case we will generate shift-by-2, add, shift-by-2, add, ...,
2396     wordsize operations in total.  */
2397  enum alg_code op[MAX_BITS_PER_WORD];
2398  char log[MAX_BITS_PER_WORD];
2399};
2400
2401/* The entry for our multiplication cache/hash table.  */
2402struct alg_hash_entry {
2403  /* The number we are multiplying by.  */
2404  unsigned HOST_WIDE_INT t;
2405
2406  /* The mode in which we are multiplying something by T.  */
2407  enum machine_mode mode;
2408
2409  /* The best multiplication algorithm for t.  */
2410  enum alg_code alg;
2411
2412  /* The cost of multiplication if ALG_CODE is not alg_impossible.
2413     Otherwise, the cost within which multiplication by T is
2414     impossible.  */
2415  struct mult_cost cost;
2416};
2417
2418/* The number of cache/hash entries.  */
2419#if HOST_BITS_PER_WIDE_INT == 64
2420#define NUM_ALG_HASH_ENTRIES 1031
2421#else
2422#define NUM_ALG_HASH_ENTRIES 307
2423#endif
2424
2425/* Each entry of ALG_HASH caches alg_code for some integer.  This is
2426   actually a hash table.  If we have a collision, the older entry
2427   is kicked out.  */
2428static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2429
2430/* Indicates the type of fixup needed after a constant multiplication.
2431   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2432   the result should be negated, and ADD_VARIANT means that the
2433   multiplicand should be added to the result.  */
2434enum mult_variant {basic_variant, negate_variant, add_variant};
2435
2436static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2437			const struct mult_cost *, enum machine_mode mode);
2438static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2439				 struct algorithm *, enum mult_variant *, int);
2440static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2441			      const struct algorithm *, enum mult_variant);
2442static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2443						 int, rtx *, int *, int *);
2444static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2445static rtx extract_high_half (enum machine_mode, rtx);
2446static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2447static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2448				       int, int);
2449/* Compute and return the best algorithm for multiplying by T.
2450   The algorithm must cost less than COST_LIMIT.
2451   If retval.cost >= COST_LIMIT, no algorithm was found and all
2452   other fields of the returned struct are undefined.
2453   MODE is the machine mode of the multiplication.  */
2454
2455static void
2456synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2457	    const struct mult_cost *cost_limit, enum machine_mode mode)
2458{
2459  int m;
2460  struct algorithm *alg_in, *best_alg;
2461  struct mult_cost best_cost;
2462  struct mult_cost new_limit;
2463  int op_cost, op_latency;
2464  unsigned HOST_WIDE_INT q;
2465  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2466  int hash_index;
2467  bool cache_hit = false;
2468  enum alg_code cache_alg = alg_zero;
2469
2470  /* Indicate that no algorithm is yet found.  If no algorithm
2471     is found, this value will be returned and indicate failure.  */
2472  alg_out->cost.cost = cost_limit->cost + 1;
2473  alg_out->cost.latency = cost_limit->latency + 1;
2474
2475  if (cost_limit->cost < 0
2476      || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2477    return;
2478
2479  /* Restrict the bits of "t" to the multiplication's mode.  */
2480  t &= GET_MODE_MASK (mode);
2481
2482  /* t == 1 can be done at zero cost.  */
2483  if (t == 1)
2484    {
2485      alg_out->ops = 1;
2486      alg_out->cost.cost = 0;
2487      alg_out->cost.latency = 0;
2488      alg_out->op[0] = alg_m;
2489      return;
2490    }
2491
2492  /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2493     fail now.  */
2494  if (t == 0)
2495    {
2496      if (MULT_COST_LESS (cost_limit, zero_cost))
2497	return;
2498      else
2499	{
2500	  alg_out->ops = 1;
2501	  alg_out->cost.cost = zero_cost;
2502	  alg_out->cost.latency = zero_cost;
2503	  alg_out->op[0] = alg_zero;
2504	  return;
2505	}
2506    }
2507
2508  /* We'll be needing a couple extra algorithm structures now.  */
2509
2510  alg_in = alloca (sizeof (struct algorithm));
2511  best_alg = alloca (sizeof (struct algorithm));
2512  best_cost = *cost_limit;
2513
2514  /* Compute the hash index.  */
2515  hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2516
2517  /* See if we already know what to do for T.  */
2518  if (alg_hash[hash_index].t == t
2519      && alg_hash[hash_index].mode == mode
2520      && alg_hash[hash_index].alg != alg_unknown)
2521    {
2522      cache_alg = alg_hash[hash_index].alg;
2523
2524      if (cache_alg == alg_impossible)
2525	{
2526	  /* The cache tells us that it's impossible to synthesize
2527	     multiplication by T within alg_hash[hash_index].cost.  */
2528	  if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2529	    /* COST_LIMIT is at least as restrictive as the one
2530	       recorded in the hash table, in which case we have no
2531	       hope of synthesizing a multiplication.  Just
2532	       return.  */
2533	    return;
2534
2535	  /* If we get here, COST_LIMIT is less restrictive than the
2536	     one recorded in the hash table, so we may be able to
2537	     synthesize a multiplication.  Proceed as if we didn't
2538	     have the cache entry.  */
2539	}
2540      else
2541	{
2542	  if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2543	    /* The cached algorithm shows that this multiplication
2544	       requires more cost than COST_LIMIT.  Just return.  This
2545	       way, we don't clobber this cache entry with
2546	       alg_impossible but retain useful information.  */
2547	    return;
2548
2549	  cache_hit = true;
2550
2551	  switch (cache_alg)
2552	    {
2553	    case alg_shift:
2554	      goto do_alg_shift;
2555
2556	    case alg_add_t_m2:
2557	    case alg_sub_t_m2:
2558	      goto do_alg_addsub_t_m2;
2559
2560	    case alg_add_factor:
2561	    case alg_sub_factor:
2562	      goto do_alg_addsub_factor;
2563
2564	    case alg_add_t2_m:
2565	      goto do_alg_add_t2_m;
2566
2567	    case alg_sub_t2_m:
2568	      goto do_alg_sub_t2_m;
2569
2570	    default:
2571	      gcc_unreachable ();
2572	    }
2573	}
2574    }
2575
2576  /* If we have a group of zero bits at the low-order part of T, try
2577     multiplying by the remaining bits and then doing a shift.  */
2578
2579  if ((t & 1) == 0)
2580    {
2581    do_alg_shift:
2582      m = floor_log2 (t & -t);	/* m = number of low zero bits */
2583      if (m < maxm)
2584	{
2585	  q = t >> m;
2586	  /* The function expand_shift will choose between a shift and
2587	     a sequence of additions, so the observed cost is given as
2588	     MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2589	  op_cost = m * add_cost[mode];
2590	  if (shift_cost[mode][m] < op_cost)
2591	    op_cost = shift_cost[mode][m];
2592	  new_limit.cost = best_cost.cost - op_cost;
2593	  new_limit.latency = best_cost.latency - op_cost;
2594	  synth_mult (alg_in, q, &new_limit, mode);
2595
2596	  alg_in->cost.cost += op_cost;
2597	  alg_in->cost.latency += op_cost;
2598	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2599	    {
2600	      struct algorithm *x;
2601	      best_cost = alg_in->cost;
2602	      x = alg_in, alg_in = best_alg, best_alg = x;
2603	      best_alg->log[best_alg->ops] = m;
2604	      best_alg->op[best_alg->ops] = alg_shift;
2605	    }
2606	}
2607      if (cache_hit)
2608	goto done;
2609    }
2610
2611  /* If we have an odd number, add or subtract one.  */
2612  if ((t & 1) != 0)
2613    {
2614      unsigned HOST_WIDE_INT w;
2615
2616    do_alg_addsub_t_m2:
2617      for (w = 1; (w & t) != 0; w <<= 1)
2618	;
2619      /* APPLE LOCAL begin 7744816 DImode multiply by 0xffffffffULL */
2620      if (w > 2
2621	      /* Reject the case where t is 3.
2622		 Thus we prefer addition in that case.  */
2623	      && t != 3)
2624      /* APPLE LOCAL end 7744816 DImode multiply by 0xffffffffULL */
2625	{
2626	  /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2627
2628	  op_cost = add_cost[mode];
2629	  new_limit.cost = best_cost.cost - op_cost;
2630	  new_limit.latency = best_cost.latency - op_cost;
2631	  synth_mult (alg_in, t + 1, &new_limit, mode);
2632
2633	  alg_in->cost.cost += op_cost;
2634	  alg_in->cost.latency += op_cost;
2635	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2636	    {
2637	      struct algorithm *x;
2638	      best_cost = alg_in->cost;
2639	      x = alg_in, alg_in = best_alg, best_alg = x;
2640	      best_alg->log[best_alg->ops] = 0;
2641	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2642	    }
2643	}
2644      else
2645	{
2646	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2647
2648	  op_cost = add_cost[mode];
2649	  new_limit.cost = best_cost.cost - op_cost;
2650	  new_limit.latency = best_cost.latency - op_cost;
2651	  synth_mult (alg_in, t - 1, &new_limit, mode);
2652
2653	  alg_in->cost.cost += op_cost;
2654	  alg_in->cost.latency += op_cost;
2655	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2656	    {
2657	      struct algorithm *x;
2658	      best_cost = alg_in->cost;
2659	      x = alg_in, alg_in = best_alg, best_alg = x;
2660	      best_alg->log[best_alg->ops] = 0;
2661	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2662	    }
2663	}
2664      if (cache_hit)
2665	goto done;
2666    }
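
  /* For instance, t == 7 ends with ...111 and is synthesized as
     8*x - x (alg_sub_t_m2 applied to the algorithm for 8), whereas
     t == 5 ends with ...01 and becomes 4*x + x (alg_add_t_m2).  */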
2667
2668  /* Look for factors of t of the form
2669     t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2670     If we find such a factor, we can multiply by t using an algorithm that
2671     multiplies by q, shift the result by m and add/subtract it to itself.
2672
2673     We search for large factors first and loop down, even if large factors
2674     are less probable than small; if we find a large factor we will find a
2675     good sequence quickly, and therefore be able to prune (by decreasing
2676     COST_LIMIT) the search.  */
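  /* E.g. for t == 25 the loop finds d == 2**2 + 1 == 5: we recursively
     synthesize total == 5*x (itself 4*x + x) and record alg_add_factor
     with m == 2, giving total * 4 + total == 25*x.  */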
2677
2678 do_alg_addsub_factor:
2679  for (m = floor_log2 (t - 1); m >= 2; m--)
2680    {
2681      unsigned HOST_WIDE_INT d;
2682
2683      d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2684      if (t % d == 0 && t > d && m < maxm
2685	  && (!cache_hit || cache_alg == alg_add_factor))
2686	{
2687	  /* If the target has a cheap shift-and-add instruction, use
2688	     that in preference to a shift insn followed by an add insn.
2689	     Assume that the shift-and-add is "atomic" with a latency
2690	     equal to its cost, otherwise assume that on superscalar
2691	     hardware the shift may be executed concurrently with the
2692	     earlier steps in the algorithm.  */
2693	  op_cost = add_cost[mode] + shift_cost[mode][m];
2694	  if (shiftadd_cost[mode][m] < op_cost)
2695	    {
2696	      op_cost = shiftadd_cost[mode][m];
2697	      op_latency = op_cost;
2698	    }
2699	  else
2700	    op_latency = add_cost[mode];
2701
2702	  new_limit.cost = best_cost.cost - op_cost;
2703	  new_limit.latency = best_cost.latency - op_latency;
2704	  synth_mult (alg_in, t / d, &new_limit, mode);
2705
2706	  alg_in->cost.cost += op_cost;
2707	  alg_in->cost.latency += op_latency;
2708	  if (alg_in->cost.latency < op_cost)
2709	    alg_in->cost.latency = op_cost;
2710	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2711	    {
2712	      struct algorithm *x;
2713	      best_cost = alg_in->cost;
2714	      x = alg_in, alg_in = best_alg, best_alg = x;
2715	      best_alg->log[best_alg->ops] = m;
2716	      best_alg->op[best_alg->ops] = alg_add_factor;
2717	    }
2718	  /* Other factors will have been taken care of in the recursion.  */
2719	  break;
2720	}
2721
2722      d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2723      if (t % d == 0 && t > d && m < maxm
2724	  && (!cache_hit || cache_alg == alg_sub_factor))
2725	{
2726	  /* If the target has a cheap shift-and-subtract insn, use
2727	     that in preference to a shift insn followed by a sub insn.
2728	     Assume that the shift-and-sub is "atomic" with a latency
2729	     equal to its cost, otherwise assume that on superscalar
2730	     hardware the shift may be executed concurrently with the
2731	     earlier steps in the algorithm.  */
2732	  op_cost = add_cost[mode] + shift_cost[mode][m];
2733	  if (shiftsub_cost[mode][m] < op_cost)
2734	    {
2735	      op_cost = shiftsub_cost[mode][m];
2736	      op_latency = op_cost;
2737	    }
2738	  else
2739	    op_latency = add_cost[mode];
2740
2741	  new_limit.cost = best_cost.cost - op_cost;
2742	  new_limit.latency = best_cost.latency - op_latency;
2743	  synth_mult (alg_in, t / d, &new_limit, mode);
2744
2745	  alg_in->cost.cost += op_cost;
2746	  alg_in->cost.latency += op_latency;
2747	  if (alg_in->cost.latency < op_cost)
2748	    alg_in->cost.latency = op_cost;
2749	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2750	    {
2751	      struct algorithm *x;
2752	      best_cost = alg_in->cost;
2753	      x = alg_in, alg_in = best_alg, best_alg = x;
2754	      best_alg->log[best_alg->ops] = m;
2755	      best_alg->op[best_alg->ops] = alg_sub_factor;
2756	    }
2757	  break;
2758	}
2759    }
2760  if (cache_hit)
2761    goto done;
2762
2763  /* Try shift-and-add (load effective address) instructions,
2764     i.e. do a*3, a*5, a*9.  */
2765  if ((t & 1) != 0)
2766    {
2767    do_alg_add_t2_m:
2768      q = t - 1;
2769      q = q & -q;
2770      m = exact_log2 (q);
2771      if (m >= 0 && m < maxm)
2772	{
2773	  op_cost = shiftadd_cost[mode][m];
2774	  new_limit.cost = best_cost.cost - op_cost;
2775	  new_limit.latency = best_cost.latency - op_cost;
2776	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2777
2778	  alg_in->cost.cost += op_cost;
2779	  alg_in->cost.latency += op_cost;
2780	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2781	    {
2782	      struct algorithm *x;
2783	      best_cost = alg_in->cost;
2784	      x = alg_in, alg_in = best_alg, best_alg = x;
2785	      best_alg->log[best_alg->ops] = m;
2786	      best_alg->op[best_alg->ops] = alg_add_t2_m;
2787	    }
2788	}
2789      if (cache_hit)
2790	goto done;
2791
2792    do_alg_sub_t2_m:
2793      q = t + 1;
2794      q = q & -q;
2795      m = exact_log2 (q);
2796      if (m >= 0 && m < maxm)
2797	{
2798	  op_cost = shiftsub_cost[mode][m];
2799	  new_limit.cost = best_cost.cost - op_cost;
2800	  new_limit.latency = best_cost.latency - op_cost;
2801	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2802
2803	  alg_in->cost.cost += op_cost;
2804	  alg_in->cost.latency += op_cost;
2805	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2806	    {
2807	      struct algorithm *x;
2808	      best_cost = alg_in->cost;
2809	      x = alg_in, alg_in = best_alg, best_alg = x;
2810	      best_alg->log[best_alg->ops] = m;
2811	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
2812	    }
2813	}
2814      if (cache_hit)
2815	goto done;
2816    }
2817
2818 done:
2819  /* If best_cost has not decreased, we have not found any algorithm.  */
2820  if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2821    {
2822      /* We failed to find an algorithm.  Record alg_impossible for
2823	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2824	 we are asked to find an algorithm for T within the same or
2825	 lower COST_LIMIT, we can immediately return to the
2826	 caller.  */
2827      alg_hash[hash_index].t = t;
2828      alg_hash[hash_index].mode = mode;
2829      alg_hash[hash_index].alg = alg_impossible;
2830      alg_hash[hash_index].cost = *cost_limit;
2831      return;
2832    }
2833
2834  /* Cache the result.  */
2835  if (!cache_hit)
2836    {
2837      alg_hash[hash_index].t = t;
2838      alg_hash[hash_index].mode = mode;
2839      alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2840      alg_hash[hash_index].cost.cost = best_cost.cost;
2841      alg_hash[hash_index].cost.latency = best_cost.latency;
2842    }
2843
2844  /* If we are getting too long a sequence for `struct algorithm'
2845     to record, make this search fail.  */
2846  if (best_alg->ops == MAX_BITS_PER_WORD)
2847    return;
2848
2849  /* Copy the algorithm from temporary space to the space at alg_out.
2850     We avoid using structure assignment because the majority of
2851     best_alg is normally undefined, and this is a critical function.  */
2852  alg_out->ops = best_alg->ops + 1;
2853  alg_out->cost = best_cost;
2854  memcpy (alg_out->op, best_alg->op,
2855	  alg_out->ops * sizeof *alg_out->op);
2856  memcpy (alg_out->log, best_alg->log,
2857	  alg_out->ops * sizeof *alg_out->log);
2858}
2859
2860/* Find the cheapest way of multiplying a value of mode MODE by VAL.
2861   Try three variations:
2862
2863       - a shift/add sequence based on VAL itself
2864       - a shift/add sequence based on -VAL, followed by a negation
2865       - a shift/add sequence based on VAL - 1, followed by an addition.
2866
2867   Return true if the cheapest of these costs less than MULT_COST,
2868   describing the algorithm in *ALG and final fixup in *VARIANT.  */
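
/* For instance, multiplying by -3 in SImode may come out cheapest as the
   shift/add sequence for 3 (2*x + x) followed by a negation (the
   negate_variant), while multiplying by 9 may use the single shift for 8
   followed by one addition (the add_variant), depending on the target's
   costs.  */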
2869
2870static bool
2871choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2872		     struct algorithm *alg, enum mult_variant *variant,
2873		     int mult_cost)
2874{
2875  struct algorithm alg2;
2876  struct mult_cost limit;
2877  int op_cost;
2878
2879  /* Fail quickly for impossible bounds.  */
2880  if (mult_cost < 0)
2881    return false;
2882
2883  /* Ensure that mult_cost provides a reasonable upper bound.
2884     Any constant multiplication can be performed with fewer
2885     than 2 * bits additions.  */
2886  op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2887  if (mult_cost > op_cost)
2888    mult_cost = op_cost;
2889
2890  *variant = basic_variant;
2891  limit.cost = mult_cost;
2892  limit.latency = mult_cost;
2893  synth_mult (alg, val, &limit, mode);
2894
2895  /* This works only if the inverted value actually fits in an
2896     `unsigned int'.  */
2897  if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2898    {
2899      op_cost = neg_cost[mode];
2900      if (MULT_COST_LESS (&alg->cost, mult_cost))
2901	{
2902	  limit.cost = alg->cost.cost - op_cost;
2903	  limit.latency = alg->cost.latency - op_cost;
2904	}
2905      else
2906	{
2907	  limit.cost = mult_cost - op_cost;
2908	  limit.latency = mult_cost - op_cost;
2909	}
2910
2911      synth_mult (&alg2, -val, &limit, mode);
2912      alg2.cost.cost += op_cost;
2913      alg2.cost.latency += op_cost;
2914      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2915	*alg = alg2, *variant = negate_variant;
2916    }
2917
2918  /* This proves very useful for division-by-constant.  */
2919  op_cost = add_cost[mode];
2920  if (MULT_COST_LESS (&alg->cost, mult_cost))
2921    {
2922      limit.cost = alg->cost.cost - op_cost;
2923      limit.latency = alg->cost.latency - op_cost;
2924    }
2925  else
2926    {
2927      limit.cost = mult_cost - op_cost;
2928      limit.latency = mult_cost - op_cost;
2929    }
2930
2931  synth_mult (&alg2, val - 1, &limit, mode);
2932  alg2.cost.cost += op_cost;
2933  alg2.cost.latency += op_cost;
2934  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2935    *alg = alg2, *variant = add_variant;
2936
2937  return MULT_COST_LESS (&alg->cost, mult_cost);
2938}
2939
2940/* A subroutine of expand_mult, used for constant multiplications.
2941   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2942   convenient.  Use the shift/add sequence described by ALG and apply
2943   the final fixup specified by VARIANT.  */
2944
2945static rtx
2946expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2947		   rtx target, const struct algorithm *alg,
2948		   enum mult_variant variant)
2949{
2950  HOST_WIDE_INT val_so_far;
2951  rtx insn, accum, tem;
2952  int opno;
2953  enum machine_mode nmode;
2954
2955  /* Avoid referencing memory over and over.
2956     For speed, but also for correctness when mem is volatile.  */
2957  if (MEM_P (op0))
2958    op0 = force_reg (mode, op0);
2959
2960  /* ACCUM starts out either as OP0 or as a zero, depending on
2961     the first operation.  */
2962
2963  if (alg->op[0] == alg_zero)
2964    {
2965      accum = copy_to_mode_reg (mode, const0_rtx);
2966      val_so_far = 0;
2967    }
2968  else if (alg->op[0] == alg_m)
2969    {
2970      accum = copy_to_mode_reg (mode, op0);
2971      val_so_far = 1;
2972    }
2973  else
2974    gcc_unreachable ();
2975
2976  for (opno = 1; opno < alg->ops; opno++)
2977    {
2978      int log = alg->log[opno];
2979      rtx shift_subtarget = optimize ? 0 : accum;
2980      rtx add_target
2981	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
2982	   && !optimize)
2983	  ? target : 0;
2984      rtx accum_target = optimize ? 0 : accum;
2985
2986      switch (alg->op[opno])
2987	{
2988	case alg_shift:
2989	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
2990				build_int_cst (NULL_TREE, log),
2991				NULL_RTX, 0);
2992	  val_so_far <<= log;
2993	  break;
2994
2995	case alg_add_t_m2:
2996	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
2997			      build_int_cst (NULL_TREE, log),
2998			      NULL_RTX, 0);
2999	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3000				 add_target ? add_target : accum_target);
3001	  val_so_far += (HOST_WIDE_INT) 1 << log;
3002	  break;
3003
3004	case alg_sub_t_m2:
3005	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
3006			      build_int_cst (NULL_TREE, log),
3007			      NULL_RTX, 0);
3008	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3009				 add_target ? add_target : accum_target);
3010	  val_so_far -= (HOST_WIDE_INT) 1 << log;
3011	  break;
3012
3013	case alg_add_t2_m:
3014	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3015				build_int_cst (NULL_TREE, log),
3016				shift_subtarget,
3017				0);
3018	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3019				 add_target ? add_target : accum_target);
3020	  val_so_far = (val_so_far << log) + 1;
3021	  break;
3022
3023	case alg_sub_t2_m:
3024	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3025				build_int_cst (NULL_TREE, log),
3026				shift_subtarget, 0);
3027	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3028				 add_target ? add_target : accum_target);
3029	  val_so_far = (val_so_far << log) - 1;
3030	  break;
3031
3032	case alg_add_factor:
3033	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
3034			      build_int_cst (NULL_TREE, log),
3035			      NULL_RTX, 0);
3036	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3037				 add_target ? add_target : accum_target);
3038	  val_so_far += val_so_far << log;
3039	  break;
3040
3041	case alg_sub_factor:
3042	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
3043			      build_int_cst (NULL_TREE, log),
3044			      NULL_RTX, 0);
3045	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3046				 (add_target
3047				  ? add_target : (optimize ? 0 : tem)));
3048	  val_so_far = (val_so_far << log) - val_so_far;
3049	  break;
3050
3051	default:
3052	  gcc_unreachable ();
3053	}
3054
3055      /* Write a REG_EQUAL note on the last insn so that we can cse
3056	 multiplication sequences.  Note that if ACCUM is a SUBREG,
3057	 we've set the inner register and must properly indicate
3058	 that.  */
3059
3060      tem = op0, nmode = mode;
3061      if (GET_CODE (accum) == SUBREG)
3062	{
3063	  nmode = GET_MODE (SUBREG_REG (accum));
3064	  tem = gen_lowpart (nmode, op0);
3065	}
3066
3067      insn = get_last_insn ();
3068      set_unique_reg_note (insn, REG_EQUAL,
3069			   gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
3070    }
3071
3072  if (variant == negate_variant)
3073    {
3074      val_so_far = -val_so_far;
3075      accum = expand_unop (mode, neg_optab, accum, target, 0);
3076    }
3077  else if (variant == add_variant)
3078    {
3079      val_so_far = val_so_far + 1;
3080      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3081    }
3082
3083  /* Compare only the bits of val and val_so_far that are significant
3084     in the result mode, to avoid sign-/zero-extension confusion.  */
3085  val &= GET_MODE_MASK (mode);
3086  val_so_far &= GET_MODE_MASK (mode);
3087  gcc_assert (val == val_so_far);
3088
3089  return accum;
3090}
3091
3092/* Perform a multiplication and return an rtx for the result.
3093   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3094   TARGET is a suggestion for where to store the result (an rtx).
3095
3096   We check specially for a constant integer as OP1.
3097   If you want this check for OP0 as well, then before calling
3098   you should swap the two operands if OP0 would be constant.  */
3099
3100rtx
3101expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3102	     int unsignedp)
3103{
3104  enum mult_variant variant;
3105  struct algorithm algorithm;
3106  int max_cost;
3107
3108  /* Handling const0_rtx here allows us to use zero as a rogue value for
3109     coeff below.  */
3110  if (op1 == const0_rtx)
3111    return const0_rtx;
3112  if (op1 == const1_rtx)
3113    return op0;
3114  if (op1 == constm1_rtx)
3115    return expand_unop (mode,
3116			GET_MODE_CLASS (mode) == MODE_INT
3117			&& !unsignedp && flag_trapv
3118			? negv_optab : neg_optab,
3119			op0, target, 0);
3120
3121  /* These are the operations that are potentially turned into a sequence
3122     of shifts and additions.  */
3123  if (SCALAR_INT_MODE_P (mode)
3124      && (unsignedp || !flag_trapv))
3125    {
3126      HOST_WIDE_INT coeff = 0;
3127      rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3128
3129      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3130	 less than or equal in size to `unsigned int' this doesn't matter.
3131	 If the mode is larger than `unsigned int', then synth_mult works
3132	 only if the constant value exactly fits in an `unsigned int' without
3133	 any truncation.  This means that multiplying by negative values does
3134	 not work; results are off by 2^32 on a 32-bit machine.  */
3135
3136      if (GET_CODE (op1) == CONST_INT)
3137	{
3138	  /* Attempt to handle multiplication of DImode values by negative
3139	     coefficients, by performing the multiplication by a positive
3140	     multiplier and then inverting the result.  */
3141	  if (INTVAL (op1) < 0
3142	      && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3143	    {
3144	      /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3145		 result is interpreted as an unsigned coefficient.
3146		 Exclude the cost of op0 from max_cost to match the cost
3147		 calculation of synth_mult.  */
3148	      max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3149			 - neg_cost[mode];
3150	      if (max_cost > 0
3151		  && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3152					  &variant, max_cost))
3153		{
3154		  rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3155						NULL_RTX, &algorithm,
3156						variant);
3157		  return expand_unop (mode, neg_optab, temp, target, 0);
3158		}
3159	    }
3160	  else coeff = INTVAL (op1);
3161	}
3162      else if (GET_CODE (op1) == CONST_DOUBLE)
3163	{
3164	  /* If we are multiplying in DImode, it may still be a win
3165	     to try to work with shifts and adds.  */
3166	  if (CONST_DOUBLE_HIGH (op1) == 0)
3167	    coeff = CONST_DOUBLE_LOW (op1);
3168	  else if (CONST_DOUBLE_LOW (op1) == 0
3169		   && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3170	    {
3171	      int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3172			  + HOST_BITS_PER_WIDE_INT;
3173	      return expand_shift (LSHIFT_EXPR, mode, op0,
3174				   build_int_cst (NULL_TREE, shift),
3175				   target, unsignedp);
3176	    }
3177	}
3178
3179      /* We used to test optimize here, on the grounds that it's better to
3180	 produce a smaller program when -O is not used.  But this causes
3181	 such a terrible slowdown sometimes that it seems better to always
3182	 use synth_mult.  */
3183      if (coeff != 0)
3184	{
3185	  /* Special case powers of two.  */
3186	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3187	    return expand_shift (LSHIFT_EXPR, mode, op0,
3188				 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3189				 target, unsignedp);
3190
3191	  /* Exclude the cost of op0 from max_cost to match the cost
3192	     calculation of synth_mult.  */
3193	  max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3194	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3195				   max_cost))
3196	    return expand_mult_const (mode, op0, coeff, target,
3197				      &algorithm, variant);
3198	}
3199    }
3200
3201  if (GET_CODE (op0) == CONST_DOUBLE)
3202    {
3203      rtx temp = op0;
3204      op0 = op1;
3205      op1 = temp;
3206    }
3207
3208  /* Expand x*2.0 as x+x.  */
3209  if (GET_CODE (op1) == CONST_DOUBLE
3210      && SCALAR_FLOAT_MODE_P (mode))
3211    {
3212      REAL_VALUE_TYPE d;
3213      REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3214
3215      if (REAL_VALUES_EQUAL (d, dconst2))
3216	{
3217	  op0 = force_reg (GET_MODE (op0), op0);
3218	  return expand_binop (mode, add_optab, op0, op0,
3219			       target, unsignedp, OPTAB_LIB_WIDEN);
3220	}
3221    }
3222
3223  /* This used to use umul_optab if unsigned, but for non-widening multiply
3224     there is no difference between signed and unsigned.  */
3225  op0 = expand_binop (mode,
3226		      ! unsignedp
3227		      && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3228		      ? smulv_optab : smul_optab,
3229		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3230  gcc_assert (op0);
3231  return op0;
3232}
3233
3234/* Return the smallest n such that 2**n >= X.  */
3235
3236int
3237ceil_log2 (unsigned HOST_WIDE_INT x)
3238{
3239  return floor_log2 (x - 1) + 1;
3240}
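
/* For example, ceil_log2 (4) == 2 and ceil_log2 (5) == 3; the X == 1
   case comes out as 0 because floor_log2 (0) returns -1.  */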
3241
3242/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3243   replace division by D, and put the least significant N bits of the result
3244   in *MULTIPLIER_PTR and return the most significant bit.
3245
3246   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3247   needed precision is in PRECISION (should be <= N).
3248
3249   PRECISION should be as small as possible so this function can choose
3250   multiplier more freely.
3251
3252	   The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3253   is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3254
3255   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3256   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3257
3258static
3259unsigned HOST_WIDE_INT
3260choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3261		   rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3262{
3263  HOST_WIDE_INT mhigh_hi, mlow_hi;
3264  unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3265  int lgup, post_shift;
3266  int pow, pow2;
3267  unsigned HOST_WIDE_INT nl, dummy1;
3268  HOST_WIDE_INT nh, dummy2;
3269
3270  /* lgup = ceil(log2(divisor)); */
3271  lgup = ceil_log2 (d);
3272
3273  gcc_assert (lgup <= n);
3274
3275  pow = n + lgup;
3276  pow2 = n + lgup - precision;
3277
3278  /* We could handle this with some effort, but this case is much
3279	     better handled directly with an scc insn, so rely on the caller using
3280     that.  */
3281  gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3282
3283  /* mlow = 2^(N + lgup)/d */
3284	  if (pow >= HOST_BITS_PER_WIDE_INT)
3285    {
3286      nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3287      nl = 0;
3288    }
3289  else
3290    {
3291      nh = 0;
3292      nl = (unsigned HOST_WIDE_INT) 1 << pow;
3293    }
3294  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3295			&mlow_lo, &mlow_hi, &dummy1, &dummy2);
3296
3297	  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3298  if (pow2 >= HOST_BITS_PER_WIDE_INT)
3299    nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3300  else
3301    nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3302  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3303			&mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3304
3305  gcc_assert (!mhigh_hi || nh - d < d);
3306  gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3307  /* Assert that mlow < mhigh.  */
3308  gcc_assert (mlow_hi < mhigh_hi
3309	      || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3310
3311  /* If precision == N, then mlow, mhigh exceed 2^N
3312     (but they do not exceed 2^(N+1)).  */
3313
3314  /* Reduce to lowest terms.  */
3315  for (post_shift = lgup; post_shift > 0; post_shift--)
3316    {
3317      unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3318      unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3319      if (ml_lo >= mh_lo)
3320	break;
3321
3322      mlow_hi = 0;
3323      mlow_lo = ml_lo;
3324      mhigh_hi = 0;
3325      mhigh_lo = mh_lo;
3326    }
3327
3328  *post_shift_ptr = post_shift;
3329  *lgup_ptr = lgup;
3330  if (n < HOST_BITS_PER_WIDE_INT)
3331    {
3332      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3333      *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3334      return mhigh_lo >= mask;
3335    }
3336  else
3337    {
3338      *multiplier_ptr = GEN_INT (mhigh_lo);
3339      return mhigh_hi;
3340    }
3341}
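
/* A minimal sketch, on plain C types rather than rtl, of how a
   multiplier chosen above gets used.  The constants are the well-known
   ones for unsigned 32-bit division by 3 (multiplier 0xAAAAAAAB,
   post shift 1); the helper name udiv3 is hypothetical.  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint32_t
udiv3 (uint32_t x)
{
  /* High half of the widening multiply, then the post shift.  */
  uint32_t hi = (uint32_t) (((uint64_t) x * 0xAAAAAAABu) >> 32);
  return hi >> 1;
}

int
main (void)
{
  uint32_t x;
  for (x = 0; x < 100000; x++)
    assert (udiv3 (x) == x / 3);
  assert (udiv3 (0xFFFFFFFFu) == 0xFFFFFFFFu / 3);
  return 0;
}
#endif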
3342
3343	/* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3344   congruent to 1 (mod 2**N).  */
3345
3346static unsigned HOST_WIDE_INT
3347invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3348{
3349  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3350
3351  /* The algorithm notes that the choice y = x satisfies
3352     x*y == 1 mod 2^3, since x is assumed odd.
3353     Each iteration doubles the number of bits of significance in y.  */
3354
3355  unsigned HOST_WIDE_INT mask;
3356  unsigned HOST_WIDE_INT y = x;
3357  int nbit = 3;
3358
3359  mask = (n == HOST_BITS_PER_WIDE_INT
3360	  ? ~(unsigned HOST_WIDE_INT) 0
3361	  : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3362
3363  while (nbit < n)
3364    {
3365      y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3366      nbit *= 2;
3367    }
3368  return y;
3369}
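
/* Worked example: for x == 7 and N == 8, the start value y == 7 already
   satisfies 7*7 == 49 == 1 (mod 8); one iteration gives
   y = (7 * (2 - 49)) & 0xff == (-329 & 0xff) == 183, and indeed
   7 * 183 == 1281 == 5*256 + 1 == 1 (mod 256).  */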
3370
3371/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3372   flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3373   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3374   to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3375   become signed.
3376
3377   The result is put in TARGET if that is convenient.
3378
3379   MODE is the mode of operation.  */
3380
3381rtx
3382expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3383			     rtx op1, rtx target, int unsignedp)
3384{
3385  rtx tem;
3386  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3387
3388  tem = expand_shift (RSHIFT_EXPR, mode, op0,
3389		      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3390		      NULL_RTX, 0);
3391  tem = expand_and (mode, tem, op1, NULL_RTX);
3392  adj_operand
3393    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3394		     adj_operand);
3395
3396  tem = expand_shift (RSHIFT_EXPR, mode, op1,
3397		      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3398		      NULL_RTX, 0);
3399  tem = expand_and (mode, tem, op0, NULL_RTX);
3400  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3401			  target);
3402
3403  return target;
3404}
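
/* The identity behind the adjustment above, sketched on 32-bit C types
   (mulhs_from_mulhu is a hypothetical helper; an arithmetic right shift
   of negative values is assumed).  Reading a negative N-bit operand as
   unsigned adds 2^N to it, which adds the other operand to the high
   half of the product, so
   MULHS (a, b) = MULHU (a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0).  */
#if 0
#include <assert.h>
#include <stdint.h>

static int32_t
mulhs_from_mulhu (int32_t a, int32_t b)
{
  uint32_t hu = (uint32_t) (((uint64_t) (uint32_t) a
			     * (uint32_t) b) >> 32);
  /* (a >> 31) & b is b where a is negative and 0 otherwise, exactly
     what the shift-and-AND sequences above compute.  */
  hu -= (uint32_t) ((a >> 31) & b);
  hu -= (uint32_t) ((b >> 31) & a);
  return (int32_t) hu;
}

int
main (void)
{
  assert (mulhs_from_mulhu (-5, 7)
	  == (int32_t) (((int64_t) -5 * 7) >> 32));
  assert (mulhs_from_mulhu (-123456789, 987654321)
	  == (int32_t) (((int64_t) -123456789 * 987654321) >> 32));
  return 0;
}
#endif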
3405
3406/* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3407
3408static rtx
3409extract_high_half (enum machine_mode mode, rtx op)
3410{
3411  enum machine_mode wider_mode;
3412
3413  if (mode == word_mode)
3414    return gen_highpart (mode, op);
3415
3416  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3417
3418  wider_mode = GET_MODE_WIDER_MODE (mode);
3419  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3420		     build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3421  return convert_modes (mode, wider_mode, op, 0);
3422}
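
/* E.g. for a HImode high half on a 32-bit target: the operand is
   shifted right by 16 in SImode and the result truncated back to
   HImode.  */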
3423
3424/* Like expand_mult_highpart, but only consider using a multiplication
3425   optab.  OP1 is an rtx for the constant operand.  */
3426
3427static rtx
3428expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3429			    rtx target, int unsignedp, int max_cost)
3430{
3431  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3432  enum machine_mode wider_mode;
3433  optab moptab;
3434  rtx tem;
3435  int size;
3436
3437  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3438
3439  wider_mode = GET_MODE_WIDER_MODE (mode);
3440  size = GET_MODE_BITSIZE (mode);
3441
3442  /* Firstly, try using a multiplication insn that only generates the needed
3443     high part of the product, and in the sign flavor of unsignedp.  */
3444  if (mul_highpart_cost[mode] < max_cost)
3445    {
3446      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3447      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3448			  unsignedp, OPTAB_DIRECT);
3449      if (tem)
3450	return tem;
3451    }
3452
3453  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3454     Need to adjust the result after the multiplication.  */
3455  if (size - 1 < BITS_PER_WORD
3456      && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3457	  + 4 * add_cost[mode] < max_cost))
3458    {
3459      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3460      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3461			  unsignedp, OPTAB_DIRECT);
3462      if (tem)
3463	/* We used the wrong signedness.  Adjust the result.  */
3464	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3465					    tem, unsignedp);
3466    }
3467
3468  /* Try widening multiplication.  */
3469  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3470  if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3471      && mul_widen_cost[wider_mode] < max_cost)
3472    {
3473      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3474			  unsignedp, OPTAB_WIDEN);
3475      if (tem)
3476	return extract_high_half (mode, tem);
3477    }
3478
3479  /* Try widening the mode and perform a non-widening multiplication.  */
3480  if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3481      && size - 1 < BITS_PER_WORD
3482      && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3483    {
3484      rtx insns, wop0, wop1;
3485
3486      /* We need to widen the operands, for example to ensure the
3487	 constant multiplier is correctly sign or zero extended.
3488		 Use a sequence to clean up any instructions emitted by
3489	 the conversions if things don't work out.  */
3490      start_sequence ();
3491      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3492      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3493      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3494			  unsignedp, OPTAB_WIDEN);
3495      insns = get_insns ();
3496      end_sequence ();
3497
3498      if (tem)
3499	{
3500	  emit_insn (insns);
3501	  return extract_high_half (mode, tem);
3502	}
3503    }
3504
3505  /* Try widening multiplication of opposite signedness, and adjust.  */
3506  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3507  if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3508      && size - 1 < BITS_PER_WORD
3509      && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3510	  + 4 * add_cost[mode] < max_cost))
3511    {
3512      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3513			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3514      if (tem != 0)
3515	{
3516	  tem = extract_high_half (mode, tem);
3517	  /* We used the wrong signedness.  Adjust the result.  */
3518	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3519					      target, unsignedp);
3520	}
3521    }
3522
3523  return 0;
3524}
3525
3526/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3527   putting the high half of the result in TARGET if that is convenient,
3528   and return where the result is.  If the operation can not be performed,
3529   0 is returned.
3530
3531   MODE is the mode of operation and result.
3532
3533   UNSIGNEDP nonzero means unsigned multiply.
3534
3535   MAX_COST is the total allowed cost for the expanded RTL.  */
3536
3537static rtx
3538expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3539		      rtx target, int unsignedp, int max_cost)
3540{
3541  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3542  unsigned HOST_WIDE_INT cnst1;
3543  int extra_cost;
3544  bool sign_adjust = false;
3545  enum mult_variant variant;
3546  struct algorithm alg;
3547  rtx tem;
3548
3549  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3550	  /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3551  gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3552
3553  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3554
3555  /* We can't optimize modes wider than BITS_PER_WORD.
3556     ??? We might be able to perform double-word arithmetic if
3557     mode == word_mode, however all the cost calculations in
3558     synth_mult etc. assume single-word operations.  */
3559  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3560    return expand_mult_highpart_optab (mode, op0, op1, target,
3561				       unsignedp, max_cost);
3562
3563  extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3564
3565  /* Check whether we try to multiply by a negative constant.  */
3566  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3567    {
3568      sign_adjust = true;
3569      extra_cost += add_cost[mode];
3570    }
3571
3572  /* See whether shift/add multiplication is cheap enough.  */
3573  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3574			   max_cost - extra_cost))
3575    {
3576      /* See whether the specialized multiplication optabs are
3577	 cheaper than the shift/add version.  */
3578      tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3579					alg.cost.cost + extra_cost);
3580      if (tem)
3581	return tem;
3582
3583      tem = convert_to_mode (wider_mode, op0, unsignedp);
3584      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3585      tem = extract_high_half (mode, tem);
3586
3587      /* Adjust result for signedness.  */
3588      if (sign_adjust)
3589	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3590
3591      return tem;
3592    }
3593  return expand_mult_highpart_optab (mode, op0, op1, target,
3594				     unsignedp, max_cost);
3595}
3596
3597
3598/* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3599
3600static rtx
3601expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3602{
3603  unsigned HOST_WIDE_INT masklow, maskhigh;
3604  rtx result, temp, shift, label;
3605  int logd;
3606
3607  logd = floor_log2 (d);
3608  result = gen_reg_rtx (mode);
3609
3610  /* Avoid conditional branches when they're expensive.  */
3611  if (BRANCH_COST >= 2
3612      && !optimize_size)
3613    {
3614      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3615				      mode, 0, -1);
3616      if (signmask)
3617	{
3618	  signmask = force_reg (mode, signmask);
3619	  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3620	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3621
3622	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
3623	     which instruction sequence to use.  If logical right shifts
3624	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3625	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3626
3627	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
3628	  if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3629	      || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3630	    {
3631	      temp = expand_binop (mode, xor_optab, op0, signmask,
3632				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3633	      temp = expand_binop (mode, sub_optab, temp, signmask,
3634				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3635	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3636				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3637	      temp = expand_binop (mode, xor_optab, temp, signmask,
3638				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3639	      temp = expand_binop (mode, sub_optab, temp, signmask,
3640				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3641	    }
3642	  else
3643	    {
3644	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
3645				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
3646	      signmask = force_reg (mode, signmask);
3647
3648	      temp = expand_binop (mode, add_optab, op0, signmask,
3649				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3650	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3651				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3652	      temp = expand_binop (mode, sub_optab, temp, signmask,
3653				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3654	    }
3655	  return temp;
3656	}
3657    }
3658
3659  /* Mask contains the mode's signbit and the significant bits of the
3660     modulus.  By including the signbit in the operation, many targets
3661     can avoid an explicit compare operation in the following comparison
3662     against zero.  */
3663
3664  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3665  if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3666    {
3667      masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3668      maskhigh = -1;
3669    }
3670  else
3671    maskhigh = (HOST_WIDE_INT) -1
3672		 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3673
3674  temp = expand_binop (mode, and_optab, op0,
3675		       immed_double_const (masklow, maskhigh, mode),
3676		       result, 1, OPTAB_LIB_WIDEN);
3677  if (temp != result)
3678    emit_move_insn (result, temp);
3679
3680  label = gen_label_rtx ();
3681  do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3682
3683  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3684		       0, OPTAB_LIB_WIDEN);
3685  masklow = (HOST_WIDE_INT) -1 << logd;
3686  maskhigh = -1;
3687  temp = expand_binop (mode, ior_optab, temp,
3688		       immed_double_const (masklow, maskhigh, mode),
3689		       result, 1, OPTAB_LIB_WIDEN);
3690  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3691		       0, OPTAB_LIB_WIDEN);
3692  if (temp != result)
3693    emit_move_insn (result, temp);
3694  emit_label (label);
3695  return result;
3696}
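
/* A sketch of the branch-free path above for x % 4 on a 32-bit int
   (smod4 is a hypothetical name; an arithmetic right shift of negative
   values and two's-complement wraparound, which rtl arithmetic has,
   are assumed).  XOR/SUB against the sign mask negates a negative x,
   the AND reduces the absolute value mod 4, and the second XOR/SUB
   pair restores the sign, giving C's truncating remainder.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int32_t
smod4 (int32_t x)
{
  int32_t signmask = x >> 31;			/* -1 if x < 0, else 0 */
  int32_t t = ((x ^ signmask) - signmask) & 3;	/* |x| mod 4 */
  return (t ^ signmask) - signmask;		/* put the sign back */
}

int
main (void)
{
  assert (smod4 (7) == 3);
  assert (smod4 (-7) == -3);
  assert (smod4 (-8) == 0);
  return 0;
}
#endif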
3697
3698/* Expand signed division of OP0 by a power of two D in mode MODE.
3699   This routine is only called for positive values of D.  */
3700
3701static rtx
3702expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3703{
3704  rtx temp, label;
3705  tree shift;
3706  int logd;
3707
3708  logd = floor_log2 (d);
3709  shift = build_int_cst (NULL_TREE, logd);
3710
3711  if (d == 2 && BRANCH_COST >= 1)
3712    {
3713      temp = gen_reg_rtx (mode);
3714      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3715      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3716			   0, OPTAB_LIB_WIDEN);
3717      return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3718    }
3719
3720#ifdef HAVE_conditional_move
3721  if (BRANCH_COST >= 2)
3722    {
3723      rtx temp2;
3724
3725      /* ??? emit_conditional_move forces a stack adjustment via
3726		 compare_from_rtx, so if the sequence is discarded it will
3727	 be lost.  Do it now instead.  */
3728      do_pending_stack_adjust ();
3729
3730      start_sequence ();
3731      temp2 = copy_to_mode_reg (mode, op0);
3732      temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3733			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
3734      temp = force_reg (mode, temp);
3735
3736      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3737      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3738				     mode, temp, temp2, mode, 0);
3739      if (temp2)
3740	{
3741	  rtx seq = get_insns ();
3742	  end_sequence ();
3743	  emit_insn (seq);
3744	  return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3745	}
3746      end_sequence ();
3747    }
3748#endif
3749
3750  if (BRANCH_COST >= 2)
3751    {
3752      int ushift = GET_MODE_BITSIZE (mode) - logd;
3753
3754      temp = gen_reg_rtx (mode);
3755      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3756      if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3757	temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3758			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
3759      else
3760	temp = expand_shift (RSHIFT_EXPR, mode, temp,
3761			     build_int_cst (NULL_TREE, ushift),
3762			     NULL_RTX, 1);
3763      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3764			   0, OPTAB_LIB_WIDEN);
3765      return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3766    }
3767
3768  label = gen_label_rtx ();
3769  temp = copy_to_mode_reg (mode, op0);
3770  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3771  expand_inc (temp, GEN_INT (d - 1));
3772  emit_label (label);
3773  return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3774}
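
/* The BRANCH_COST >= 2 sequence above, sketched for division by 8 on a
   32-bit int (sdiv8 is a hypothetical name; arithmetic right shift
   assumed).  Adding d - 1 to negative dividends before the final
   arithmetic shift turns the shift's round-toward-minus-infinity into
   the round-toward-zero that signed division requires.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int32_t
sdiv8 (int32_t x)
{
  int32_t bias = (x >> 31) & 7;	/* d - 1 == 7 if x < 0, else 0 */
  return (x + bias) >> 3;
}

int
main (void)
{
  assert (sdiv8 (9) == 1);
  assert (sdiv8 (-9) == -1);
  assert (sdiv8 (-8) == -1);
  assert (sdiv8 (-1) == 0);
  return 0;
}
#endif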
3775
3776/* Emit the code to divide OP0 by OP1, putting the result in TARGET
3777   if that is convenient, and returning where the result is.
3778   You may request either the quotient or the remainder as the result;
3779   specify REM_FLAG nonzero to get the remainder.
3780
3781   CODE is the expression code for which kind of division this is;
3782   it controls how rounding is done.  MODE is the machine mode to use.
3783   UNSIGNEDP nonzero means do unsigned division.  */
3784
3785/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3786   and then correct it by or'ing in missing high bits
3787	   if the result of ANDI is nonzero.
3788   For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3789   This could optimize to a bfexts instruction.
3790   But C doesn't use these operations, so their optimizations are
3791   left for later.  */
3792/* ??? For modulo, we don't actually need the highpart of the first product,
3793   the low part will do nicely.  And for small divisors, the second multiply
3794   can also be a low-part only multiply or even be completely left out.
3795   E.g. to calculate the remainder of a division by 3 with a 32 bit
3796   multiply, multiply with 0x55555556 and extract the upper two bits;
3797   the result is exact for inputs up to 0x1fffffff.
3798   The input range can be reduced by using cross-sum rules.
3799   For odd divisors >= 3, the following table gives right shift counts
3800   so that if a number is shifted by an integer multiple of the given
3801   amount, the remainder stays the same:
3802   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3803   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3804   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3805   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3806   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3807
3808   Cross-sum rules for even numbers can be derived by leaving as many bits
3809   to the right alone as the divisor has zeros to the right.
3810   E.g. if x is an unsigned 32 bit number:
3811   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3812   */
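
/* A quick check of the remainder-by-3 recipe from the comment above,
   on one reading of "extract the upper two bits": take the top two
   bits of the low 32-bit half of the product (umod3 is a hypothetical
   name).  0x55555556 == (2^32 + 2)/3, so for x == 3q + r the low half
   of x * 0x55555556 is 2q + r * 0x55555556 (mod 2^32), whose top two
   bits are exactly r while x <= 0x1fffffff.  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint32_t
umod3 (uint32_t x)	/* valid for x <= 0x1fffffff */
{
  return (uint32_t) (x * 0x55555556u) >> 30;
}

int
main (void)
{
  uint32_t x;
  for (x = 0; x < 100000; x++)
    assert (umod3 (x) == x % 3);
  assert (umod3 (0x1fffffffu) == 0x1fffffffu % 3);
  return 0;
}
#endif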
3813
3814rtx
3815expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3816	       rtx op0, rtx op1, rtx target, int unsignedp)
3817{
3818  enum machine_mode compute_mode;
3819  rtx tquotient;
3820  rtx quotient = 0, remainder = 0;
3821  rtx last;
3822  int size;
3823  rtx insn, set;
3824  optab optab1, optab2;
3825  int op1_is_constant, op1_is_pow2 = 0;
3826  int max_cost, extra_cost;
3827  static HOST_WIDE_INT last_div_const = 0;
3828  static HOST_WIDE_INT ext_op1;
3829
3830  op1_is_constant = GET_CODE (op1) == CONST_INT;
3831  if (op1_is_constant)
3832    {
3833      ext_op1 = INTVAL (op1);
3834      if (unsignedp)
3835	ext_op1 &= GET_MODE_MASK (mode);
3836      op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3837		     || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3838    }
3839
3840  /*
3841     This is the structure of expand_divmod:
3842
3843     First comes code to fix up the operands so we can perform the operations
3844     correctly and efficiently.
3845
3846     Second comes a switch statement with code specific for each rounding mode.
3847     For some special operands this code emits all RTL for the desired
3848     operation, for other cases, it generates only a quotient and stores it in
3849     QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3850     to indicate that it has not done anything.
3851
3852     Last comes code that finishes the operation.  If QUOTIENT is set and
3853     REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3854     QUOTIENT is not set, it is computed using trunc rounding.
3855
3856     We try to generate special code for division and remainder when OP1 is a
3857     constant.  If |OP1| = 2**n we can use shifts and some other fast
3858     operations.  For other values of OP1, we compute a carefully selected
3859     fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3860     by m.
3861
3862     In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3863     half of the product.  Different strategies for generating the product are
3864     implemented in expand_mult_highpart.
3865
3866     If what we actually want is the remainder, we generate that by another
3867     by-constant multiplication and a subtraction.  */
3868
3869  /* We shouldn't be called with OP1 == const1_rtx, but some of the
3870     code below will malfunction if we are, so check here and handle
3871     the special case if so.  */
3872  if (op1 == const1_rtx)
3873    return rem_flag ? const0_rtx : op0;
3874
3875	  /* When dividing by -1, we could get an overflow.
3876     negv_optab can handle overflows.  */
3877  if (! unsignedp && op1 == constm1_rtx)
3878    {
3879      if (rem_flag)
3880	return const0_rtx;
3881      return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3882			  ? negv_optab : neg_optab, op0, target, 0);
3883    }
3884
3885  if (target
3886      /* Don't use the function value register as a target
3887	 since we have to read it as well as write it,
3888	 and function-inlining gets confused by this.  */
3889      && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3890	  /* Don't clobber an operand while doing a multi-step calculation.  */
3891	  || ((rem_flag || op1_is_constant)
3892	      && (reg_mentioned_p (target, op0)
3893		  || (MEM_P (op0) && MEM_P (target))))
3894	  || reg_mentioned_p (target, op1)
3895	  || (MEM_P (op1) && MEM_P (target))))
3896    target = 0;
3897
3898  /* Get the mode in which to perform this computation.  Normally it will
3899     be MODE, but sometimes we can't do the desired operation in MODE.
3900     If so, pick a wider mode in which we can do the operation.  Convert
3901     to that mode at the start to avoid repeated conversions.
3902
3903     First see what operations we need.  These depend on the expression
3904     we are evaluating.  (We assume that divxx3 insns exist under the
3905	     same conditions as modxx3 insns do, and that these insns don't normally
3906     fail.  If these assumptions are not correct, we may generate less
3907     efficient code in some cases.)
3908
3909     Then see if we find a mode in which we can open-code that operation
3910     (either a division, modulus, or shift).  Finally, check for the smallest
3911     mode for which we can do the operation with a library call.  */
3912
3913  /* We might want to refine this now that we have division-by-constant
3914     optimization.  Since expand_mult_highpart tries so many variants, it is
3915     not straightforward to generalize this.  Maybe we should make an array
3916     of possible modes in init_expmed?  Save this for GCC 2.7.  */
3917
3918  optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3919	    ? (unsignedp ? lshr_optab : ashr_optab)
3920	    : (unsignedp ? udiv_optab : sdiv_optab));
3921  optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3922	    ? optab1
3923	    : (unsignedp ? udivmod_optab : sdivmod_optab));
3924
3925  for (compute_mode = mode; compute_mode != VOIDmode;
3926       compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3927    if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3928	|| optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3929      break;
3930
3931  if (compute_mode == VOIDmode)
3932    for (compute_mode = mode; compute_mode != VOIDmode;
3933	 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3934      if (optab1->handlers[compute_mode].libfunc
3935	  || optab2->handlers[compute_mode].libfunc)
3936	break;
3937
3938  /* If we still couldn't find a mode, use MODE, but expand_binop will
3939     probably die.  */
3940  if (compute_mode == VOIDmode)
3941    compute_mode = mode;
3942
3943  if (target && GET_MODE (target) == compute_mode)
3944    tquotient = target;
3945  else
3946    tquotient = gen_reg_rtx (compute_mode);
3947
3948  size = GET_MODE_BITSIZE (compute_mode);
3949#if 0
3950  /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3951     (mode), and thereby get better code when OP1 is a constant.  Do that
3952     later.  It will require going over all usages of SIZE below.  */
3953  size = GET_MODE_BITSIZE (mode);
3954#endif
3955
3956  /* Only deduct something for a REM if the last divide done was
3957	     for a different constant.  Then set the constant of the last
3958     divide.  */
3959  max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
3960  if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3961		     && INTVAL (op1) == last_div_const))
3962    max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
3963
3964  last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3965
3966  /* Now convert to the best mode to use.  */
3967  if (compute_mode != mode)
3968    {
3969      op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3970      op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3971
3972      /* convert_modes may have placed op1 into a register, so we
3973	 must recompute the following.  */
3974      op1_is_constant = GET_CODE (op1) == CONST_INT;
3975      op1_is_pow2 = (op1_is_constant
3976		     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3977			  || (! unsignedp
3978			      && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3979    }
3980
3981  /* If one of the operands is a volatile MEM, copy it into a register.  */
3982
3983  if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3984    op0 = force_reg (compute_mode, op0);
3985  if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3986    op1 = force_reg (compute_mode, op1);
3987
3988  /* If we need the remainder or if OP1 is constant, we need to
3989     put OP0 in a register in case it has any queued subexpressions.  */
3990  if (rem_flag || op1_is_constant)
3991    op0 = force_reg (compute_mode, op0);
3992
3993  last = get_last_insn ();
3994
3995  /* Promote floor rounding to trunc rounding for unsigned operations.  */
3996  if (unsignedp)
3997    {
3998      if (code == FLOOR_DIV_EXPR)
3999	code = TRUNC_DIV_EXPR;
4000      if (code == FLOOR_MOD_EXPR)
4001	code = TRUNC_MOD_EXPR;
4002      if (code == EXACT_DIV_EXPR && op1_is_pow2)
4003	code = TRUNC_DIV_EXPR;
4004    }
4005
4006  if (op1 != const0_rtx)
4007    switch (code)
4008      {
4009      case TRUNC_MOD_EXPR:
4010      case TRUNC_DIV_EXPR:
4011	if (op1_is_constant)
4012	  {
4013	    if (unsignedp)
4014	      {
4015		unsigned HOST_WIDE_INT mh;
4016		int pre_shift, post_shift;
4017		int dummy;
4018		rtx ml;
4019		unsigned HOST_WIDE_INT d = (INTVAL (op1)
4020					    & GET_MODE_MASK (compute_mode));
4021
4022		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4023		  {
4024		    pre_shift = floor_log2 (d);
4025		    if (rem_flag)
4026		      {
4027			remainder
4028			  = expand_binop (compute_mode, and_optab, op0,
4029					  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4030					  remainder, 1,
4031					  OPTAB_LIB_WIDEN);
4032			if (remainder)
4033			  return gen_lowpart (mode, remainder);
4034		      }
4035		    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4036					     build_int_cst (NULL_TREE,
4037							    pre_shift),
4038					     tquotient, 1);
4039		  }
4040		else if (size <= HOST_BITS_PER_WIDE_INT)
4041		  {
4042		    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4043		      {
4044			/* Most significant bit of divisor is set; emit an scc
4045			   insn.  */
4046			quotient = emit_store_flag (tquotient, GEU, op0, op1,
4047						    compute_mode, 1, 1);
4048			if (quotient == 0)
4049			  goto fail1;
4050		      }
4051		    else
4052		      {
4053			/* Find a suitable multiplier and right shift count
4054			   instead of multiplying with D.  */
4055
4056			mh = choose_multiplier (d, size, size,
4057						&ml, &post_shift, &dummy);
4058
4059			/* If the suggested multiplier is more than SIZE bits,
4060			   we can do better for even divisors, using an
4061			   initial right shift.  */
4062			if (mh != 0 && (d & 1) == 0)
4063			  {
4064			    pre_shift = floor_log2 (d & -d);
4065			    mh = choose_multiplier (d >> pre_shift, size,
4066						    size - pre_shift,
4067						    &ml, &post_shift, &dummy);
4068			    gcc_assert (!mh);
4069			  }
4070			else
4071			  pre_shift = 0;
4072
4073			if (mh != 0)
4074			  {
4075			    rtx t1, t2, t3, t4;
4076
4077			    if (post_shift - 1 >= BITS_PER_WORD)
4078			      goto fail1;
4079
4080			    extra_cost
4081			      = (shift_cost[compute_mode][post_shift - 1]
4082				 + shift_cost[compute_mode][1]
4083				 + 2 * add_cost[compute_mode]);
4084			    t1 = expand_mult_highpart (compute_mode, op0, ml,
4085						       NULL_RTX, 1,
4086						       max_cost - extra_cost);
4087			    if (t1 == 0)
4088			      goto fail1;
4089			    t2 = force_operand (gen_rtx_MINUS (compute_mode,
4090							       op0, t1),
4091						NULL_RTX);
4092			    t3 = expand_shift
4093			      (RSHIFT_EXPR, compute_mode, t2,
4094			       build_int_cst (NULL_TREE, 1),
4095			       NULL_RTX,1);
4096			    t4 = force_operand (gen_rtx_PLUS (compute_mode,
4097							      t1, t3),
4098						NULL_RTX);
4099			    quotient = expand_shift
4100			      (RSHIFT_EXPR, compute_mode, t4,
4101			       build_int_cst (NULL_TREE, post_shift - 1),
4102			       tquotient, 1);
4103			  }
4104			else
4105			  {
4106			    rtx t1, t2;
4107
4108			    if (pre_shift >= BITS_PER_WORD
4109				|| post_shift >= BITS_PER_WORD)
4110			      goto fail1;
4111
4112			    t1 = expand_shift
4113			      (RSHIFT_EXPR, compute_mode, op0,
4114			       build_int_cst (NULL_TREE, pre_shift),
4115			       NULL_RTX, 1);
4116			    extra_cost
4117			      = (shift_cost[compute_mode][pre_shift]
4118				 + shift_cost[compute_mode][post_shift]);
4119			    t2 = expand_mult_highpart (compute_mode, t1, ml,
4120						       NULL_RTX, 1,
4121						       max_cost - extra_cost);
4122			    if (t2 == 0)
4123			      goto fail1;
4124			    quotient = expand_shift
4125			      (RSHIFT_EXPR, compute_mode, t2,
4126			       build_int_cst (NULL_TREE, post_shift),
4127			       tquotient, 1);
4128			  }
4129		      }
4130		  }
4131		else		/* Mode too wide to use the tricky code */
4132		  break;
4133
4134		insn = get_last_insn ();
4135		if (insn != last
4136		    && (set = single_set (insn)) != 0
4137		    && SET_DEST (set) == quotient)
4138		  set_unique_reg_note (insn,
4139				       REG_EQUAL,
4140				       gen_rtx_UDIV (compute_mode, op0, op1));
4141	      }
4142	    else		/* TRUNC_DIV, signed */
4143	      {
4144		unsigned HOST_WIDE_INT ml;
4145		int lgup, post_shift;
4146		rtx mlr;
4147		HOST_WIDE_INT d = INTVAL (op1);
4148		unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
4149
4150		/* n rem d = n rem -d */
4151		if (rem_flag && d < 0)
4152		  {
4153		    d = abs_d;
4154		    op1 = gen_int_mode (abs_d, compute_mode);
4155		  }
4156
4157		if (d == 1)
4158		  quotient = op0;
4159		else if (d == -1)
4160		  quotient = expand_unop (compute_mode, neg_optab, op0,
4161					  tquotient, 0);
4162		else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4163		  {
4164		    /* This case is not handled correctly below.  */
4165		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4166						compute_mode, 1, 1);
4167		    if (quotient == 0)
4168		      goto fail1;
4169		  }
4170		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4171			 && (rem_flag ? smod_pow2_cheap[compute_mode]
4172				      : sdiv_pow2_cheap[compute_mode])
4173			 /* We assume that the cheap metric is true if the
4174			    optab has an expander for this mode.  */
4175			 && (((rem_flag ? smod_optab : sdiv_optab)
4176			      ->handlers[compute_mode].insn_code
4177			      != CODE_FOR_nothing)
4178			     || (sdivmod_optab->handlers[compute_mode]
4179				 .insn_code != CODE_FOR_nothing)))
4180		  ;
4181		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4182		  {
4183		    if (rem_flag)
4184		      {
4185			remainder = expand_smod_pow2 (compute_mode, op0, d);
4186			if (remainder)
4187			  return gen_lowpart (mode, remainder);
4188		      }
4189
4190		    if (sdiv_pow2_cheap[compute_mode]
4191			&& ((sdiv_optab->handlers[compute_mode].insn_code
4192			     != CODE_FOR_nothing)
4193			    || (sdivmod_optab->handlers[compute_mode].insn_code
4194				!= CODE_FOR_nothing)))
4195		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4196						compute_mode, op0,
4197						gen_int_mode (abs_d,
4198							      compute_mode),
4199						NULL_RTX, 0);
4200		    else
4201		      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4202
4203		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4204		       negate the quotient.  */
4205		    if (d < 0)
4206		      {
4207			insn = get_last_insn ();
4208			if (insn != last
4209			    && (set = single_set (insn)) != 0
4210			    && SET_DEST (set) == quotient
4211			    && abs_d < ((unsigned HOST_WIDE_INT) 1
4212					<< (HOST_BITS_PER_WIDE_INT - 1)))
4213			  set_unique_reg_note (insn,
4214					       REG_EQUAL,
4215					       gen_rtx_DIV (compute_mode,
4216							    op0,
4217							    GEN_INT
4218							    (trunc_int_for_mode
4219							     (abs_d,
4220							      compute_mode))));
4221
4222			quotient = expand_unop (compute_mode, neg_optab,
4223						quotient, quotient, 0);
4224		      }
4225		  }
4226		else if (size <= HOST_BITS_PER_WIDE_INT)
4227		  {
4228		    choose_multiplier (abs_d, size, size - 1,
4229				       &mlr, &post_shift, &lgup);
4230		    ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4231		    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4232		      {
4233			rtx t1, t2, t3;
4234
4235			if (post_shift >= BITS_PER_WORD
4236			    || size - 1 >= BITS_PER_WORD)
4237			  goto fail1;
4238
4239			extra_cost = (shift_cost[compute_mode][post_shift]
4240				      + shift_cost[compute_mode][size - 1]
4241				      + add_cost[compute_mode]);
4242			t1 = expand_mult_highpart (compute_mode, op0, mlr,
4243						   NULL_RTX, 0,
4244						   max_cost - extra_cost);
4245			if (t1 == 0)
4246			  goto fail1;
4247			t2 = expand_shift
4248			  (RSHIFT_EXPR, compute_mode, t1,
4249			   build_int_cst (NULL_TREE, post_shift),
4250			   NULL_RTX, 0);
4251			t3 = expand_shift
4252			  (RSHIFT_EXPR, compute_mode, op0,
4253			   build_int_cst (NULL_TREE, size - 1),
4254			   NULL_RTX, 0);
4255			if (d < 0)
4256			  quotient
4257			    = force_operand (gen_rtx_MINUS (compute_mode,
4258							    t3, t2),
4259					     tquotient);
4260			else
4261			  quotient
4262			    = force_operand (gen_rtx_MINUS (compute_mode,
4263							    t2, t3),
4264					     tquotient);
4265		      }
4266		    else
4267		      {
4268			rtx t1, t2, t3, t4;
4269
4270			if (post_shift >= BITS_PER_WORD
4271			    || size - 1 >= BITS_PER_WORD)
4272			  goto fail1;
4273
4274			ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4275			mlr = gen_int_mode (ml, compute_mode);
4276			extra_cost = (shift_cost[compute_mode][post_shift]
4277				      + shift_cost[compute_mode][size - 1]
4278				      + 2 * add_cost[compute_mode]);
4279			t1 = expand_mult_highpart (compute_mode, op0, mlr,
4280						   NULL_RTX, 0,
4281						   max_cost - extra_cost);
4282			if (t1 == 0)
4283			  goto fail1;
4284			t2 = force_operand (gen_rtx_PLUS (compute_mode,
4285							  t1, op0),
4286					    NULL_RTX);
4287			t3 = expand_shift
4288			  (RSHIFT_EXPR, compute_mode, t2,
4289			   build_int_cst (NULL_TREE, post_shift),
4290			   NULL_RTX, 0);
4291			t4 = expand_shift
4292			  (RSHIFT_EXPR, compute_mode, op0,
4293			   build_int_cst (NULL_TREE, size - 1),
4294			   NULL_RTX, 0);
4295			if (d < 0)
4296			  quotient
4297			    = force_operand (gen_rtx_MINUS (compute_mode,
4298							    t4, t3),
4299					     tquotient);
4300			else
4301			  quotient
4302			    = force_operand (gen_rtx_MINUS (compute_mode,
4303							    t3, t4),
4304					     tquotient);
4305		      }
4306		  }
4307		else		/* Mode too wide to use the tricky code */
4308		  break;
4309
4310		insn = get_last_insn ();
4311		if (insn != last
4312		    && (set = single_set (insn)) != 0
4313		    && SET_DEST (set) == quotient)
4314		  set_unique_reg_note (insn,
4315				       REG_EQUAL,
4316				       gen_rtx_DIV (compute_mode, op0, op1));
4317	      }
4318	    break;
4319	  }
4320      fail1:
4321	delete_insns_since (last);
4322	break;
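
/* The mh != 0 path above, sketched for unsigned 32-bit division by 7
   (udiv7 is a hypothetical name).  choose_multiplier needs 33 bits for
   d == 7, so the sequence recovers the extra bit with the
   t2 = (x - t1) >> 1 and t4 = t1 + t2 steps; the well-known constants
   are multiplier 0x24924925 and post_shift == 3, hence a final shift
   of post_shift - 1 == 2.  */
#if 0
#include <assert.h>
#include <stdint.h>

static uint32_t
udiv7 (uint32_t x)
{
  uint32_t t1 = (uint32_t) (((uint64_t) x * 0x24924925u) >> 32);
  uint32_t t2 = (x - t1) >> 1;
  return (t1 + t2) >> 2;
}

int
main (void)
{
  uint32_t x;
  for (x = 0; x < 100000; x++)
    assert (udiv7 (x) == x / 7);
  assert (udiv7 (0xFFFFFFFFu) == 0xFFFFFFFFu / 7);
  return 0;
}
#endif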
4323
4324      case FLOOR_DIV_EXPR:
4325      case FLOOR_MOD_EXPR:
4326	/* We will come here only for signed operations.  */
4327	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4328	  {
4329	    unsigned HOST_WIDE_INT mh;
4330	    int pre_shift, lgup, post_shift;
4331	    HOST_WIDE_INT d = INTVAL (op1);
4332	    rtx ml;
4333
4334	    if (d > 0)
4335	      {
4336		/* We could just as easily deal with negative constants here,
4337		   but it does not seem worth the trouble for GCC 2.6.  */
4338		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4339		  {
4340		    pre_shift = floor_log2 (d);
4341		    if (rem_flag)
4342		      {
4343			remainder = expand_binop (compute_mode, and_optab, op0,
4344						  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4345						  remainder, 0, OPTAB_LIB_WIDEN);
4346			if (remainder)
4347			  return gen_lowpart (mode, remainder);
4348		      }
4349		    quotient = expand_shift
4350		      (RSHIFT_EXPR, compute_mode, op0,
4351		       build_int_cst (NULL_TREE, pre_shift),
4352		       tquotient, 0);
4353		  }
4354		else
4355		  {
4356		    rtx t1, t2, t3, t4;
4357
4358		    mh = choose_multiplier (d, size, size - 1,
4359					    &ml, &post_shift, &lgup);
4360		    gcc_assert (!mh);
4361
4362		    if (post_shift < BITS_PER_WORD
4363			&& size - 1 < BITS_PER_WORD)
4364		      {
4365			t1 = expand_shift
4366			  (RSHIFT_EXPR, compute_mode, op0,
4367			   build_int_cst (NULL_TREE, size - 1),
4368			   NULL_RTX, 0);
4369			t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4370					   NULL_RTX, 0, OPTAB_WIDEN);
4371			extra_cost = (shift_cost[compute_mode][post_shift]
4372				      + shift_cost[compute_mode][size - 1]
4373				      + 2 * add_cost[compute_mode]);
4374			t3 = expand_mult_highpart (compute_mode, t2, ml,
4375						   NULL_RTX, 1,
4376						   max_cost - extra_cost);
4377			if (t3 != 0)
4378			  {
4379			    t4 = expand_shift
4380			      (RSHIFT_EXPR, compute_mode, t3,
4381			       build_int_cst (NULL_TREE, post_shift),
4382			       NULL_RTX, 1);
4383			    quotient = expand_binop (compute_mode, xor_optab,
4384						     t4, t1, tquotient, 0,
4385						     OPTAB_WIDEN);
4386			  }
4387		      }
4388		  }
4389	      }
4390	    else
4391	      {
4392		rtx nsign, t1, t2, t3, t4;
4393		t1 = force_operand (gen_rtx_PLUS (compute_mode,
4394						  op0, constm1_rtx), NULL_RTX);
4395		t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4396				   0, OPTAB_WIDEN);
4397		nsign = expand_shift
4398		  (RSHIFT_EXPR, compute_mode, t2,
4399		   build_int_cst (NULL_TREE, size - 1),
4400		   NULL_RTX, 0);
4401		t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4402				    NULL_RTX);
4403		t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4404				    NULL_RTX, 0);
4405		if (t4)
4406		  {
4407		    rtx t5;
4408		    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4409				      NULL_RTX, 0);
4410		    quotient = force_operand (gen_rtx_PLUS (compute_mode,
4411							    t4, t5),
4412					      tquotient);
4413		  }
4414	      }
4415	  }
4416
4417	if (quotient != 0)
4418	  break;
4419	delete_insns_since (last);
4420
4421	/* Try using an instruction that produces both the quotient and
4422	   remainder, using truncation.  We can easily compensate the quotient
4423	   or remainder to get floor rounding, once we have the remainder.
4424	   Notice that we also compute the final remainder value here,
4425	   and return the result right away.  */
4426	if (target == 0 || GET_MODE (target) != compute_mode)
4427	  target = gen_reg_rtx (compute_mode);
4428
4429	if (rem_flag)
4430	  {
4431	    remainder
4432	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4433	    quotient = gen_reg_rtx (compute_mode);
4434	  }
4435	else
4436	  {
4437	    quotient
4438	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4439	    remainder = gen_reg_rtx (compute_mode);
4440	  }
4441
4442	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4443				 quotient, remainder, 0))
4444	  {
4445	    /* This could be computed with a branch-less sequence.
4446	       Save that for later.  */
4447	    rtx tem;
4448	    rtx label = gen_label_rtx ();
4449	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4450	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4451				NULL_RTX, 0, OPTAB_WIDEN);
4452	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4453	    expand_dec (quotient, const1_rtx);
4454	    expand_inc (remainder, op1);
4455	    emit_label (label);
4456	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4457	  }
4458
4459	/* No luck with division elimination or divmod.  Have to do it
4460	   by conditionally adjusting op0 *and* the result.  */
4461	{
4462	  rtx label1, label2, label3, label4, label5;
4463	  rtx adjusted_op0;
4464	  rtx tem;
4465
4466	  quotient = gen_reg_rtx (compute_mode);
4467	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4468	  label1 = gen_label_rtx ();
4469	  label2 = gen_label_rtx ();
4470	  label3 = gen_label_rtx ();
4471	  label4 = gen_label_rtx ();
4472	  label5 = gen_label_rtx ();
4473	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4474	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4475	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4476			      quotient, 0, OPTAB_LIB_WIDEN);
4477	  if (tem != quotient)
4478	    emit_move_insn (quotient, tem);
4479	  emit_jump_insn (gen_jump (label5));
4480	  emit_barrier ();
4481	  emit_label (label1);
4482	  expand_inc (adjusted_op0, const1_rtx);
4483	  emit_jump_insn (gen_jump (label4));
4484	  emit_barrier ();
4485	  emit_label (label2);
4486	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4487	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4488			      quotient, 0, OPTAB_LIB_WIDEN);
4489	  if (tem != quotient)
4490	    emit_move_insn (quotient, tem);
4491	  emit_jump_insn (gen_jump (label5));
4492	  emit_barrier ();
4493	  emit_label (label3);
4494	  expand_dec (adjusted_op0, const1_rtx);
4495	  emit_label (label4);
4496	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4497			      quotient, 0, OPTAB_LIB_WIDEN);
4498	  if (tem != quotient)
4499	    emit_move_insn (quotient, tem);
4500	  expand_dec (quotient, const1_rtx);
4501	  emit_label (label5);
4502	}
4503	break;
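
/* What the quotient compensation above computes, as plain C (floor_div
   is a hypothetical name): truncating division rounds toward zero, so
   when the remainder is nonzero and the operands' signs differ, the
   floor quotient is one less (and the remainder gets op1 added, as in
   the expand_inc call above).  */
#if 0
#include <assert.h>

static int
floor_div (int n, int d)
{
  int q = n / d, r = n % d;	/* truncating division */
  if (r != 0 && (n ^ d) < 0)	/* inexact and signs differ */
    q--;
  return q;
}

int
main (void)
{
  assert (floor_div (-7, 2) == -4);
  assert (floor_div (7, 2) == 3);
  assert (floor_div (-6, 2) == -3);
  return 0;
}
#endif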
4504
4505      case CEIL_DIV_EXPR:
4506      case CEIL_MOD_EXPR:
4507	if (unsignedp)
4508	  {
4509	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4510	      {
4511		rtx t1, t2, t3;
4512		unsigned HOST_WIDE_INT d = INTVAL (op1);
4513		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4514				   build_int_cst (NULL_TREE, floor_log2 (d)),
4515				   tquotient, 1);
4516		t2 = expand_binop (compute_mode, and_optab, op0,
4517				   GEN_INT (d - 1),
4518				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4519		t3 = gen_reg_rtx (compute_mode);
4520		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4521				      compute_mode, 1, 1);
4522		if (t3 == 0)
4523		  {
4524		    rtx lab;
4525		    lab = gen_label_rtx ();
4526		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4527		    expand_inc (t1, const1_rtx);
4528		    emit_label (lab);
4529		    quotient = t1;
4530		  }
4531		else
4532		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4533							  t1, t3),
4534					    tquotient);
4535		break;
4536	      }
4537
4538	    /* Try using an instruction that produces both the quotient and
4539	       remainder, using truncation.  We can easily compensate the
4540	       quotient or remainder to get ceiling rounding, once we have the
4541	       remainder.  Notice that we also compute the final remainder
4542	       value here, and return the result right away.  */
4543	    if (target == 0 || GET_MODE (target) != compute_mode)
4544	      target = gen_reg_rtx (compute_mode);
4545
4546	    if (rem_flag)
4547	      {
4548		remainder = (REG_P (target)
4549			     ? target : gen_reg_rtx (compute_mode));
4550		quotient = gen_reg_rtx (compute_mode);
4551	      }
4552	    else
4553	      {
4554		quotient = (REG_P (target)
4555			    ? target : gen_reg_rtx (compute_mode));
4556		remainder = gen_reg_rtx (compute_mode);
4557	      }
4558
4559	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4560				     remainder, 1))
4561	      {
4562		/* This could be computed with a branch-less sequence.
4563		   Save that for later.  */
4564		rtx label = gen_label_rtx ();
4565		do_cmp_and_jump (remainder, const0_rtx, EQ,
4566				 compute_mode, label);
4567		expand_inc (quotient, const1_rtx);
4568		expand_dec (remainder, op1);
4569		emit_label (label);
4570		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4571	      }
4572
4573	    /* No luck with division elimination or divmod.  Have to do it
4574	       by conditionally adjusting op0 *and* the result.  */
4575	    {
4576	      rtx label1, label2;
4577	      rtx adjusted_op0, tem;
4578
4579	      quotient = gen_reg_rtx (compute_mode);
4580	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4581	      label1 = gen_label_rtx ();
4582	      label2 = gen_label_rtx ();
4583	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4584			       compute_mode, label1);
4585	      emit_move_insn (quotient, const0_rtx);
4586	      emit_jump_insn (gen_jump (label2));
4587	      emit_barrier ();
4588	      emit_label (label1);
4589	      expand_dec (adjusted_op0, const1_rtx);
4590	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4591				  quotient, 1, OPTAB_LIB_WIDEN);
4592	      if (tem != quotient)
4593		emit_move_insn (quotient, tem);
4594	      expand_inc (quotient, const1_rtx);
4595	      emit_label (label2);
4596	    }
4597	  }
4598	else /* signed */
4599	  {
4600	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4601		&& INTVAL (op1) >= 0)
4602	      {
4603		/* This is extremely similar to the code for the unsigned case
4604		   above.  For 2.7 we should merge these variants, but for
4605		   2.6.1 I don't want to touch the code for unsigned since that
4606		   gets used in C.  The signed case will only be used by other
4607		   languages (Ada).  */
4608
4609		rtx t1, t2, t3;
4610		unsigned HOST_WIDE_INT d = INTVAL (op1);
4611		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4612				   build_int_cst (NULL_TREE, floor_log2 (d)),
4613				   tquotient, 0);
4614		t2 = expand_binop (compute_mode, and_optab, op0,
4615				   GEN_INT (d - 1),
4616				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4617		t3 = gen_reg_rtx (compute_mode);
4618		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4619				      compute_mode, 1, 1);
4620		if (t3 == 0)
4621		  {
4622		    rtx lab;
4623		    lab = gen_label_rtx ();
4624		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4625		    expand_inc (t1, const1_rtx);
4626		    emit_label (lab);
4627		    quotient = t1;
4628		  }
4629		else
4630		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4631							  t1, t3),
4632					    tquotient);
4633		break;
4634	      }
4635
4636	    /* Try using an instruction that produces both the quotient and
4637	       remainder, using truncation.  We can easily compensate the
4638	       quotient or remainder to get ceiling rounding, once we have the
4639	       remainder.  Notice that we also compute the final remainder
4640	       value here, and return the result right away.  */
4641	    if (target == 0 || GET_MODE (target) != compute_mode)
4642	      target = gen_reg_rtx (compute_mode);
4643	    if (rem_flag)
4644	      {
4645		remainder = (REG_P (target)
4646			    ? target : gen_reg_rtx (compute_mode));
4647		quotient = gen_reg_rtx (compute_mode);
4648	      }
4649	    else
4650	      {
4651		quotient = (REG_P (target)
4652			    ? target : gen_reg_rtx (compute_mode));
4653		remainder = gen_reg_rtx (compute_mode);
4654	      }
4655
4656	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4657				     remainder, 0))
4658	      {
4659		/* This could be computed with a branch-less sequence.
4660		   Save that for later.  */
4661		rtx tem;
4662		rtx label = gen_label_rtx ();
4663		do_cmp_and_jump (remainder, const0_rtx, EQ,
4664				 compute_mode, label);
4665		tem = expand_binop (compute_mode, xor_optab, op0, op1,
4666				    NULL_RTX, 0, OPTAB_WIDEN);
4667		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4668		expand_inc (quotient, const1_rtx);
4669		expand_dec (remainder, op1);
4670		emit_label (label);
4671		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4672	      }
4673
4674	    /* No luck with division elimination or divmod.  Have to do it
4675	       by conditionally adjusting op0 *and* the result.  */
4676	    {
4677	      rtx label1, label2, label3, label4, label5;
4678	      rtx adjusted_op0;
4679	      rtx tem;
4680
4681	      quotient = gen_reg_rtx (compute_mode);
4682	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4683	      label1 = gen_label_rtx ();
4684	      label2 = gen_label_rtx ();
4685	      label3 = gen_label_rtx ();
4686	      label4 = gen_label_rtx ();
4687	      label5 = gen_label_rtx ();
4688	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4689	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4690			       compute_mode, label1);
4691	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4692				  quotient, 0, OPTAB_LIB_WIDEN);
4693	      if (tem != quotient)
4694		emit_move_insn (quotient, tem);
4695	      emit_jump_insn (gen_jump (label5));
4696	      emit_barrier ();
4697	      emit_label (label1);
4698	      expand_dec (adjusted_op0, const1_rtx);
4699	      emit_jump_insn (gen_jump (label4));
4700	      emit_barrier ();
4701	      emit_label (label2);
4702	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4703			       compute_mode, label3);
4704	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4705				  quotient, 0, OPTAB_LIB_WIDEN);
4706	      if (tem != quotient)
4707		emit_move_insn (quotient, tem);
4708	      emit_jump_insn (gen_jump (label5));
4709	      emit_barrier ();
4710	      emit_label (label3);
4711	      expand_inc (adjusted_op0, const1_rtx);
4712	      emit_label (label4);
4713	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4714				  quotient, 0, OPTAB_LIB_WIDEN);
4715	      if (tem != quotient)
4716		emit_move_insn (quotient, tem);
4717	      expand_inc (quotient, const1_rtx);
4718	      emit_label (label5);
4719	    }
4720	  }
4721	break;
4722
4723      case EXACT_DIV_EXPR:
4724	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4725	  {
4726	    HOST_WIDE_INT d = INTVAL (op1);
4727	    unsigned HOST_WIDE_INT ml;
4728	    int pre_shift;
4729	    rtx t1;
4730
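	    /* Exact division: shift out the known-zero low bits of the
	       divisor, then multiply by the inverse of its odd part
	       modulo 2^size.  A worked example, assuming size == 32:
	       for d = 10, pre_shift = 1 and ml = 0xcccccccd, since
	       5 * 0xcccccccd == 1 (mod 2^32); then op0 = 30 yields
	       (30 >> 1) * 0xcccccccd == 3 (mod 2^32), the exact
	       quotient.  */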
4731	    pre_shift = floor_log2 (d & -d);
4732	    ml = invert_mod2n (d >> pre_shift, size);
4733	    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4734			       build_int_cst (NULL_TREE, pre_shift),
4735			       NULL_RTX, unsignedp);
4736	    quotient = expand_mult (compute_mode, t1,
4737				    gen_int_mode (ml, compute_mode),
4738				    NULL_RTX, 1);
4739
4740	    insn = get_last_insn ();
4741	    set_unique_reg_note (insn,
4742				 REG_EQUAL,
4743				 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4744						 compute_mode,
4745						 op0, op1));
4746	  }
4747	break;
4748
4749      case ROUND_DIV_EXPR:
4750      case ROUND_MOD_EXPR:
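	/* Round to nearest: compute the truncating quotient and remainder,
	   then bump the quotient's magnitude by 1 (adjusting the remainder
	   by op1) when the remainder exceeds half the divisor.  An
	   illustrative example, following the unsigned path below: for
	   op0 = 8 and op1 = 3 the truncating result is 2 rem 2; since
	   2 > (3 - 1) / 2, the quotient becomes 3 and the remainder -1,
	   matching round (8.0 / 3.0) == 3.  */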
4751	if (unsignedp)
4752	  {
4753	    rtx tem;
4754	    rtx label;
4755	    label = gen_label_rtx ();
4756	    quotient = gen_reg_rtx (compute_mode);
4757	    remainder = gen_reg_rtx (compute_mode);
4758	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4759	      {
4760		rtx tem;
4761		quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4762					 quotient, 1, OPTAB_LIB_WIDEN);
4763		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4764		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4765					  remainder, 1, OPTAB_LIB_WIDEN);
4766	      }
4767	    tem = plus_constant (op1, -1);
4768	    tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4769				build_int_cst (NULL_TREE, 1),
4770				NULL_RTX, 1);
4771	    do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4772	    expand_inc (quotient, const1_rtx);
4773	    expand_dec (remainder, op1);
4774	    emit_label (label);
4775	  }
4776	else
4777	  {
4778	    rtx abs_rem, abs_op1, tem, mask;
4779	    rtx label;
4780	    label = gen_label_rtx ();
4781	    quotient = gen_reg_rtx (compute_mode);
4782	    remainder = gen_reg_rtx (compute_mode);
4783	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4784	      {
4785		rtx tem;
4786		quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4787					 quotient, 0, OPTAB_LIB_WIDEN);
4788		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4789		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4790					  remainder, 0, OPTAB_LIB_WIDEN);
4791	      }
4792	    abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4793	    abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4794	    tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4795				build_int_cst (NULL_TREE, 1),
4796				NULL_RTX, 1);
4797	    do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
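	    /* The adjustment below is branch-free: MASK is op0 ^ op1
	       shifted arithmetically down to all sign bits, i.e. -1 when
	       the operands have opposite signs and 0 otherwise.  Then
	       (MASK ^ 1) - MASK is +1 or -1, and (MASK ^ op1) - MASK is
	       op1 or -op1, so the quotient and remainder are adjusted
	       toward the correctly signed result without a branch.  */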
4798	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4799				NULL_RTX, 0, OPTAB_WIDEN);
4800	    mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4801				 build_int_cst (NULL_TREE, size - 1),
4802				 NULL_RTX, 0);
4803	    tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4804				NULL_RTX, 0, OPTAB_WIDEN);
4805	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4806				NULL_RTX, 0, OPTAB_WIDEN);
4807	    expand_inc (quotient, tem);
4808	    tem = expand_binop (compute_mode, xor_optab, mask, op1,
4809				NULL_RTX, 0, OPTAB_WIDEN);
4810	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4811				NULL_RTX, 0, OPTAB_WIDEN);
4812	    expand_dec (remainder, tem);
4813	    emit_label (label);
4814	  }
4815	return gen_lowpart (mode, rem_flag ? remainder : quotient);
4816
4817      default:
4818	gcc_unreachable ();
4819      }
4820
4821  if (quotient == 0)
4822    {
4823      if (target && GET_MODE (target) != compute_mode)
4824	target = 0;
4825
4826      if (rem_flag)
4827	{
4828	  /* Try to produce the remainder without producing the quotient.
4829	     If we seem to have a divmod pattern that does not require widening,
4830	     don't try widening here.  We should really have a WIDEN argument
4831	     to expand_twoval_binop, since what we'd really like to do here is
4832	     1) try a mod insn in compute_mode
4833	     2) try a divmod insn in compute_mode
4834	     3) try a div insn in compute_mode and multiply-subtract to get
4835	        remainder
4836	     4) try the same things with widening allowed.  */
4837	  remainder
4838	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4839				 op0, op1, target,
4840				 unsignedp,
4841				 ((optab2->handlers[compute_mode].insn_code
4842				   != CODE_FOR_nothing)
4843				  ? OPTAB_DIRECT : OPTAB_WIDEN));
4844	  if (remainder == 0)
4845	    {
4846	      /* No luck there.  Can we do remainder and divide at once
4847		 without a library call?  */
4848	      remainder = gen_reg_rtx (compute_mode);
4849	      if (! expand_twoval_binop ((unsignedp
4850					  ? udivmod_optab
4851					  : sdivmod_optab),
4852					 op0, op1,
4853					 NULL_RTX, remainder, unsignedp))
4854		remainder = 0;
4855	    }
4856
4857	  if (remainder)
4858	    return gen_lowpart (mode, remainder);
4859	}
4860
4861      /* Produce the quotient.  Try a quotient insn, but not a library call.
4862	 If we have a divmod in this mode, use it in preference to widening
4863	 the div (for this test we assume it will not fail).  Note that optab2
4864	 is set to whichever of the two optabs the call below will use.  */
4865      quotient
4866	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4867			     op0, op1, rem_flag ? NULL_RTX : target,
4868			     unsignedp,
4869			     ((optab2->handlers[compute_mode].insn_code
4870			       != CODE_FOR_nothing)
4871			      ? OPTAB_DIRECT : OPTAB_WIDEN));
4872
4873      if (quotient == 0)
4874	{
4875	  /* No luck there.  Try a quotient-and-remainder insn,
4876	     keeping the quotient alone.  */
4877	  quotient = gen_reg_rtx (compute_mode);
4878	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4879				     op0, op1,
4880				     quotient, NULL_RTX, unsignedp))
4881	    {
4882	      quotient = 0;
4883	      if (! rem_flag)
4884		/* Still no luck.  If we are not computing the remainder,
4885		   use a library call for the quotient.  */
4886		quotient = sign_expand_binop (compute_mode,
4887					      udiv_optab, sdiv_optab,
4888					      op0, op1, target,
4889					      unsignedp, OPTAB_LIB_WIDEN);
4890	    }
4891	}
4892    }
4893
4894  if (rem_flag)
4895    {
4896      if (target && GET_MODE (target) != compute_mode)
4897	target = 0;
4898
4899      if (quotient == 0)
4900	{
4901	  /* No divide instruction either.  Use library for remainder.  */
4902	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4903					 op0, op1, target,
4904					 unsignedp, OPTAB_LIB_WIDEN);
4905	  /* No remainder function.  Try a quotient-and-remainder
4906	     function, keeping the remainder.  */
4907	  if (!remainder)
4908	    {
4909	      remainder = gen_reg_rtx (compute_mode);
4910	      if (!expand_twoval_binop_libfunc
4911		  (unsignedp ? udivmod_optab : sdivmod_optab,
4912		   op0, op1,
4913		   NULL_RTX, remainder,
4914		   unsignedp ? UMOD : MOD))
4915		remainder = NULL_RTX;
4916	    }
4917	}
4918      else
4919	{
4920	  /* We divided.  Now finish doing X - Y * (X / Y).  */
4921	  remainder = expand_mult (compute_mode, quotient, op1,
4922				   NULL_RTX, unsignedp);
4923	  remainder = expand_binop (compute_mode, sub_optab, op0,
4924				    remainder, target, unsignedp,
4925				    OPTAB_LIB_WIDEN);
4926	}
4927    }
4928
4929  return gen_lowpart (mode, rem_flag ? remainder : quotient);
4930}
4931
4932/* Return a tree node with data type TYPE, describing the value of X.
4933   Usually this is a VAR_DECL, if there is no obvious better choice.
4934   X may be an expression; however, we only support those expressions
4935   generated by loop.c.  */
4936
4937tree
4938make_tree (tree type, rtx x)
4939{
4940  tree t;
4941
4942  switch (GET_CODE (x))
4943    {
4944    case CONST_INT:
4945      {
4946	HOST_WIDE_INT hi = 0;
4947
4948	if (INTVAL (x) < 0
4949	    && !(TYPE_UNSIGNED (type)
4950		 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4951		     < HOST_BITS_PER_WIDE_INT)))
4952	  hi = -1;
4953
4954	t = build_int_cst_wide (type, INTVAL (x), hi);
4955
4956	return t;
4957      }
4958
4959    case CONST_DOUBLE:
4960      if (GET_MODE (x) == VOIDmode)
4961	t = build_int_cst_wide (type,
4962				CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4963      else
4964	{
4965	  REAL_VALUE_TYPE d;
4966
4967	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4968	  t = build_real (type, d);
4969	}
4970
4971      return t;
4972
4973    case CONST_VECTOR:
4974      {
4975	int units = CONST_VECTOR_NUNITS (x);
4976	tree itype = TREE_TYPE (type);
4977	tree t = NULL_TREE;
4978	int i;
4979
4980
4981	/* Build a tree with vector elements.  */
4982	for (i = units - 1; i >= 0; --i)
4983	  {
4984	    rtx elt = CONST_VECTOR_ELT (x, i);
4985	    t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4986	  }
4987
4988	return build_vector (type, t);
4989      }
4990
4991    case PLUS:
4992      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4993			  make_tree (type, XEXP (x, 1)));
4994
4995    case MINUS:
4996      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4997			  make_tree (type, XEXP (x, 1)));
4998
4999    case NEG:
5000      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5001
5002    case MULT:
5003      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5004			  make_tree (type, XEXP (x, 1)));
5005
5006    case ASHIFT:
5007      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5008			  make_tree (type, XEXP (x, 1)));
5009
5010    case LSHIFTRT:
5011      t = lang_hooks.types.unsigned_type (type);
5012      return fold_convert (type, build2 (RSHIFT_EXPR, t,
5013			    		 make_tree (t, XEXP (x, 0)),
5014				    	 make_tree (type, XEXP (x, 1))));
5015
5016    case ASHIFTRT:
5017      t = lang_hooks.types.signed_type (type);
5018      return fold_convert (type, build2 (RSHIFT_EXPR, t,
5019					 make_tree (t, XEXP (x, 0)),
5020				    	 make_tree (type, XEXP (x, 1))));
5021
5022    case DIV:
5023      if (TREE_CODE (type) != REAL_TYPE)
5024	t = lang_hooks.types.signed_type (type);
5025      else
5026	t = type;
5027
5028      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5029				    	 make_tree (t, XEXP (x, 0)),
5030				    	 make_tree (t, XEXP (x, 1))));
5031    case UDIV:
5032      t = lang_hooks.types.unsigned_type (type);
5033      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5034				    	 make_tree (t, XEXP (x, 0)),
5035				    	 make_tree (t, XEXP (x, 1))));
5036
5037    case SIGN_EXTEND:
5038    case ZERO_EXTEND:
5039      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5040					  GET_CODE (x) == ZERO_EXTEND);
5041      return fold_convert (type, make_tree (t, XEXP (x, 0)));
5042
5043    case CONST:
5044      return make_tree (type, XEXP (x, 0));
5045
5046    case SYMBOL_REF:
5047      t = SYMBOL_REF_DECL (x);
5048      if (t)
5049	return fold_convert (type, build_fold_addr_expr (t));
5050      /* else fall through.  */
5051
5052    default:
5053      t = build_decl (VAR_DECL, NULL_TREE, type);
5054
5055      /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
5056	 ptr_mode.  So convert.  */
5057      if (POINTER_TYPE_P (type))
5058	x = convert_memory_address (TYPE_MODE (type), x);
5059
5060      /* Note that we do *not* use SET_DECL_RTL here, because we do not
5061	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5062      t->decl_with_rtl.rtl = x;
5063
5064      return t;
5065    }
5066}
5067
5068/* Compute the logical-and of OP0 and OP1, storing it in TARGET
5069   and returning TARGET.
5070
5071   If TARGET is 0, a pseudo-register or constant is returned.  */
5072
5073rtx
5074expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5075{
5076  rtx tem = 0;
5077
5078  if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5079    tem = simplify_binary_operation (AND, mode, op0, op1);
5080  if (tem == 0)
5081    tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5082
5083  if (target == 0)
5084    target = tem;
5085  else if (tem != target)
5086    emit_move_insn (target, tem);
5087  return target;
5088}
5089
5090/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5091   and storing in TARGET.  Normally return TARGET.
5092   Return 0 if that cannot be done.
5093
5094   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5095   it is VOIDmode, they cannot both be CONST_INT.
5096
5097   UNSIGNEDP is for the case where we have to widen the operands
5098   to perform the operation.  It says to use zero-extension.
5099
5100   NORMALIZEP is 1 if we should convert the result to be either zero
5101   or one.  NORMALIZEP is -1 if we should convert the result to be
5102   either zero or -1.  If NORMALIZEP is zero, the result will be left
5103   "raw" out of the scc insn.  */
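/* For example (an illustrative call, not a fixed idiom):

     rtx flag = emit_store_flag (target, EQ, x, const0_rtx, SImode, 0, 1);

   attempts to store 1 in TARGET when X == 0 and 0 otherwise, returning
   0 if no store-flag sequence can be emitted.  */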
5104
5105rtx
5106emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5107		 enum machine_mode mode, int unsignedp, int normalizep)
5108{
5109  rtx subtarget;
5110  enum insn_code icode;
5111  enum machine_mode compare_mode;
5112  enum machine_mode target_mode = GET_MODE (target);
5113  rtx tem;
5114  rtx last = get_last_insn ();
5115  rtx pattern, comparison;
5116
5117  if (unsignedp)
5118    code = unsigned_condition (code);
5119
5120  /* If one operand is constant, make it the second one.  Only do this
5121     if the other operand is not constant as well.  */
5122
5123  if (swap_commutative_operands_p (op0, op1))
5124    {
5125      tem = op0;
5126      op0 = op1;
5127      op1 = tem;
5128      code = swap_condition (code);
5129    }
5130
5131  if (mode == VOIDmode)
5132    mode = GET_MODE (op0);
5133
5134  /* For some comparisons with 1 and -1, we can convert this to
5135     comparisons with zero.  This will often produce more opportunities for
5136     store-flag insns.  */
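  /* For instance, A < 1 is the same test as A <= 0, and the unsigned
     A >= 1 is the same as A != 0.  */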
5137
5138  switch (code)
5139    {
5140    case LT:
5141      if (op1 == const1_rtx)
5142	op1 = const0_rtx, code = LE;
5143      break;
5144    case LE:
5145      if (op1 == constm1_rtx)
5146	op1 = const0_rtx, code = LT;
5147      break;
5148    case GE:
5149      if (op1 == const1_rtx)
5150	op1 = const0_rtx, code = GT;
5151      break;
5152    case GT:
5153      if (op1 == constm1_rtx)
5154	op1 = const0_rtx, code = GE;
5155      break;
5156    case GEU:
5157      if (op1 == const1_rtx)
5158	op1 = const0_rtx, code = NE;
5159      break;
5160    case LTU:
5161      if (op1 == const1_rtx)
5162	op1 = const0_rtx, code = EQ;
5163      break;
5164    default:
5165      break;
5166    }
5167
5168  /* If we are comparing a double-word integer with zero or -1, we can
5169     convert the comparison into one involving a single word.  */
5170  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5171      && GET_MODE_CLASS (mode) == MODE_INT
5172      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5173    {
5174      if ((code == EQ || code == NE)
5175	  && (op1 == const0_rtx || op1 == constm1_rtx))
5176	{
5177	  rtx op00, op01, op0both;
5178
5179	  /* Do a logical OR or AND of the two words and compare the result.  */
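	  /* A double-word value is zero exactly when the inclusive OR of
	     its two words is zero, and is all ones exactly when the AND
	     of its two words is all ones, so either comparison reduces
	     to a single-word one.  */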
5180	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5181	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5182	  op0both = expand_binop (word_mode,
5183				  op1 == const0_rtx ? ior_optab : and_optab,
5184				  op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5185
5186	  if (op0both != 0)
5187	    return emit_store_flag (target, code, op0both, op1, word_mode,
5188				    unsignedp, normalizep);
5189	}
5190      else if ((code == LT || code == GE) && op1 == const0_rtx)
5191	{
5192	  rtx op0h;
5193
5194	  /* If testing the sign bit, can just test on high word.  */
5195	  op0h = simplify_gen_subreg (word_mode, op0, mode,
5196				      subreg_highpart_offset (word_mode, mode));
5197	  return emit_store_flag (target, code, op0h, op1, word_mode,
5198				  unsignedp, normalizep);
5199	}
5200    }
5201
5202  /* From now on, we won't change CODE, so set ICODE now.  */
5203  icode = setcc_gen_code[(int) code];
5204
5205  /* If this is A < 0 or A >= 0, we can do this by taking the one's
5206     complement of A (for GE) and shifting the sign bit to the low bit.  */
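  /* For instance, for A < 0 with NORMALIZEP == 1 in a 32-bit mode this
     emits a logical shift A >> 31, which is precisely the sign bit; for
     A >= 0 the complement ~A is shifted instead.  */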
5207  if (op1 == const0_rtx && (code == LT || code == GE)
5208      && GET_MODE_CLASS (mode) == MODE_INT
5209      && (normalizep || STORE_FLAG_VALUE == 1
5210	  || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5211	      && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5212		  == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5213    {
5214      subtarget = target;
5215
5216      /* If the result is to be wider than OP0, it is best to convert it
5217	 first.  If it is to be narrower, it is *incorrect* to convert it
5218	 first.  */
5219      if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5220	{
5221	  op0 = convert_modes (target_mode, mode, op0, 0);
5222	  mode = target_mode;
5223	}
5224
5225      if (target_mode != mode)
5226	subtarget = 0;
5227
5228      if (code == GE)
5229	op0 = expand_unop (mode, one_cmpl_optab, op0,
5230			   ((STORE_FLAG_VALUE == 1 || normalizep)
5231			    ? 0 : subtarget), 0);
5232
5233      if (STORE_FLAG_VALUE == 1 || normalizep)
5234	/* If we are supposed to produce a 0/1 value, we want to do
5235	   a logical shift from the sign bit to the low-order bit; for
5236	   a -1/0 value, we do an arithmetic shift.  */
5237	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5238			    size_int (GET_MODE_BITSIZE (mode) - 1),
5239			    subtarget, normalizep != -1);
5240
5241      if (mode != target_mode)
5242	op0 = convert_modes (target_mode, mode, op0, 0);
5243
5244      return op0;
5245    }
5246
5247  if (icode != CODE_FOR_nothing)
5248    {
5249      insn_operand_predicate_fn pred;
5250
5251      /* We think we may be able to do this with a scc insn.  Emit the
5252	 comparison and then the scc insn.  */
5253
5254      do_pending_stack_adjust ();
5255      last = get_last_insn ();
5256
5257      comparison
5258	= compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5259      if (CONSTANT_P (comparison))
5260	{
5261	  switch (GET_CODE (comparison))
5262	    {
5263	    case CONST_INT:
5264	      if (comparison == const0_rtx)
5265		return const0_rtx;
5266	      break;
5267
5268#ifdef FLOAT_STORE_FLAG_VALUE
5269	    case CONST_DOUBLE:
5270	      if (comparison == CONST0_RTX (GET_MODE (comparison)))
5271		return const0_rtx;
5272	      break;
5273#endif
5274	    default:
5275	      gcc_unreachable ();
5276	    }
5277
5278	  if (normalizep == 1)
5279	    return const1_rtx;
5280	  if (normalizep == -1)
5281	    return constm1_rtx;
5282	  return const_true_rtx;
5283	}
5284
5285      /* The code of COMPARISON may not match CODE if compare_from_rtx
5286	 decided to swap its operands and reverse the original code.
5287
5288	 We know that compare_from_rtx returns either a CONST_INT or
5289	 a new comparison code, so it is safe to just extract the
5290	 code from COMPARISON.  */
5291      code = GET_CODE (comparison);
5292
5293      /* Get a reference to the target in the proper mode for this insn.  */
5294      compare_mode = insn_data[(int) icode].operand[0].mode;
5295      subtarget = target;
5296      pred = insn_data[(int) icode].operand[0].predicate;
5297      if (optimize || ! (*pred) (subtarget, compare_mode))
5298	subtarget = gen_reg_rtx (compare_mode);
5299
5300      pattern = GEN_FCN (icode) (subtarget);
5301      if (pattern)
5302	{
5303	  emit_insn (pattern);
5304
5305	  /* If we are converting to a wider mode, first convert to
5306	     TARGET_MODE, then normalize.  This produces better combining
5307	     opportunities on machines that have a SIGN_EXTRACT when we are
5308	     testing a single bit.  This mostly benefits the 68k.
5309
5310	     If STORE_FLAG_VALUE does not have the sign bit set when
5311	     interpreted in COMPARE_MODE, we can do this conversion as
5312	     unsigned, which is usually more efficient.  */
5313	  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5314	    {
5315	      convert_move (target, subtarget,
5316			    (GET_MODE_BITSIZE (compare_mode)
5317			     <= HOST_BITS_PER_WIDE_INT)
5318			    && 0 == (STORE_FLAG_VALUE
5319				     & ((HOST_WIDE_INT) 1
5320					<< (GET_MODE_BITSIZE (compare_mode) -1))));
5321	      op0 = target;
5322	      compare_mode = target_mode;
5323	    }
5324	  else
5325	    op0 = subtarget;
5326
5327	  /* If we want to keep subexpressions around, don't reuse our
5328	     last target.  */
5329
5330	  if (optimize)
5331	    subtarget = 0;
5332
5333	  /* Now normalize to the proper value in COMPARE_MODE.  Sometimes
5334	     we don't have to do anything.  */
5335	  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5336	    ;
5337	  /* STORE_FLAG_VALUE might be the most negative number, so write
5338	     the comparison this way to avoid a compile-time warning.  */
5339	  else if (- normalizep == STORE_FLAG_VALUE)
5340	    op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5341
5342	  /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5343	     makes it hard to use a value of just the sign bit due to
5344	     ANSI integer constant typing rules.  */
5345	  else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5346		   && (STORE_FLAG_VALUE
5347		       & ((HOST_WIDE_INT) 1
5348			  << (GET_MODE_BITSIZE (compare_mode) - 1))))
5349	    op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5350				size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5351				subtarget, normalizep == 1);
5352	  else
5353	    {
5354	      gcc_assert (STORE_FLAG_VALUE & 1);
5355
5356	      op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5357	      if (normalizep == -1)
5358		op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5359	    }
5360
5361	  /* If we were converting to a smaller mode, do the
5362	     conversion now.  */
5363	  if (target_mode != compare_mode)
5364	    {
5365	      convert_move (target, op0, 0);
5366	      return target;
5367	    }
5368	  else
5369	    return op0;
5370	}
5371    }
5372
5373  delete_insns_since (last);
5374
5375  /* If optimizing, use different pseudo registers for each insn, instead
5376     of reusing the same pseudo.  This leads to better CSE, but slows
5377     down the compiler, since there are more pseudos.  */
5378  subtarget = (!optimize
5379	       && (target_mode == mode)) ? target : NULL_RTX;
5380
5381  /* If we reached here, we can't do this with a scc insn.  However, there
5382     are some comparisons that can be done directly.  For example, if
5383     this is an equality comparison of integers, we can try to exclusive-or
5384     (or subtract) the two operands and use a recursive call to try the
5385     comparison with zero.  Don't do any of these cases if branches are
5386     very cheap.  */
5387
5388  if (BRANCH_COST > 0
5389      && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5390      && op1 != const0_rtx)
5391    {
5392      tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5393			  OPTAB_WIDEN);
5394
5395      if (tem == 0)
5396	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5397			    OPTAB_WIDEN);
5398      if (tem != 0)
5399	tem = emit_store_flag (target, code, tem, const0_rtx,
5400			       mode, unsignedp, normalizep);
5401      if (tem == 0)
5402	delete_insns_since (last);
5403      return tem;
5404    }
5405
5406  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5407     the constant zero.  Reject all other comparisons at this point.  Only
5408     do LE and GT if branches are expensive since they are expensive on
5409     2-operand machines.  */
5410
5411  if (BRANCH_COST == 0
5412      || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5413      || (code != EQ && code != NE
5414	  && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5415    return 0;
5416
5417  /* See what we need to return.  We can only return a 1, -1, or the
5418     sign bit.  */
5419
5420  if (normalizep == 0)
5421    {
5422      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5423	normalizep = STORE_FLAG_VALUE;
5424
5425      else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5426	       && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5427		   == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5428	;
5429      else
5430	return 0;
5431    }
5432
5433  /* Try to put the result of the comparison in the sign bit.  Assume we can't
5434     do the necessary operation below.  */
5435
5436  tem = 0;
5437
5438  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5439     the sign bit set.  */
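  /* For instance, A = 0 gives 0 | -1 = -1 (sign bit set) and A = 3 gives
     3 | 2 = 3 (sign bit clear): for positive A neither term has the sign
     bit, for A == 0 the term A - 1 supplies it, and for negative A the
     term A does.  */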
5440
5441  if (code == LE)
5442    {
5443      /* This is destructive, so SUBTARGET can't be OP0.  */
5444      if (rtx_equal_p (subtarget, op0))
5445	subtarget = 0;
5446
5447      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5448			  OPTAB_WIDEN);
5449      if (tem)
5450	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5451			    OPTAB_WIDEN);
5452    }
5453
5454  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5455     number of bits in the mode of OP0, minus one.  */
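  /* For instance, in a 32-bit mode: A = 3 gives (3 >> 31) - 3 = -3 (sign
     bit set), A = 0 gives 0 - 0 = 0, and A = -1 gives -1 - (-1) = 0, so
     the sign bit of the result is set exactly when A > 0.  */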
5456
5457  if (code == GT)
5458    {
5459      if (rtx_equal_p (subtarget, op0))
5460	subtarget = 0;
5461
5462      tem = expand_shift (RSHIFT_EXPR, mode, op0,
5463			  size_int (GET_MODE_BITSIZE (mode) - 1),
5464			  subtarget, 0);
5465      tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5466			  OPTAB_WIDEN);
5467    }
5468
5469  if (code == EQ || code == NE)
5470    {
5471      /* For EQ or NE, one way to do the comparison is to apply an operation
5472	 that converts the operand into a positive number if it is nonzero
5473	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5474	 for NE we negate.  This puts the result in the sign bit.  Then we
5475	 normalize with a shift, if needed.
5476
5477	 Two operations that can do the above actions are ABS and FFS, so try
5478	 them.  If that doesn't work, and MODE is smaller than a full word,
5479	 we can use zero-extension to the wider mode (an unsigned conversion)
5480	 as the operation.  */
5481
5482      /* Note that ABS doesn't yield a positive number for INT_MIN, but
5483	 that is compensated by the subsequent overflow when subtracting
5484	 one / negating.  */
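      /* For instance, with ABS in a 32-bit mode: X = 5 gives abs 5, so
	 for EQ the value 5 - 1 = 4 has the sign bit clear, while X = 0
	 gives 0 - 1 = -1 with the sign bit set; for NE the negation
	 -abs (X) has the sign bit set exactly when X != 0.  */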
5485
5486      if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5487	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5488      else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5489	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5490      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5491	{
5492	  tem = convert_modes (word_mode, mode, op0, 1);
5493	  mode = word_mode;
5494	}
5495
5496      if (tem != 0)
5497	{
5498	  if (code == EQ)
5499	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5500				0, OPTAB_WIDEN);
5501	  else
5502	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5503	}
5504
5505      /* If we couldn't do it that way, for NE we can "or" the two's complement
5506	 of the value with itself.  For EQ, we take the one's complement of
5507	 that "or", which is an extra insn, so we only handle EQ if branches
5508	 are expensive.  */
5509
5510      if (tem == 0 && (code == NE || BRANCH_COST > 1))
5511	{
5512	  if (rtx_equal_p (subtarget, op0))
5513	    subtarget = 0;
5514
5515	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5516	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5517			      OPTAB_WIDEN);
5518
5519	  if (tem && code == EQ)
5520	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5521	}
5522    }
5523
5524  if (tem && normalizep)
5525    tem = expand_shift (RSHIFT_EXPR, mode, tem,
5526			size_int (GET_MODE_BITSIZE (mode) - 1),
5527			subtarget, normalizep == 1);
5528
5529  if (tem)
5530    {
5531      if (GET_MODE (tem) != target_mode)
5532	{
5533	  convert_move (target, tem, 0);
5534	  tem = target;
5535	}
5536      else if (!subtarget)
5537	{
5538	  emit_move_insn (target, tem);
5539	  tem = target;
5540	}
5541    }
5542  else
5543    delete_insns_since (last);
5544
5545  return tem;
5546}
5547
5548/* Like emit_store_flag, but always succeeds.  */
5549
5550rtx
5551emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5552		       enum machine_mode mode, int unsignedp, int normalizep)
5553{
5554  rtx tem, label;
5555
5556  /* First see if emit_store_flag can do the job.  */
5557  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5558  if (tem != 0)
5559    return tem;
5560
5561  if (normalizep == 0)
5562    normalizep = 1;
5563
5564  /* If this failed, we have to do this with set/compare/jump/set code.  */
5565
5566  if (!REG_P (target)
5567      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5568    target = gen_reg_rtx (GET_MODE (target));
5569
5570  emit_move_insn (target, const1_rtx);
5571  label = gen_label_rtx ();
5572  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5573			   NULL_RTX, label);
5574
5575  emit_move_insn (target, const0_rtx);
5576  emit_label (label);
5577
5578  return target;
5579}
5580
5581/* Perform a possibly multi-word comparison and conditional jump to LABEL
5582   if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
5583   now a thin wrapper around do_compare_rtx_and_jump.  */
5584
5585static void
5586do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5587		 rtx label)
5588{
5589  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5590  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5591			   NULL_RTX, NULL_RTX, label);
5592}
5593