P9InstrResources.td revision 360784
//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the resources required by P9 instructions. This is part of
// the P9 processor model used for instruction scheduling. This file should
// contain all the instructions that may be used on Power 9. This is not
// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//   for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//
32
// 2 cycle vector ALU operations that occupy a whole superslice: both ALU
// units (even ALUE, odd ALUO), both pipelines (EXECE, EXECO) and a single
// dispatch (DISP) into that superslice.
def : InstRW<
  [P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    MTVSRDD, VEQV, VNAND, VNEGD, VNEGW, VNOR, VOR, VORC, VSEL, VXOR,
    XVNEGDP, XVNEGSP,
    XXLAND, XXLANDC, XXLEQV, XXLEQVOnes, XXLNAND, XXLNOR, XXLOR, XXLORf,
    XXLORC, XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz, XXSEL,
    XSABSQP, XSCPSGNQP, XSIEXPQP, XSNABSQP, XSNEGQP, XSXEXPQP
  )>;
85
// 3 cycle ALU operations with Restricted Dispatch. Execution needs only one
// slice, but being Restricted the operation takes all 3 dispatches (DISP) of
// its superslice.
def : InstRW<
  [P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMPU(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    FTDIV, FTSQRT, CMPEQB
  )>;
103
// 3 cycle ALU operations with Standard Dispatch; a single slice is used.
def : InstRW<
  [P9_ALU_3C, IP_EXEC_1C, DISP_1C],
  (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    (instregex "SETB(8)?$"),
    XSTDIVDP, XSTSQRTDP, XSXSIGDP, XSCVSPDPN, BPERMD
  )>;
120
// 2 cycle ALU operations with Standard Dispatch; a single slice is used.
// Note: the NEG forms are covered once by "NEG(8)?(O)?(_rec)?$"; a second
// pattern "NEG(8)?(O)?$" matched a strict subset of the same opcodes and was
// dropped as redundant.
def : InstRW<
  [P9_ALU_2C, IP_EXEC_1C, DISP_1C],
  (instrs
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTV(S)?RD$"),
    (instregex "MTV(S)?RW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?(O)?$"),
    (instregex "ANDI(S)?(8)?(_rec)?$"),
    (instregex "ADDC(8)?(O)?$"),
    (instregex "ADDIC(8)?(_rec)?$"),
    (instregex "ADD(8|4)(O)?(_rec)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
    (instregex "NEG(8)?(O)?(_rec)?$"),
    (instregex "POPCNTB$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
    (instregex "NAND(8)?(_rec)?$"),
    (instregex "AND(C)?(8)?(_rec)?$"),
    (instregex "NOR(8)?(_rec)?$"),
    (instregex "OR(C)?(8)?(_rec)?$"),
    (instregex "EQV(8)?(_rec)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
    COPY, MCRF, MCRXRX,
    XSNABSDP, XSXEXPDP, XSABSDP, XSNEGDP, XSCPSGNDP,
    MFVSRWZ, MFVRWZ, EXTSWSLI, SRADI_32, RLDIC, RFEBB, LA,
    TBEGIN, TRECHKPT, NOP, WAIT
  )>;
173
// 2 cycle ALU operations with Restricted Dispatch. Execution needs only one
// slice, but being Restricted the operation takes all 3 dispatches (DISP) of
// its superslice.
def : InstRW<
  [P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    RLDIMI, XSIEXPDP, FMR, CREQV, CRXOR, TRECLAIM, TSR, TABORT
  )>;
201
// 3 cycle vector ALU operations that occupy a whole superslice: both ALU
// units (even ALUE, odd ALUO), both pipelines (EXECE, EXECO) and a single
// dispatch (DISP) into that superslice.
def : InstRW<
  [P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
    (instregex "VCMPBFP(_rec)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    VBPERMD, VADDCUW, VPOPCNTW, VPOPCNTD, VPRTYBD, VPRTYBW,
    VSHASIGMAD, VSHASIGMAW,
    VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS, VSUBCUW,
    VCMPGTSB, VCMPGTSB_rec, VCMPGTSD, VCMPGTSD_rec,
    VCMPGTSH, VCMPGTSH_rec, VCMPGTSW, VCMPGTSW_rec,
    VCMPGTUB, VCMPGTUB_rec, VCMPGTUD, VCMPGTUD_rec,
    VCMPGTUH, VCMPGTUH_rec, VCMPGTUW, VCMPGTUW_rec,
    VCMPNEB_rec, VCMPNEH_rec, VCMPNEW_rec,
    VCMPNEZB_rec, VCMPNEZH_rec, VCMPNEZW_rec,
    VCMPEQUB_rec, VCMPEQUD_rec, VCMPEQUH_rec, VCMPEQUW_rec,
    XVCMPEQDP, XVCMPEQDP_rec, XVCMPEQSP, XVCMPEQSP_rec,
    XVCMPGEDP, XVCMPGEDP_rec, XVCMPGESP, XVCMPGESP_rec,
    XVCMPGTDP, XVCMPGTDP_rec, XVCMPGTSP, XVCMPGTSP_rec,
    XVMAXDP, XVMAXSP, XVMINDP, XVMINSP,
    XVTDIVDP, XVTDIVSP, XVTSQRTDP, XVTSQRTSP, XVTSTDCDP, XVTSTDCSP,
    XVXSIGDP, XVXSIGSP
  )>;
286
// 7 cycle vector DP operations that occupy a whole superslice: both DP units
// (even DPE, odd DPO), both pipelines (EXECE, EXECO) and a single dispatch
// (DISP) into that superslice.
def : InstRW<
  [P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    VADDFP, VCTSXS, VCTSXS_0, VCTUXS, VCTUXS_0, VEXPTEFP, VLOGEFP,
    VMADDFP, VMHADDSHS, VNMSUBFP, VREFP, VRFIM, VRFIN, VRFIP, VRFIZ,
    VRSQRTEFP, VSUBFP,
    XVADDDP, XVADDSP,
    XVCVDPSP, XVCVDPSXDS, XVCVDPSXWS, XVCVDPUXDS, XVCVDPUXWS,
    XVCVHPSP, XVCVSPDP, XVCVSPHP,
    XVCVSPSXDS, XVCVSPSXWS, XVCVSPUXDS, XVCVSPUXWS,
    XVCVSXDDP, XVCVSXDSP, XVCVSXWDP, XVCVSXWSP,
    XVCVUXDDP, XVCVUXDSP, XVCVUXWDP, XVCVUXWSP,
    XVMADDADP, XVMADDASP, XVMADDMDP, XVMADDMSP,
    XVMSUBADP, XVMSUBASP, XVMSUBMDP, XVMSUBMSP,
    XVMULDP, XVMULSP,
    XVNMADDADP, XVNMADDASP, XVNMADDMDP, XVNMADDMSP,
    XVNMSUBADP, XVNMSUBASP, XVNMSUBMDP, XVNMSUBMSP,
    XVRDPI, XVRDPIC, XVRDPIM, XVRDPIP, XVRDPIZ, XVREDP, XVRESP,
    XVRSPI, XVRSPIC, XVRSPIM, XVRSPIP, XVRSPIZ,
    XVRSQRTEDP, XVRSQRTESP, XVSUBDP, XVSUBSP,
    VCFSX, VCFSX_0, VCFUX, VCFUX_0,
    VMHRADDSHS, VMLADDUHM,
    VMSUMMBM, VMSUMSHM, VMSUMSHS, VMSUMUBM, VMSUMUHM, VMSUMUHS,
    VMULESB, VMULESH, VMULESW, VMULEUB, VMULEUH, VMULEUW,
    VMULOSB, VMULOSH, VMULOSW, VMULOUB, VMULOUH, VMULOUW,
    VMULUWM,
    VSUM2SWS, VSUM4SBS, VSUM4SHS, VSUM4UBS, VSUMSWS
  )>;
396
// 5 cycle Restricted DP operations: one DP unit, one EXEC pipeline and all
// three dispatches of the superslice.
def : InstRW<
  [P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MADD(HD|HDU|LD|LD8)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
  )>;
404
// 7 cycle Restricted DP operations: one DP unit, one EXEC pipeline and all
// three dispatches of the superslice.
def : InstRW<
  [P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    FRSP,
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    FNMADDS, FNMADD, FNMSUBS, FNMSUB, FSELD, FSELS, FMULS, FMUL,
    XSMADDADP, XSMADDASP, XSMADDMDP, XSMADDMSP,
    XSMSUBADP, XSMSUBASP, XSMSUBMDP, XSMSUBMSP,
    XSMULDP, XSMULSP,
    XSNMADDADP, XSNMADDASP, XSNMADDMDP, XSNMADDMSP,
    XSNMSUBADP, XSNMSUBASP, XSNMSUBMDP, XSNMSUBMSP
  )>;
447
// One 7 cycle Restricted DP operation plus one 3 cycle ALU operation, which
// can run in parallel. Because the DP piece is Restricted, a full 4
// dispatches are needed.
def : InstRW<
  [P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "FSEL(D|S)_rec$")
  )>;
456
// One 5 cycle Restricted DP operation plus one 2 cycle ALU operation.
def : InstRW<
  [P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
  )>;
463
// One 7 cycle Restricted DP operation plus one 3 cycle ALU operation, which
// must run sequentially. Because the DP piece is Restricted, a full 4
// dispatches are needed.
def : InstRW<
  [P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
    (instregex "FRE(S)?_rec$"),
    (instregex "FADD(S)?_rec$"),
    (instregex "FSUB(S)?_rec$"),
    (instregex "F(N)?MSUB(S)?_rec$"),
    (instregex "F(N)?MADD(S)?_rec$"),
    (instregex "FCFID(U)?(S)?_rec$"),
    (instregex "FCTID(U)?(Z)?_rec$"),
    (instregex "FCTIW(U)?(Z)?_rec$"),
    (instregex "FMUL(S)?_rec$"),
    (instregex "FRSQRTE(S)?_rec$"),
    FRSP_rec
  )>;
483
// 7 cycle DP operations: one DP unit, one EXEC pipeline and one dispatch.
def : InstRW<
  [P9_DP_7C, IP_EXEC_1C, DISP_1C],
  (instrs
    XSADDDP, XSADDSP,
    XSCVDPHP, XSCVDPSP,
    XSCVDPSXDS, XSCVDPSXDSs, XSCVDPSXWS,
    XSCVDPUXDS, XSCVDPUXDSs, XSCVDPUXWS,
    XSCVDPSXWSs, XSCVDPUXWSs,
    XSCVHPDP, XSCVSPDP,
    XSCVSXDDP, XSCVSXDSP, XSCVUXDDP, XSCVUXDSP,
    XSRDPI, XSRDPIC, XSRDPIM, XSRDPIP, XSRDPIZ,
    XSREDP, XSRESP, XSRSQRTEDP, XSRSQRTESP,
    XSSUBDP, XSSUBSP,
    XSCVDPSPN, XSRSP
  )>;
519
// 3 cycle PM operations. There is only one PM unit per superslice, so the
// whole superslice is used: both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<
  [P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
  (instrs
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    MFVSRLD, MTVSRWS,
    VBPERMQ, VCLZLSBB, VCTZLSBB, VEXTRACTD,
    VEXTUBLX, VEXTUBRX, VEXTUHLX, VEXTUHRX, VEXTUWLX, VEXTUWRX,
    VGBBD,
    VMRGHB, VMRGHH, VMRGHW, VMRGLB, VMRGLH, VMRGLW,
    VPERM, VPERMR, VPERMXOR,
    VPKPX, VPKSDSS, VPKSDUS, VPKSHSS, VPKSHUS, VPKSWSS, VPKSWUS,
    VPKUDUM, VPKUDUS, VPKUHUM, VPKUHUS, VPKUWUM, VPKUWUS,
    VPRTYBQ,
    VSL, VSLDOI, VSLO, VSLV, VSR, VSRO, VSRV,
    VUPKHPX, VUPKHSB, VUPKHSH, VUPKHSW,
    VUPKLPX, VUPKLSB, VUPKLSH, VUPKLSW,
    XXBRD, XXBRH, XXBRQ, XXBRW,
    XXEXTRACTUW, XXINSERTW, XXMRGHW, XXMRGLW, XXPERM, XXPERMR,
    XXSLDWI, XXSLDWIs, XXSPLTIB, XXSPLTW, XXSPLTWs,
    XXPERMDI, XXPERMDIs,
    VADDCUQ, VADDECUQ, VADDEUQM, VADDUQM,
    VMUL10CUQ, VMUL10ECUQ, VMUL10EUQ, VMUL10UQ,
    VSUBCUQ, VSUBECUQ, VSUBEUQM, VSUBUQM,
    XSCMPEXPQP, XSCMPOQP, XSCMPUQP, XSTSTDCQP, XSXSIGQP,
    BCDCFN_rec, BCDCFZ_rec, BCDCPSGN_rec, BCDCTN_rec, BCDCTZ_rec,
    BCDSETSGN_rec, BCDS_rec, BCDTRUNC_rec, BCDUS_rec, BCDUTRUNC_rec
  )>;
627
// 12 cycle DFU operations. There is only one DFU unit per CPU, so a whole
// superslice is used: both exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<
  [P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    BCDSR_rec,
    XSADDQP, XSADDQPO,
    XSCVDPQP, XSCVQPDP, XSCVQPDPO,
    XSCVQPSDZ, XSCVQPSWZ, XSCVQPUDZ, XSCVQPUWZ,
    XSCVSDQP, XSCVUDQP,
    XSRQPI, XSRQPIX, XSRQPXP,
    XSSUBQP, XSSUBQPO
  )>;
651
// 23 cycle DFU operation. There is only one DFU unit per CPU, so a whole
// superslice is used: both exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<
  [P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs BCDCTSQ_rec)>;
659
// 24 cycle DFU operations. There is only one DFU unit per CPU, so a whole
// superslice is used: both exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<
  [P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    XSMADDQP, XSMADDQPO, XSMSUBQP, XSMSUBQPO,
    XSMULQP, XSMULQPO,
    XSNMADDQP, XSNMADDQPO, XSNMSUBQP, XSNMSUBQPO
  )>;
676
// 37 cycle DFU operation. There is only one DFU unit per CPU, so a whole
// superslice is used: both exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<
  [P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs BCDCFSQ_rec)>;
684
// 58 cycle DFU operations. There is only one DFU unit per CPU, so a whole
// superslice is used: both exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<
  [P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs XSDIVQP, XSDIVQPO)>;
693
// 76 cycle DFU operations. There is only one DFU unit per CPU, so a whole
// superslice is used: both exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<
  [P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs XSSQRTQP, XSSQRTQPO)>;
702
// 6 cycle loads that use a single slice.
// The pattern is end-anchored with '$', like the other patterns in this
// file, so it selects exactly LXVL and LXVLL rather than every opcode whose
// name merely starts with "LXVL".
def : InstRW<
  [P9_LS_6C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "LXVL(L)?$")
  )>;
708
// 5 cycle loads that use a single slice.
// "LVX(L)?$" is end-anchored with '$', like the other patterns in this file,
// so it selects exactly LVX and LVXL rather than every opcode whose name
// merely starts with "LVX".
def : InstRW<
  [P9_LS_5C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "LVE(B|H|W)X$"),
    (instregex "LVX(L)?$"),
    (instregex "LXSI(B|H)ZX$"),
    LXSDX, LXVB16X, LXVD2X, LXVWSX, LXSIWZX, LXV, LXVX, LXSD,
    DFLOADf64, XFLOADf64, LIWZX
  )>;
727
// 4 cycle loads (and other LS-unit operations) that use a single slice.
def : InstRW<
  [P9_LS_4C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "CP_PASTE(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    CP_ABORT, DARN, EnforceIEIO, ISYNC, MSGSYNC, TLBSYNC, SYNC, LMW, LSWI
  )>;
756
// 4 cycle Restricted loads. Only one slice is used, but the dispatch of the
// whole superslice is taken.
def : InstRW<
  [P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
  (instrs LFIWZX, LFDX, LFD)>;
765
// Cracked load instructions whose two load pieces can be done in parallel.
def : InstRW<
  [P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C, DISP_PAIR_1C],
  (instrs SLBIA, SLBIE, SLBMFEE, SLBMFEV, SLBMTE, TLBIEL)>;
778
// Cracked load instruction made of a Load piece and an ALU piece totaling
// 6 cycles. The two pieces can run in parallel.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, DISP_PAIR_1C, DISP_PAIR_1C],
  (instrs
    (instregex "L(W|H)ZU(X)?(8)?$")
  )>;
787
// Cracked TEND instruction made of a Load piece and an ALU piece totaling
// 6 cycles. The two pieces can run in parallel.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
  (instrs TEND)>;
796
797
// Cracked store instruction: a Store followed by an ALU operation. The store
// is Restricted and therefore takes three dispatches.
def : InstRW<
  [P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "ST(B|H|W|D)CX$")
  )>;
806
// Cracked load instruction: two consecutive load operations, 8 cycles total.
def : InstRW<
  [P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
  (instrs LDMX)>;
814
// Cracked load instruction made of consecutive Load and ALU pieces totaling
// 6 cycles. The pieces cannot overlap, so their latencies are added.
def : InstRW<
  [P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "LHA(X)?(8)?$"),
    (instregex "CP_PASTE(8)?_rec$"),
    (instregex "LWA(X)?(_32)?$"),
    TCHECK
  )>;
826
// Cracked, Restricted load instruction made of consecutive Load and ALU
// pieces totaling 6 cycles. The pieces cannot overlap, so their latencies
// are added. Being both cracked and Restricted, it takes a full 6 dispatches.
def : InstRW<
  [P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs LFIWAX)>;
836
// Cracked load instruction made of consecutive Load and ALU pieces totaling
// 7 cycles. The pieces cannot overlap, so their latencies are added.
// Two dispatches are required (one per piece) as this is a cracked
// instruction.
def : InstRW<
  [P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs LXSIWAX, LIWAX)>;
846
// Cracked load instruction made of a consecutive Load (4 cycles) and ALU
// (3 cycles) piece, 7 cycles total. The pieces cannot overlap, so their
// latencies are added. As a Restricted instruction it takes a full 6
// dispatches.
def : InstRW<
  [P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs LFSX, LFS)>;
858
// Cracked load instruction made of consecutive Load and ALU pieces totaling
// 8 cycles. The pieces cannot overlap, so their latencies are added.
// Two dispatches are required (one per piece) as this is a cracked
// instruction.
def : InstRW<
  [P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs LXSSP, LXSSPX, XFLOADf32, DFLOADf32)>;
870
// Cracked 3-way load instruction: a load plus two ALU operations that depend
// on each other.
def : InstRW<
  [P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
  (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX
  )>;
879
// Cracked load that also needs the PM resource. The Load and the PM piece
// cannot overlap, so their latencies are added for a total of 8 cycles. The
// PM piece takes the full superslice (both EXECE and EXECO pipelines) and
// 1 dispatch; the Load takes the remaining 1 dispatch.
def : InstRW<
  [P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C, DISP_1C],
  (instrs LXVH8X, LXVDSX, LXVW4X)>;
892
// Restricted store operations on a single slice. Being Restricted, each takes
// all three dispatches of the superslice.
def : InstRW<
  [P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "STF(S|D|IWX|SX|DX)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    STIWX, SLBIEG, STMW, STSWI, TLBIE
  )>;
910
// Vector store instructions. The whole superslice is needed, so these take
// one dispatch plus both the Even and Odd exec pipelines.
def : InstRW<
  [P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
  )>;
920
// 5 cycle DIV operations. There is only one DIV unit per superslice, so the
// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<
  [P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$")
  )>;
929
// 12 cycle DIV operations. There is only one DIV unit per superslice, so the
// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<
  [P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "MTSPR(8)?$")
  )>;
942
// 16 cycle DIV operations. There is only one DIV unit per superslice, so the
// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<
  [P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs DIVW, DIVWO, DIVWU, DIVWUO, MODSW)>;
954
// 24 cycle DIV operations. There is only one DIV unit per superslice, so the
// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<
  [P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVWE, DIVWEO, DIVD, DIVDO,
    DIVWEU, DIVWEUO, DIVDU, DIVDUO,
    MODSD, MODUD, MODUW
  )>;
972
// 40 cycle DIV operations. There is only one DIV unit per superslice, so the
// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<
  [P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs DIVDE, DIVDEO, DIVDEU, DIVDEUO)>;
983
// Cracked DIV and ALU operation. The ALU piece needs one full slice and the
// DIV piece one full superslice, since there is only one DIV unit per
// superslice. Latency of DIV plus ALU is 18.
def : InstRW<
  [P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs
    (instregex "DIVW(U)?(O)?_rec$")
  )>;
992
// Cracked DIV and ALU operation. The ALU piece needs one full slice and the
// DIV piece one full superslice, since there is only one DIV unit per
// superslice. Latency of DIV plus ALU is 26.
def : InstRW<
  [P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs
    DIVD_rec, DIVDO_rec, DIVDU_rec, DIVDUO_rec,
    DIVWE_rec, DIVWEO_rec, DIVWEU_rec, DIVWEUO_rec
  )>;
1008
// Cracked DIV and ALU operation. The ALU piece needs one full slice and the
// DIV piece one full superslice, since there is only one DIV unit per
// superslice. Latency of DIV plus ALU is 42.
def : InstRW<
  [P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs DIVDE_rec, DIVDEO_rec, DIVDEU_rec, DIVDEUO_rec)>;
1020
1021// CR access instructions in _BrMCR, IIC_BrMCRX.
1022
// Cracked, Restricted ALU operations.
// The two 2 cycle ALU ops can run in parallel, so their latencies are not
// added together. Otherwise this behaves like two instructions running
// together on two pipelines and 6 dispatches.
def : InstRW<
  [P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs MTCRF, MTCRF8)>;
1034
// Cracked ALU operations.
// The two 2 cycle ALU ops can run in parallel, so their latencies are not
// added together. Otherwise this behaves like two instructions running
// together on two pipelines and 2 dispatches.
def : InstRW<
  [P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "ADDC(8)?(O)?_rec$"),
    (instregex "SUBFC(8)?(O)?_rec$")
  )>;
1046
// Cracked ALU operations.
// The two ALU ops can be done in parallel. One is a three cycle ALU op, the
// other is a two cycle ALU op. One of them is Restricted (3 dispatches) and
// the other is not (1 dispatch), for a total of 4 dispatches.
def : InstRW<
  [P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "F(N)?ABS(D|S)_rec$"),
    (instregex "FCPSGN(D|S)_rec$"),
    (instregex "FNEG(D|S)_rec$"),
    FMR_rec
  )>;
1060
// Cracked ALU operations.
// The two 3 cycle ALU ops can run in parallel, so their latencies are not
// added together. Otherwise this behaves like two instructions running
// together on two pipelines and 2 dispatches.
def : InstRW<
  [P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs MCRFS)>;
1071
// Cracked, Restricted ALU operations.
// The two 3 cycle ALU ops can run in parallel, so their latencies are not
// added together. Otherwise this behaves like two instructions running
// together on two pipelines and 6 dispatches.
def : InstRW<
  [P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MTFSF(b|_rec)?$"),
    (instregex "MTFSFI(_rec)?$")
  )>;
1083
// Cracked instruction made of two ALU ops that cannot be done in parallel.
// One of the ALU ops is Restricted and takes 3 dispatches.
def : InstRW<
  [P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "RLD(I)?C(R|L)_rec$"),
    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
    (instregex "SLW(8)?_rec$"),
    (instregex "SRAW(I)?_rec$"),
    (instregex "SRW(8)?_rec$"),
    RLDICL_32_rec, RLDIMI_rec
  )>;
1098
// Cracked instruction made of two ALU ops that cannot be done in parallel.
// Both ALU ops are Restricted and each takes 3 dispatches.
def : InstRW<
  [P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MFFS(L|CE|_rec)?$")
  )>;
1107
1108// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
1109// total of 6 cycles. All of the ALU operations are also restricted so each
1110// takes 3 dispatches for a total of 9.
// No load is involved: the write is P9_ALUOpAndALUOpAndALUOp_6C and only
// IP_EXEC_1C pipelines (no IP_AGEN_1C) are listed.
1111def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
1112              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1113      (instrs
1114    (instregex "MFCR(8)?$")
1115)>;
1116
1117// Cracked instruction made of two ALU ops.
1118// The two ops cannot be done in parallel.
// Same P9_ALUOpAndALUOp_4C write as the restricted rotate/shift "_rec" entry
// earlier in this file, but here both ops are dispatched with a plain DISP_1C.
1119def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
1120      (instrs
1121    (instregex "EXTSWSLI_32_64_rec$"),
1122    (instregex "SRAD(I)?_rec$"),
1123    EXTSWSLI_rec,
1124    SLD_rec,
1125    SRD_rec,
1126    RLDIC_rec
1127)>;
1128
1129// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
// Uses the same P9_DP_33C_8 write as XSDIVDP; the restricted form differs only
// in dispatching with DISP_3SLOTS_1C instead of DISP_1C.
1130def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
1131      (instrs
1132    FDIV
1133)>;
1134
1135// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// The combined write P9_DPOpAndALU2Op_36C_8 reflects 33 + 3 = 36 cycles.
// Two IP_EXEC_1C are listed, one per cracked op; the dispatch writes are
// DISP_3SLOTS_1C and DISP_1C.
1136def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
1137              DISP_3SLOTS_1C, DISP_1C],
1138      (instrs
1139    FDIV_rec
1140)>;
1141
1142// 36 Cycle DP Instruction.
1143// Instruction can be done on a single slice.
// Not restricted: dispatched with DISP_1C (compare FSQRT, which uses the same
// P9_DP_36C_10 write with DISP_3SLOTS_1C).
1144def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
1145      (instrs
1146    XSSQRTDP
1147)>;
1148
1149// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
// Same P9_DP_36C_10 write as XSSQRTDP; only the dispatch write differs
// (DISP_3SLOTS_1C here).
1150def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
1151      (instrs
1152    FSQRT
1153)>;
1154
1155// 36 Cycle DP Vector Instruction.
// Like the vector divides later in this file, this lists both the DPE and DPO
// writes and both the EXECE and EXECO pipelines, with a single DISP_1C.
1156def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
1157              DISP_1C],
1158      (instrs
1159    XVSQRTDP
1160)>;
1161
1162// 27 Cycle DP Vector Instruction.
// Like the vector divides later in this file, this lists both the DPE and DPO
// writes and both the EXECE and EXECO pipelines, with a single DISP_1C.
1163def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
1164              DISP_1C],
1165      (instrs
1166    XVSQRTSP
1167)>;
1168
1169// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// The combined write P9_DPOpAndALU2Op_39C_10 reflects 36 + 3 = 39 cycles.
// Two IP_EXEC_1C are listed, one per cracked op; the dispatch writes are
// DISP_3SLOTS_1C and DISP_1C.
1170def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
1171              DISP_3SLOTS_1C, DISP_1C],
1172      (instrs
1173    FSQRT_rec
1174)>;
1175
1176// 26 Cycle DP Instruction.
// Not restricted: dispatched with DISP_1C (compare FSQRTS, which uses the same
// P9_DP_26C_5 write with DISP_3SLOTS_1C).
1177def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
1178      (instrs
1179    XSSQRTSP
1180)>;
1181
1182// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
// Same P9_DP_26C_5 write as XSSQRTSP; only the dispatch write differs
// (DISP_3SLOTS_1C here).
1183def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
1184      (instrs
1185    FSQRTS
1186)>;
1187
1188// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// The combined write P9_DPOpAndALU2Op_29C_5 reflects 26 + 3 = 29 cycles.
// Two IP_EXEC_1C are listed, one per cracked op; the dispatch writes are
// DISP_3SLOTS_1C and DISP_1C.
1189def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
1190              DISP_3SLOTS_1C, DISP_1C],
1191      (instrs
1192    FSQRTS_rec
1193)>;
1194
1195// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
// Same P9_DP_33C_8 write as FDIV; this non-restricted form dispatches with
// DISP_1C instead of DISP_3SLOTS_1C.
1196def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
1197      (instrs
1198    XSDIVDP
1199)>;
1200
1201// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
// Same P9_DP_22C_5 write as XSDIVSP; only the dispatch write differs
// (DISP_3SLOTS_1C here).
1202def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
1203      (instrs
1204    FDIVS
1205)>;
1206
1207// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// The combined write P9_DPOpAndALU2Op_25C_5 reflects 22 + 3 = 25 cycles, the
// same "+3" pattern as the FDIV_rec, FSQRT_rec and FSQRTS_rec entries.
// Two IP_EXEC_1C are listed, one per cracked op; the dispatch writes are
// DISP_3SLOTS_1C and DISP_1C.
1208def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
1209              DISP_3SLOTS_1C, DISP_1C],
1210      (instrs
1211    FDIVS_rec
1212)>;
1213
1214// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
// Same P9_DP_22C_5 write as FDIVS; this non-restricted form dispatches with
// DISP_1C instead of DISP_3SLOTS_1C.
1215def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
1216      (instrs
1217    XSDIVSP
1218)>;
1219
1220// 24 Cycle DP Vector Instruction. Takes one full superslice.
1221// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
1222// superslice.
// The 24 cycle latency is carried by both the P9_DPE_24C_8 and P9_DPO_24C_8
// writes.
1223def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
1224              DISP_1C],
1225      (instrs
1226    XVDIVSP
1227)>;
1228
1229// 33 Cycle DP Vector Instruction. Takes one full superslice.
1230// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
1231// superslice.
// The 33 cycle latency is carried by both the P9_DPE_33C_8 and P9_DPO_33C_8
// writes.
1232def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
1233              DISP_1C],
1234      (instrs
1235    XVDIVDP
1236)>;
1237
1238// Instruction cracked into three pieces. One Load and two ALU operations.
1239// The Load and one of the ALU ops cannot be run at the same time and so the
1240// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
1241// Both the load and the ALU that depends on it are restricted and so each
1242// takes a DISP_3SLOTS_1C. The final DISP_1C comes from the second ALU op.
1243// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
// The 7 cycle figure is the one encoded in the P9_LoadAndALU2Op_7C write; the
// independent ALU op is the separate P9_ALU_2C write.
1244def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
1245              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1246              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
1247      (instrs
1248    (instregex "LF(SU|SUX)$")
1249)>;
1250
1251// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
1252// the store and so it can be run at the same time as the store. The store is
1253// also restricted.
// Dispatch: DISP_3SLOTS_1C for the restricted store, DISP_1C for the ALU.
// NOTE(review): two IP_EXEC_1C are listed although only one ALU op is
// described above; presumably the store also occupies an EXEC pipeline --
// confirm.
1254def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1255              DISP_3SLOTS_1C, DISP_1C],
1256      (instrs
1257    (instregex "STF(S|D)U(X)?$"),
1258    (instregex "ST(B|H|W|D)U(X)?(8)?$")
1259)>;
1260
1261// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1262// the load and so it can be run at the same time as the load.
// The load uses P9_LS_4C with IP_AGEN_1C and the ALU uses P9_ALU_2C with
// IP_EXEC_1C. Both ops are dispatched with DISP_PAIR_1C, unlike the LFDU/LFDUX
// entry below which uses DISP_3SLOTS_1C and DISP_1C.
1263def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1264              DISP_PAIR_1C, DISP_PAIR_1C],
1265      (instrs
1266    (instregex "LBZU(X)?(8)?$"),
1267    (instregex "LDU(X)?$")
1268)>;
1269
1270// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1271// the load and so it can be run at the same time as the load. The load is also
1272// restricted. The DISP_3SLOTS_1C is for the restricted load while the DISP_1C
1273// is for the ALU. The AGEN pipeline is from the load and the EXEC pipeline
1274// is required for the ALU.
1275def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1276              DISP_3SLOTS_1C, DISP_1C],
1277      (instrs
1278    (instregex "LF(DU|DUX)$")
1279)>;
1280
1281// Crypto Instructions
1282
1283// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
1284// superslice. That includes both exec pipelines (EXECO, EXECE) and one
1285// dispatch.
// Applies to the VPMSUM{B,H,W,D}, V(N)CIPHER(LAST) and VSBOX opcodes listed
// below.
1286def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
1287      (instrs
1288    (instregex "VPMSUM(B|H|W|D)$"),
1289    (instregex "V(N)?CIPHER(LAST)?$"),
1290    VSBOX
1291)>;
1292
1293// Branch Instructions
1294
1295// Two Cycle Branch
// Branches list only the P9_BR_2C write and the branch dispatch DISP_BR_1C; no
// IP_EXEC/IP_AGEN pipeline write is attached. The opcode list mixes regex
// families with individually named opcodes.
1296def : InstRW<[P9_BR_2C, DISP_BR_1C],
1297      (instrs
1298  (instregex "BCCCTR(L)?(8)?$"),
1299  (instregex "BCCL(A|R|RL)?$"),
1300  (instregex "BCCTR(L)?(8)?(n)?$"),
1301  (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
1302  (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
1303  (instregex "BL(_TLS|_NOP)?$"),
1304  (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
1305  (instregex "BLA(8|8_NOP)?$"),
1306  (instregex "BLR(8|L)?$"),
1307  (instregex "TAILB(A)?(8)?$"),
1308  (instregex "TAILBCTR(8)?$"),
1309  (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
1310  (instregex "BCLR(L)?(n)?$"),
1311  (instregex "BCTR(L)?(8)?$"),
1312  B,
1313  BA,
1314  BC,
1315  BCC,
1316  BCCA,
1317  BCL,
1318  BCLalways,
1319  BCLn,
1320  BCTRL8_LDinto_toc,
1321  BCTRL_LWZinto_toc,
1322  BCn,
1323  CTRL_DEP
1324)>;
1325
1326// Five Cycle Branch with a 2 Cycle ALU Op
1327// Operations must be done consecutively and not in parallel.
// The summed latency (5 + 2 = 7) is encoded in the P9_BROpAndALUOp_7C write.
// IP_EXEC_1C and DISP_1C are for the ALU op; DISP_BR_1C is for the branch op.
1328def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
1329      (instrs
1330    ADDPCIS
1331)>;
1332
1333// Special Extracted Instructions For Atomics
1334
1335// Atomic Load
// Applies to LDAT and LWAT. The resource list contains:
//   - five load/store writes (two P9_LS_1C and three P9_LS_4C),
//   - two IP_EXEC_1C and five IP_AGEN_1C pipeline writes,
//   - six dispatch writes (four DISP_1C and two DISP_3SLOTS_1C).
1336def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
1337              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
1338              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, 
1339              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
1340      (instrs
1341    (instregex "L(D|W)AT$")
1342)>;
1343
1344// Atomic Store
// Applies to STDAT and STWAT. The resource list contains:
//   - three load/store writes (one P9_LS_1C and two P9_LS_4C),
//   - one IP_EXEC_1C and three IP_AGEN_1C pipeline writes,
//   - three dispatch writes (two DISP_1C and one DISP_3SLOTS_1C).
1345def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
1346              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
1347      (instrs
1348    (instregex "ST(D|W)AT$")
1349)>;
1350
1351// Signal Processing Engine (SPE) Instructions
1352// These instructions are not supported on Power 9
// The resource list is intentionally empty and the record sets
// Unsupported = 1, so no latency or pipeline information is attached to any of
// the opcodes matched below.
1353def : InstRW<[],
1354    (instrs
1355  BRINC,
1356  EVABS,
1357  EVEQV,
1358  EVMRA,
1359  EVNAND,
1360  EVNEG,
1361  (instregex "EVADD(I)?W$"),
1362  (instregex "EVADD(SM|SS|UM|US)IAAW$"),
1363  (instregex "EVAND(C)?$"),
1364  (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
1365  (instregex "EVCNTL(S|Z)W$"),
1366  (instregex "EVDIVW(S|U)$"),
1367  (instregex "EVEXTS(B|H)$"),
1368  (instregex "EVLD(H|W|D)(X)?$"),
1369  (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
1370  (instregex "EVLWHE(X)?$"),
1371  (instregex "EVLWHO(S|U)(X)?$"),
1372  (instregex "EVLW(H|W)SPLAT(X)?$"),
1373  (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
1374  (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
1375  (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1376  (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
1377  (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
1378  (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1379  (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
1380  (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
1381  (instregex "EVMWHUMI(A)?$"),
1382  (instregex "EVMWLS(M|S)IA(A|N)W$"),
1383  (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
1384  (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
1385  (instregex "EVMWSSF(A|AA|AN)?$"),
1386  (instregex "EVMWUMI(A|AA|AN)?$"),
1387  (instregex "EV(N|X)?OR(C)?$"),
1388  (instregex "EVR(LW|LWI|NDW)$"),
1389  (instregex "EVSLW(I)?$"),
1390  (instregex "EVSPLAT(F)?I$"),
1391  (instregex "EVSRW(I)?(S|U)$"),
1392  (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
1393  (instregex "EVSUBF(S|U)(M|S)IAAW$"),
1394  (instregex "EVSUB(I)?FW$")
1395)> { let Unsupported = 1; }
1396
1397// General Instructions without scheduling support.
// As with the SPE list above, the resource list is empty and the record sets
// Unsupported = 1. The opcode list mixes regex families with individually
// named opcodes.
1398def : InstRW<[],
1399    (instrs
1400  (instregex "(H)?RFI(D)?$"),
1401  (instregex "DSS(ALL)?$"),
1402  (instregex "DST(ST)?(T)?(64)?$"),
1403  (instregex "ICBL(C|Q)$"),
1404  (instregex "L(W|H|B)EPX$"),
1405  (instregex "ST(W|H|B)EPX$"),
1406  (instregex "(L|ST)FDEPX$"),
1407  (instregex "M(T|F)SR(IN)?$"),
1408  (instregex "M(T|F)DCR$"),
1409  (instregex "NOP_GT_PWR(6|7)$"),
1410  (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
1411  (instregex "WRTEE(I)?$"),
1412  ATTN,
1413  CLRBHRB,
1414  MFBHRBE,
1415  MBAR,
1416  MSYNC,
1417  SLBSYNC,
1418  SLBFEE_rec,
1419  NAP,
1420  STOP,
1421  TRAP,
1422  RFCI,
1423  RFDI,
1424  RFMCI,
1425  SC,
1426  DCBA,
1427  DCBI,
1428  DCCCI,
1429  ICCCI
1430)> { let Unsupported = 1; }
1431