/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
2239313Sdim *
3239313Sdim * Permission is hereby granted, free of charge, to any person obtaining a copy
4239313Sdim * of this software and associated documentation files (the "Software"), to deal
5239313Sdim * in the Software without restriction, including without limitation the rights
6239313Sdim * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7239313Sdim * copies of the Software, and to permit persons to whom the Software is
8239313Sdim * furnished to do so, subject to the following conditions:
9239313Sdim *
10239313Sdim * The above copyright notice and this permission notice shall be included in
11239313Sdim * all copies or substantial portions of the Software.
12239313Sdim *
13239313Sdim * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14239313Sdim * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15239313Sdim * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16239313Sdim * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17239313Sdim * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18239313Sdim * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19239313Sdim * THE SOFTWARE.
20239313Sdim *
21239313Sdim *===-----------------------------------------------------------------------===
22239313Sdim */
23239313Sdim
24239313Sdim#ifndef __IMMINTRIN_H
25239313Sdim#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
26239313Sdim#endif
27239313Sdim
28239313Sdim#ifndef __FMAINTRIN_H
29239313Sdim#define __FMAINTRIN_H
30239313Sdim
31239313Sdim#ifndef __FMA__
32239313Sdim# error "FMA instruction set is not enabled"
33239313Sdim#else
34239313Sdim
35239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
36239313Sdim_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
37239313Sdim{
38239313Sdim  return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
39239313Sdim}
40239313Sdim
41239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
42239313Sdim_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
43239313Sdim{
44239313Sdim  return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
45239313Sdim}
46239313Sdim
47239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
48239313Sdim_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
49239313Sdim{
50239313Sdim  return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
51239313Sdim}
52239313Sdim
53239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
54239313Sdim_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
55239313Sdim{
56239313Sdim  return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
57239313Sdim}
58239313Sdim
59239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
60239313Sdim_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
61239313Sdim{
62239313Sdim  return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
63239313Sdim}
64239313Sdim
65239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
66239313Sdim_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
67239313Sdim{
68239313Sdim  return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
69239313Sdim}
70239313Sdim
71239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
72239313Sdim_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
73239313Sdim{
74239313Sdim  return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
75239313Sdim}
76239313Sdim
77239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
78239313Sdim_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
79239313Sdim{
80239313Sdim  return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
81239313Sdim}
82239313Sdim
83239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
84239313Sdim_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
85239313Sdim{
86239313Sdim  return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
87239313Sdim}
88239313Sdim
89239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
90239313Sdim_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
91239313Sdim{
92239313Sdim  return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
93239313Sdim}
94239313Sdim
95239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
96239313Sdim_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
97239313Sdim{
98239313Sdim  return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
99239313Sdim}
100239313Sdim
101239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
102239313Sdim_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
103239313Sdim{
104239313Sdim  return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
105239313Sdim}
106239313Sdim
107239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
108239313Sdim_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
109239313Sdim{
110239313Sdim  return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
111239313Sdim}
112239313Sdim
113239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
114239313Sdim_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
115239313Sdim{
116239313Sdim  return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
117239313Sdim}
118239313Sdim
119239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
120239313Sdim_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
121239313Sdim{
122239313Sdim  return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
123239313Sdim}
124239313Sdim
125239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
126239313Sdim_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
127239313Sdim{
128239313Sdim  return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
129239313Sdim}
130239313Sdim
131239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
132239313Sdim_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
133239313Sdim{
134239313Sdim  return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
135239313Sdim}
136239313Sdim
137239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
138239313Sdim_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
139239313Sdim{
140239313Sdim  return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
141239313Sdim}
142239313Sdim
143239313Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
144239313Sdim_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
145239313Sdim{
146239313Sdim  return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
147239313Sdim}
148239313Sdim
149239313Sdimstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
150239313Sdim_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
151239313Sdim{
152239313Sdim  return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
153239313Sdim}
154239313Sdim
155239313Sdimstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
156239313Sdim_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
157239313Sdim{
158239313Sdim  return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
159239313Sdim}
160239313Sdim
161239313Sdimstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
162239313Sdim_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
163239313Sdim{
164239313Sdim  return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
165239313Sdim}
166239313Sdim
167239313Sdimstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
168239313Sdim_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
169239313Sdim{
170239313Sdim  return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
171239313Sdim}
172239313Sdim
173239313Sdimstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
174239313Sdim_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
175239313Sdim{
176239313Sdim  return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
177239313Sdim}
178239313Sdim
179239313Sdimstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
180239313Sdim_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
181239313Sdim{
182239313Sdim  return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
183239313Sdim}
184239313Sdim
185239313Sdimstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
186239313Sdim_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
187239313Sdim{
188239313Sdim  return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
189239313Sdim}
190239313Sdim
191239313Sdimstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
192239313Sdim_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
193239313Sdim{
194239313Sdim  return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
195239313Sdim}
196239313Sdim
197239313Sdimstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
198239313Sdim_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
199239313Sdim{
200239313Sdim  return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
201239313Sdim}
202239313Sdim
203239313Sdimstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
204239313Sdim_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
205239313Sdim{
206239313Sdim  return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
207239313Sdim}
208239313Sdim
209239313Sdimstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
210239313Sdim_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
211239313Sdim{
212239313Sdim  return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
213239313Sdim}
214239313Sdim
215239313Sdimstatic __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
216239313Sdim_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
217239313Sdim{
218239313Sdim  return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
219239313Sdim}
220239313Sdim
221239313Sdimstatic __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
222239313Sdim_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
223239313Sdim{
224239313Sdim  return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
225239313Sdim}
226239313Sdim
227239313Sdim#endif /* __FMA__ */
228239313Sdim
229239313Sdim#endif /* __FMAINTRIN_H */
230