pmmintrin.h revision 360784
1/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10#ifndef __PMMINTRIN_H 11#define __PMMINTRIN_H 12 13#include <emmintrin.h> 14 15/* Define the default attributes for the functions in this file. */ 16#define __DEFAULT_FN_ATTRS \ 17 __attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128))) 18 19/// Loads data from an unaligned memory location to elements in a 128-bit 20/// vector. 21/// 22/// If the address of the data is not 16-byte aligned, the instruction may 23/// read two adjacent aligned blocks of memory to retrieve the requested 24/// data. 25/// 26/// \headerfile <x86intrin.h> 27/// 28/// This intrinsic corresponds to the <c> VLDDQU </c> instruction. 29/// 30/// \param __p 31/// A pointer to a 128-bit integer vector containing integer values. 32/// \returns A 128-bit vector containing the moved values. 33static __inline__ __m128i __DEFAULT_FN_ATTRS 34_mm_lddqu_si128(__m128i const *__p) 35{ 36 return (__m128i)__builtin_ia32_lddqu((char const *)__p); 37} 38 39/// Adds the even-indexed values and subtracts the odd-indexed values of 40/// two 128-bit vectors of [4 x float]. 41/// 42/// \headerfile <x86intrin.h> 43/// 44/// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction. 45/// 46/// \param __a 47/// A 128-bit vector of [4 x float] containing the left source operand. 48/// \param __b 49/// A 128-bit vector of [4 x float] containing the right source operand. 50/// \returns A 128-bit vector of [4 x float] containing the alternating sums and 51/// differences of both operands. 52static __inline__ __m128 __DEFAULT_FN_ATTRS 53_mm_addsub_ps(__m128 __a, __m128 __b) 54{ 55 return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); 56} 57 58/// Horizontally adds the adjacent pairs of values contained in two 59/// 128-bit vectors of [4 x float]. 60/// 61/// \headerfile <x86intrin.h> 62/// 63/// This intrinsic corresponds to the <c> VHADDPS </c> instruction. 64/// 65/// \param __a 66/// A 128-bit vector of [4 x float] containing one of the source operands. 67/// The horizontal sums of the values are stored in the lower bits of the 68/// destination. 69/// \param __b 70/// A 128-bit vector of [4 x float] containing one of the source operands. 71/// The horizontal sums of the values are stored in the upper bits of the 72/// destination. 73/// \returns A 128-bit vector of [4 x float] containing the horizontal sums of 74/// both operands. 75static __inline__ __m128 __DEFAULT_FN_ATTRS 76_mm_hadd_ps(__m128 __a, __m128 __b) 77{ 78 return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b); 79} 80 81/// Horizontally subtracts the adjacent pairs of values contained in two 82/// 128-bit vectors of [4 x float]. 83/// 84/// \headerfile <x86intrin.h> 85/// 86/// This intrinsic corresponds to the <c> VHSUBPS </c> instruction. 87/// 88/// \param __a 89/// A 128-bit vector of [4 x float] containing one of the source operands. 90/// The horizontal differences between the values are stored in the lower 91/// bits of the destination. 92/// \param __b 93/// A 128-bit vector of [4 x float] containing one of the source operands. 94/// The horizontal differences between the values are stored in the upper 95/// bits of the destination. 96/// \returns A 128-bit vector of [4 x float] containing the horizontal 97/// differences of both operands. 98static __inline__ __m128 __DEFAULT_FN_ATTRS 99_mm_hsub_ps(__m128 __a, __m128 __b) 100{ 101 return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b); 102} 103 104/// Moves and duplicates odd-indexed values from a 128-bit vector 105/// of [4 x float] to float values stored in a 128-bit vector of 106/// [4 x float]. 107/// 108/// \headerfile <x86intrin.h> 109/// 110/// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction. 111/// 112/// \param __a 113/// A 128-bit vector of [4 x float]. \n 114/// Bits [127:96] of the source are written to bits [127:96] and [95:64] of 115/// the destination. \n 116/// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the 117/// destination. 118/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated 119/// values. 120static __inline__ __m128 __DEFAULT_FN_ATTRS 121_mm_movehdup_ps(__m128 __a) 122{ 123 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3); 124} 125 126/// Duplicates even-indexed values from a 128-bit vector of 127/// [4 x float] to float values stored in a 128-bit vector of [4 x float]. 128/// 129/// \headerfile <x86intrin.h> 130/// 131/// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction. 132/// 133/// \param __a 134/// A 128-bit vector of [4 x float] \n 135/// Bits [95:64] of the source are written to bits [127:96] and [95:64] of 136/// the destination. \n 137/// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the 138/// destination. 139/// \returns A 128-bit vector of [4 x float] containing the moved and duplicated 140/// values. 141static __inline__ __m128 __DEFAULT_FN_ATTRS 142_mm_moveldup_ps(__m128 __a) 143{ 144 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2); 145} 146 147/// Adds the even-indexed values and subtracts the odd-indexed values of 148/// two 128-bit vectors of [2 x double]. 149/// 150/// \headerfile <x86intrin.h> 151/// 152/// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction. 153/// 154/// \param __a 155/// A 128-bit vector of [2 x double] containing the left source operand. 156/// \param __b 157/// A 128-bit vector of [2 x double] containing the right source operand. 158/// \returns A 128-bit vector of [2 x double] containing the alternating sums 159/// and differences of both operands. 160static __inline__ __m128d __DEFAULT_FN_ATTRS 161_mm_addsub_pd(__m128d __a, __m128d __b) 162{ 163 return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); 164} 165 166/// Horizontally adds the pairs of values contained in two 128-bit 167/// vectors of [2 x double]. 168/// 169/// \headerfile <x86intrin.h> 170/// 171/// This intrinsic corresponds to the <c> VHADDPD </c> instruction. 172/// 173/// \param __a 174/// A 128-bit vector of [2 x double] containing one of the source operands. 175/// The horizontal sum of the values is stored in the lower bits of the 176/// destination. 177/// \param __b 178/// A 128-bit vector of [2 x double] containing one of the source operands. 179/// The horizontal sum of the values is stored in the upper bits of the 180/// destination. 181/// \returns A 128-bit vector of [2 x double] containing the horizontal sums of 182/// both operands. 183static __inline__ __m128d __DEFAULT_FN_ATTRS 184_mm_hadd_pd(__m128d __a, __m128d __b) 185{ 186 return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b); 187} 188 189/// Horizontally subtracts the pairs of values contained in two 128-bit 190/// vectors of [2 x double]. 191/// 192/// \headerfile <x86intrin.h> 193/// 194/// This intrinsic corresponds to the <c> VHSUBPD </c> instruction. 195/// 196/// \param __a 197/// A 128-bit vector of [2 x double] containing one of the source operands. 198/// The horizontal difference of the values is stored in the lower bits of 199/// the destination. 200/// \param __b 201/// A 128-bit vector of [2 x double] containing one of the source operands. 202/// The horizontal difference of the values is stored in the upper bits of 203/// the destination. 204/// \returns A 128-bit vector of [2 x double] containing the horizontal 205/// differences of both operands. 206static __inline__ __m128d __DEFAULT_FN_ATTRS 207_mm_hsub_pd(__m128d __a, __m128d __b) 208{ 209 return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b); 210} 211 212/// Moves and duplicates one double-precision value to double-precision 213/// values stored in a 128-bit vector of [2 x double]. 214/// 215/// \headerfile <x86intrin.h> 216/// 217/// \code 218/// __m128d _mm_loaddup_pd(double const *dp); 219/// \endcode 220/// 221/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction. 222/// 223/// \param dp 224/// A pointer to a double-precision value to be moved and duplicated. 225/// \returns A 128-bit vector of [2 x double] containing the moved and 226/// duplicated values. 227#define _mm_loaddup_pd(dp) _mm_load1_pd(dp) 228 229/// Moves and duplicates the double-precision value in the lower bits of 230/// a 128-bit vector of [2 x double] to double-precision values stored in a 231/// 128-bit vector of [2 x double]. 232/// 233/// \headerfile <x86intrin.h> 234/// 235/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction. 236/// 237/// \param __a 238/// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits 239/// [127:64] and [63:0] of the destination. 240/// \returns A 128-bit vector of [2 x double] containing the moved and 241/// duplicated values. 242static __inline__ __m128d __DEFAULT_FN_ATTRS 243_mm_movedup_pd(__m128d __a) 244{ 245 return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); 246} 247 248/// Establishes a linear address memory range to be monitored and puts 249/// the processor in the monitor event pending state. Data stored in the 250/// monitored address range causes the processor to exit the pending state. 251/// 252/// \headerfile <x86intrin.h> 253/// 254/// This intrinsic corresponds to the <c> MONITOR </c> instruction. 255/// 256/// \param __p 257/// The memory range to be monitored. The size of the range is determined by 258/// CPUID function 0000_0005h. 259/// \param __extensions 260/// Optional extensions for the monitoring state. 261/// \param __hints 262/// Optional hints for the monitoring state. 263static __inline__ void __DEFAULT_FN_ATTRS 264_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints) 265{ 266 __builtin_ia32_monitor(__p, __extensions, __hints); 267} 268 269/// Used with the MONITOR instruction to wait while the processor is in 270/// the monitor event pending state. Data stored in the monitored address 271/// range causes the processor to exit the pending state. 272/// 273/// \headerfile <x86intrin.h> 274/// 275/// This intrinsic corresponds to the <c> MWAIT </c> instruction. 276/// 277/// \param __extensions 278/// Optional extensions for the monitoring state, which may vary by 279/// processor. 280/// \param __hints 281/// Optional hints for the monitoring state, which may vary by processor. 282static __inline__ void __DEFAULT_FN_ATTRS 283_mm_mwait(unsigned __extensions, unsigned __hints) 284{ 285 __builtin_ia32_mwait(__extensions, __hints); 286} 287 288#undef __DEFAULT_FN_ATTRS 289 290#endif /* __PMMINTRIN_H */ 291