core/stdarch/crates/core_arch/src/x86/
sse2.rs

1//! Streaming SIMD Extensions 2 (SSE2)
2
3#[cfg(test)]
4use stdarch_test::assert_instr;
5
6use crate::{
7    core_arch::{simd::*, x86::*},
8    intrinsics::simd::*,
9    intrinsics::sqrtf64,
10    mem, ptr,
11};
12
/// Provides a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
/// loops.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause)
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_pause() {
    // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
    // the SSE2 target-feature - therefore it does not require any target features
    // SAFETY: `pause` is a pure execution hint with no memory or register
    // side effects, so calling it is sound unconditionally.
    unsafe { pause() }
}
27
/// Invalidates and flushes the cache line that contains `p` from all levels of
/// the cache hierarchy.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflush)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    // Thin wrapper over the raw `clflush` intrinsic.
    // NOTE(review): presumably `p` must be a valid address for the flush to
    // be sound — confirm against the intrinsic's documented contract.
    clflush(p)
}
39
/// Performs a serializing operation on all load-from-memory instructions
/// that were issued prior to this instruction.
///
/// Guarantees that every load instruction that precedes, in program order, is
/// globally visible before any load instruction which follows the fence in
/// program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_lfence() {
    // SAFETY: the fence only constrains instruction ordering; it reads and
    // writes no memory itself, so it has no preconditions.
    unsafe { lfence() }
}
55
/// Performs a serializing operation on all load-from-memory and store-to-memory
/// instructions that were issued prior to this instruction.
///
/// Guarantees that every memory access that precedes, in program order, the
/// memory fence instruction is globally visible before any memory instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mfence() {
    // SAFETY: the fence only constrains memory ordering; it accesses no
    // memory of its own, so it has no preconditions.
    unsafe { mfence() }
}
71
/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise add on two `i8x16` views of the 128-bit inputs; the
    // transmute is a same-size reinterpretation back to `__m128i`.
    unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
}
83
/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise add on `i16x8` views; same-size transmute back.
    unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
}
95
/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise add on `i32x4` views; same-size transmute back.
    unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
}
107
/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise add on `i64x2` views; same-size transmute back.
    unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
}
119
/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise saturating add on signed 8-bit lanes; same-size
    // transmute back to `__m128i`.
    unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
}
131
/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise saturating add on signed 16-bit lanes.
    unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
}
143
/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise saturating add on unsigned 8-bit lanes.
    unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
}
155
/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_adds_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise saturating add on unsigned 16-bit lanes.
    unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
}
167
168/// Averages packed unsigned 8-bit integers in `a` and `b`.
169///
170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu8)
171#[inline]
172#[target_feature(enable = "sse2")]
173#[cfg_attr(test, assert_instr(pavgb))]
174#[stable(feature = "simd_x86", since = "1.27.0")]
175#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
176pub const fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
177    unsafe {
178        let a = simd_cast::<_, u16x16>(a.as_u8x16());
179        let b = simd_cast::<_, u16x16>(b.as_u8x16());
180        let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
181        transmute(simd_cast::<_, u8x16>(r))
182    }
183}
184
185/// Averages packed unsigned 16-bit integers in `a` and `b`.
186///
187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_avg_epu16)
188#[inline]
189#[target_feature(enable = "sse2")]
190#[cfg_attr(test, assert_instr(pavgw))]
191#[stable(feature = "simd_x86", since = "1.27.0")]
192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
193pub const fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
194    unsafe {
195        let a = simd_cast::<_, u32x8>(a.as_u16x8());
196        let b = simd_cast::<_, u32x8>(b.as_u16x8());
197        let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
198        transmute(simd_cast::<_, u16x8>(r))
199    }
200}
201
/// Multiplies and then horizontally add signed 16 bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
/// intermediate 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Widen both inputs to i32 lanes and multiply, giving the eight
        // exact 32-bit intermediate products.
        let r: i32x8 = simd_mul(simd_cast(a.as_i16x8()), simd_cast(b.as_i16x8()));
        // Split the products into even- and odd-indexed lanes, then add the
        // pairs: result lane k = r[2k] + r[2k+1].
        let even: i32x4 = simd_shuffle!(r, r, [0, 2, 4, 6]);
        let odd: i32x4 = simd_shuffle!(r, r, [1, 3, 5, 7]);
        simd_add(even, odd).as_m128i()
    }
}
222
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise maximum over signed 16-bit views of the inputs.
    unsafe { simd_imax(a.as_i16x8(), b.as_i16x8()).as_m128i() }
}
235
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise maximum over unsigned 8-bit views of the inputs.
    unsafe { simd_imax(a.as_u8x16(), b.as_u8x16()).as_m128i() }
}
248
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise minimum over signed 16-bit views of the inputs.
    unsafe { simd_imin(a.as_i16x8(), b.as_i16x8()).as_m128i() }
}
261
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise minimum over unsigned 8-bit views of the inputs.
    unsafe { simd_imin(a.as_u8x16(), b.as_u8x16()).as_m128i() }
}
274
275/// Multiplies the packed 16-bit integers in `a` and `b`.
276///
277/// The multiplication produces intermediate 32-bit integers, and returns the
278/// high 16 bits of the intermediate integers.
279///
280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epi16)
281#[inline]
282#[target_feature(enable = "sse2")]
283#[cfg_attr(test, assert_instr(pmulhw))]
284#[stable(feature = "simd_x86", since = "1.27.0")]
285#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
286pub const fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
287    unsafe {
288        let a = simd_cast::<_, i32x8>(a.as_i16x8());
289        let b = simd_cast::<_, i32x8>(b.as_i16x8());
290        let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
291        transmute(simd_cast::<i32x8, i16x8>(r))
292    }
293}
294
295/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
296///
297/// The multiplication produces intermediate 32-bit integers, and returns the
298/// high 16 bits of the intermediate integers.
299///
300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhi_epu16)
301#[inline]
302#[target_feature(enable = "sse2")]
303#[cfg_attr(test, assert_instr(pmulhuw))]
304#[stable(feature = "simd_x86", since = "1.27.0")]
305#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
306pub const fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
307    unsafe {
308        let a = simd_cast::<_, u32x8>(a.as_u16x8());
309        let b = simd_cast::<_, u32x8>(b.as_u16x8());
310        let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
311        transmute(simd_cast::<u32x8, u16x8>(r))
312    }
313}
314
/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// low 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    // A wrapping 16-bit multiply keeps exactly the low half of each 32-bit
    // product, so no widening is needed here.
    unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
}
329
330/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
331/// in `a` and `b`.
332///
333/// Returns the unsigned 64-bit results.
334///
335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epu32)
336#[inline]
337#[target_feature(enable = "sse2")]
338#[cfg_attr(test, assert_instr(pmuludq))]
339#[stable(feature = "simd_x86", since = "1.27.0")]
340#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
341pub const fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
342    unsafe {
343        let a = a.as_u64x2();
344        let b = b.as_u64x2();
345        let mask = u64x2::splat(u32::MAX as u64);
346        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
347    }
348}
349
/// Sum the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sum each consecutive 8 differences to produce
/// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in
/// the low 16 bits of 64-bit elements returned.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sad_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: delegates to the raw `psadbw` intrinsic on unsigned 8-bit
    // views; the function is gated on the `sse2` target feature.
    unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
}
365
/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise subtract on `i8x16` views; same-size transmute back.
    unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
}
377
/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise subtract on `i16x8` views; same-size transmute back.
    unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
}
389
/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise subtract on `i32x4` views; same-size transmute back.
    unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
}
401
/// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise subtract on `i64x2` views; same-size transmute back.
    unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
}
413
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise saturating subtract on signed 8-bit lanes.
    unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
}
426
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise saturating subtract on signed 16-bit lanes.
    unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
}
439
/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise saturating subtract on unsigned 8-bit lanes.
    unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
}
452
/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_subs_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    // SAFETY: lane-wise saturating subtract on unsigned 16-bit lanes.
    unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
}
465
/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    // Reject immediates wider than 8 bits at compile time, then delegate to
    // the shared shuffle-based implementation.
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_slli_si128_impl::<IMM8>(a) }
}
479
/// Implementation detail: converts the immediate argument of the
/// `_mm_slli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
const unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    // Shuffle index for result byte `i`. The shuffle sources are the
    // concatenation [zero (lanes 0..=15), a (lanes 16..=31)]:
    //  * shift > 15  -> index `i` selects a zero lane (whole result is 0);
    //  * otherwise   -> `16 - shift + i`; for i < shift this still lands in
    //    the zero half, for i >= shift it selects a's byte `i - shift`,
    //    i.e. a byte-wise left shift with zero fill.
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 { i } else { 16 - shift + i }
    }
    transmute::<i8x16, _>(simd_shuffle!(
        i8x16::ZERO,
        a.as_i8x16(),
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    ))
}
513
514/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
515///
516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bslli_si128)
517#[inline]
518#[target_feature(enable = "sse2")]
519#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
520#[rustc_legacy_const_generics(1)]
521#[stable(feature = "simd_x86", since = "1.27.0")]
522#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
523pub const fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
524    unsafe {
525        static_assert_uimm_bits!(IMM8, 8);
526        _mm_slli_si128_impl::<IMM8>(a)
527    }
528}
529
530/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
531///
532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bsrli_si128)
533#[inline]
534#[target_feature(enable = "sse2")]
535#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
536#[rustc_legacy_const_generics(1)]
537#[stable(feature = "simd_x86", since = "1.27.0")]
538#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
539pub const fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
540    unsafe {
541        static_assert_uimm_bits!(IMM8, 8);
542        _mm_srli_si128_impl::<IMM8>(a)
543    }
544}
545
/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        // Counts >= 16 zero every lane; handling them explicitly also keeps
        // the `simd_shl` amount within the lane width.
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
        }
    }
}
565
/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    // SAFETY: delegates to the raw `psllw` intrinsic, which takes the shift
    // count as a vector operand; gated on the `sse2` target feature.
    unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
}
577
/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        // Counts >= 32 zero every lane; handling them explicitly also keeps
        // the `simd_shl` amount within the lane width.
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
        }
    }
}
597
/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    // SAFETY: delegates to the raw `pslld` intrinsic, which takes the shift
    // count as a vector operand; gated on the `sse2` target feature.
    unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
}
609
/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_slli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        // Counts >= 64 zero every lane; handling them explicitly also keeps
        // the `simd_shl` amount within the lane width.
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
        }
    }
}
629
/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sll_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    // SAFETY: delegates to the raw `psllq` intrinsic, which takes the shift
    // count as a vector operand; gated on the `sse2` target feature.
    unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
}
641
/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    // Clamp the count at 15: an arithmetic shift by >= 16 fills every lane
    // with its sign bit, which is the same result as shifting by 15, and the
    // clamp keeps the `simd_shr` amount within the lane width.
    unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
}
656
/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    // SAFETY: delegates to the raw `psraw` intrinsic, which takes the shift
    // count as a vector operand; gated on the `sse2` target feature.
    unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
}
668
/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    // Clamp the count at 31: an arithmetic shift by >= 32 fills every lane
    // with its sign bit, same as shifting by 31, and the clamp keeps the
    // `simd_shr` amount within the lane width.
    unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
}
683
/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    // SAFETY: delegates to the raw `psrad` intrinsic, which takes the shift
    // count as a vector operand; gated on the `sse2` target feature.
    unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
}
695
/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    // Reject immediates wider than 8 bits at compile time, then delegate to
    // the shared shuffle-based implementation.
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { _mm_srli_si128_impl::<IMM8>(a) }
}
709
/// Implementation detail: converts the immediate argument of the
/// `_mm_srli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
const unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    // Shuffle index for result byte `i`. The shuffle sources are the
    // concatenation [a (lanes 0..=15), zero (lanes 16..=31)]:
    //  * shift > 15  -> `i + 16` selects a zero lane (whole result is 0);
    //  * otherwise   -> `i + shift` selects a's byte `i + shift` while it is
    //    in range and a zero lane once it passes 15, i.e. a byte-wise right
    //    shift with zero fill.
    const fn mask(shift: i32, i: u32) -> u32 {
        if (shift as u32) > 15 {
            i + 16
        } else {
            i + (shift as u32)
        }
    }
    let x: i8x16 = simd_shuffle!(
        a.as_i8x16(),
        i8x16::ZERO,
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    );
    transmute(x)
}
747
748/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
749/// zeros.
750///
751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi16)
752#[inline]
753#[target_feature(enable = "sse2")]
754#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
755#[rustc_legacy_const_generics(1)]
756#[stable(feature = "simd_x86", since = "1.27.0")]
757#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
758pub const fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
759    static_assert_uimm_bits!(IMM8, 8);
760    unsafe {
761        if IMM8 >= 16 {
762            _mm_setzero_si128()
763        } else {
764            transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
765        }
766    }
767}
768
/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    // Delegates to the raw `psrlw` intrinsic; the `transmute` only
    // reinterprets the lane type and is free at runtime.
    unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
}
780
781/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
782/// zeros.
783///
784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi32)
785#[inline]
786#[target_feature(enable = "sse2")]
787#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
788#[rustc_legacy_const_generics(1)]
789#[stable(feature = "simd_x86", since = "1.27.0")]
790#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
791pub const fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
792    static_assert_uimm_bits!(IMM8, 8);
793    unsafe {
794        if IMM8 >= 32 {
795            _mm_setzero_si128()
796        } else {
797            transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
798        }
799    }
800}
801
/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    // Delegates to the raw `psrld` intrinsic; the `transmute` only
    // reinterprets the lane type and is free at runtime.
    unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
}
813
814/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
815/// zeros.
816///
817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srli_epi64)
818#[inline]
819#[target_feature(enable = "sse2")]
820#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
821#[rustc_legacy_const_generics(1)]
822#[stable(feature = "simd_x86", since = "1.27.0")]
823#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
824pub const fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
825    static_assert_uimm_bits!(IMM8, 8);
826    unsafe {
827        if IMM8 >= 64 {
828            _mm_setzero_si128()
829        } else {
830            transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
831        }
832    }
833}
834
/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    // Delegates to the raw `psrlq` intrinsic; the `transmute` only
    // reinterprets the lane type and is free at runtime.
    unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
}
846
/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    // Lane interpretation is irrelevant for a pure bitwise operation, so the
    // vectors are AND-ed directly.
    unsafe { simd_and(a, b) }
}
859
/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
/// then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    // Computes `(!a) & b`: the NOT is formed by XOR-ing `a` with an
    // all-ones vector (`_mm_set1_epi8(-1)`).
    unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
}
872
/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    // Lane interpretation is irrelevant for a pure bitwise operation, so the
    // vectors are OR-ed directly.
    unsafe { simd_or(a, b) }
}
885
/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    // Lane interpretation is irrelevant for a pure bitwise operation, so the
    // vectors are XOR-ed directly.
    unsafe { simd_xor(a, b) }
}
898
/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// Each lane of the result is all ones where the comparison holds and all
/// zeros otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
}
910
/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// Each lane of the result is all ones where the comparison holds and all
/// zeros otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
}
922
/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// Each lane of the result is all ones where the comparison holds and all
/// zeros otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
}
934
/// Compares packed 8-bit integers in `a` and `b` for greater-than (signed).
///
/// Each lane of the result is all ones where the comparison holds and all
/// zeros otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
}
946
/// Compares packed 16-bit integers in `a` and `b` for greater-than (signed).
///
/// Each lane of the result is all ones where the comparison holds and all
/// zeros otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
}
958
/// Compares packed 32-bit integers in `a` and `b` for greater-than (signed).
///
/// Each lane of the result is all ones where the comparison holds and all
/// zeros otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
}
970
/// Compares packed 8-bit integers in `a` and `b` for less-than (signed).
///
/// There is no dedicated less-than instruction in SSE2; this compiles to
/// `pcmpgtb` with the operands swapped (see the `assert_instr` below).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
}
982
/// Compares packed 16-bit integers in `a` and `b` for less-than (signed).
///
/// There is no dedicated less-than instruction in SSE2; this compiles to
/// `pcmpgtw` with the operands swapped (see the `assert_instr` below).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
}
994
/// Compares packed 32-bit integers in `a` and `b` for less-than (signed).
///
/// There is no dedicated less-than instruction in SSE2; this compiles to
/// `pcmpgtd` with the operands swapped (see the `assert_instr` below).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
}
1006
/// Converts the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    unsafe {
        let a = a.as_i32x4();
        // Select lanes 0 and 1, then widen each i32 to f64 (exact; every
        // i32 is representable in an f64).
        simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
    }
}
1022
/// Returns `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// The upper element of `a` is passed through unchanged.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    unsafe { simd_insert!(a, 0, b as f64) }
}
1035
/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    // Per-lane i32 -> f32 conversion via `simd_cast`.
    unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
}
1048
/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    // Delegates to the raw `cvtps2dq` intrinsic rather than `simd_cast` so
    // the instruction's own conversion semantics are preserved exactly.
    unsafe { transmute(cvtps2dq(a)) }
}
1060
1061/// Returns a vector whose lowest element is `a` and all higher elements are
1062/// `0`.
1063///
1064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi32_si128)
1065#[inline]
1066#[target_feature(enable = "sse2")]
1067#[stable(feature = "simd_x86", since = "1.27.0")]
1068#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1069pub const fn _mm_cvtsi32_si128(a: i32) -> __m128i {
1070    unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
1071}
1072
1073/// Returns the lowest element of `a`.
1074///
1075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsi128_si32)
1076#[inline]
1077#[target_feature(enable = "sse2")]
1078#[stable(feature = "simd_x86", since = "1.27.0")]
1079#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1080pub const fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
1081    unsafe { simd_extract!(a.as_i32x4(), 0) }
1082}
1083
1084/// Sets packed 64-bit integers with the supplied values, from highest to
1085/// lowest.
1086///
1087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi64x)
1088#[inline]
1089#[target_feature(enable = "sse2")]
1090// no particular instruction to test
1091#[stable(feature = "simd_x86", since = "1.27.0")]
1092#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1093pub const fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1094    unsafe { transmute(i64x2::new(e0, e1)) }
1095}
1096
1097/// Sets packed 32-bit integers with the supplied values.
1098///
1099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi32)
1100#[inline]
1101#[target_feature(enable = "sse2")]
1102// no particular instruction to test
1103#[stable(feature = "simd_x86", since = "1.27.0")]
1104#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1105pub const fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1106    unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
1107}
1108
1109/// Sets packed 16-bit integers with the supplied values.
1110///
1111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi16)
1112#[inline]
1113#[target_feature(enable = "sse2")]
1114// no particular instruction to test
1115#[stable(feature = "simd_x86", since = "1.27.0")]
1116#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1117pub const fn _mm_set_epi16(
1118    e7: i16,
1119    e6: i16,
1120    e5: i16,
1121    e4: i16,
1122    e3: i16,
1123    e2: i16,
1124    e1: i16,
1125    e0: i16,
1126) -> __m128i {
1127    unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
1128}
1129
1130/// Sets packed 8-bit integers with the supplied values.
1131///
1132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_epi8)
1133#[inline]
1134#[target_feature(enable = "sse2")]
1135// no particular instruction to test
1136#[stable(feature = "simd_x86", since = "1.27.0")]
1137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1138pub const fn _mm_set_epi8(
1139    e15: i8,
1140    e14: i8,
1141    e13: i8,
1142    e12: i8,
1143    e11: i8,
1144    e10: i8,
1145    e9: i8,
1146    e8: i8,
1147    e7: i8,
1148    e6: i8,
1149    e5: i8,
1150    e4: i8,
1151    e3: i8,
1152    e2: i8,
1153    e1: i8,
1154    e0: i8,
1155) -> __m128i {
1156    unsafe {
1157        #[rustfmt::skip]
1158        transmute(i8x16::new(
1159            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1160        ))
1161    }
1162}
1163
1164/// Broadcasts 64-bit integer `a` to all elements.
1165///
1166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi64x)
1167#[inline]
1168#[target_feature(enable = "sse2")]
1169// no particular instruction to test
1170#[stable(feature = "simd_x86", since = "1.27.0")]
1171#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1172pub const fn _mm_set1_epi64x(a: i64) -> __m128i {
1173    _mm_set_epi64x(a, a)
1174}
1175
1176/// Broadcasts 32-bit integer `a` to all elements.
1177///
1178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi32)
1179#[inline]
1180#[target_feature(enable = "sse2")]
1181// no particular instruction to test
1182#[stable(feature = "simd_x86", since = "1.27.0")]
1183#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1184pub const fn _mm_set1_epi32(a: i32) -> __m128i {
1185    _mm_set_epi32(a, a, a, a)
1186}
1187
1188/// Broadcasts 16-bit integer `a` to all elements.
1189///
1190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi16)
1191#[inline]
1192#[target_feature(enable = "sse2")]
1193// no particular instruction to test
1194#[stable(feature = "simd_x86", since = "1.27.0")]
1195#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1196pub const fn _mm_set1_epi16(a: i16) -> __m128i {
1197    _mm_set_epi16(a, a, a, a, a, a, a, a)
1198}
1199
1200/// Broadcasts 8-bit integer `a` to all elements.
1201///
1202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_epi8)
1203#[inline]
1204#[target_feature(enable = "sse2")]
1205// no particular instruction to test
1206#[stable(feature = "simd_x86", since = "1.27.0")]
1207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1208pub const fn _mm_set1_epi8(a: i8) -> __m128i {
1209    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
1210}
1211
1212/// Sets packed 32-bit integers with the supplied values in reverse order.
1213///
1214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi32)
1215#[inline]
1216#[target_feature(enable = "sse2")]
1217// no particular instruction to test
1218#[stable(feature = "simd_x86", since = "1.27.0")]
1219#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1220pub const fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1221    _mm_set_epi32(e0, e1, e2, e3)
1222}
1223
1224/// Sets packed 16-bit integers with the supplied values in reverse order.
1225///
1226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi16)
1227#[inline]
1228#[target_feature(enable = "sse2")]
1229// no particular instruction to test
1230#[stable(feature = "simd_x86", since = "1.27.0")]
1231#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1232pub const fn _mm_setr_epi16(
1233    e7: i16,
1234    e6: i16,
1235    e5: i16,
1236    e4: i16,
1237    e3: i16,
1238    e2: i16,
1239    e1: i16,
1240    e0: i16,
1241) -> __m128i {
1242    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
1243}
1244
1245/// Sets packed 8-bit integers with the supplied values in reverse order.
1246///
1247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_epi8)
1248#[inline]
1249#[target_feature(enable = "sse2")]
1250// no particular instruction to test
1251#[stable(feature = "simd_x86", since = "1.27.0")]
1252#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1253pub const fn _mm_setr_epi8(
1254    e15: i8,
1255    e14: i8,
1256    e13: i8,
1257    e12: i8,
1258    e11: i8,
1259    e10: i8,
1260    e9: i8,
1261    e8: i8,
1262    e7: i8,
1263    e6: i8,
1264    e5: i8,
1265    e4: i8,
1266    e3: i8,
1267    e2: i8,
1268    e1: i8,
1269    e0: i8,
1270) -> __m128i {
1271    #[rustfmt::skip]
1272    _mm_set_epi8(
1273        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1274    )
1275}
1276
/// Returns a vector with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_setzero_si128() -> __m128i {
    // The all-zero bit pattern is a valid `__m128i`; evaluating `zeroed` in
    // a `const` block guarantees it is computed at compile time.
    const { unsafe { mem::zeroed() } }
}
1288
/// Loads 64-bit integer from memory into first element of returned vector.
///
/// `mem_addr` does not need to be aligned (the read is unaligned); the upper
/// 64 bits of the result are zeroed.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    // Only the low 8 bytes of `mem_addr` are read.
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
}
1299
/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    // A plain dereference is an aligned load; the 16-byte alignment
    // requirement is the caller's obligation.
    *mem_addr
}
1316
/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    // Copy byte-by-byte into a local so no alignment is assumed for
    // `mem_addr`; the `_mm_undefined_si128` placeholder is fully overwritten.
    let mut dst: __m128i = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m128i>(),
    );
    dst
}
1336
/// Conditionally store 8-bit integer elements from `a` into memory using
/// `mask` flagged as non-temporal (unlikely to be used again soon).
///
/// Elements are not stored when the highest bit is not set in the
/// corresponding element.
///
/// `mem_addr` should correspond to a 128-bit memory location and does not need
/// to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskmoveu_si128)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maskmovdqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
    // Delegates directly to the raw `maskmovdqu` intrinsic.
    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}
1363
/// Stores 128-bits of integer data from `a` into memory.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
    // A plain assignment through the pointer is an aligned store; the
    // 16-byte alignment requirement is the caller's obligation.
    *mem_addr = a;
}
1380
/// Stores 128-bits of integer data from `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
    // `write_unaligned` imposes no alignment requirement on `mem_addr`.
    mem_addr.write_unaligned(a);
}
1394
/// Stores the lower 64-bit integer `a` to a memory location.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
    // Byte-wise copy of only the low 8 bytes of `a`; no alignment assumed.
    ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
}
1407
/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si128)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // (inline asm guarantees the non-temporal `movntdq` instruction is
    // actually emitted, which LLVM's `!nontemporal` metadata does not).
    crate::arch::asm!(
        vps!("movntdq",  ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
1435
/// Stores a 32-bit integer value in the specified memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_si32)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // (inline asm guarantees the non-temporal `movnti` instruction is
    // actually emitted, which LLVM's `!nontemporal` metadata does not).
    crate::arch::asm!(
        vps!("movnti", ",{a:e}"), // `:e` for 32bit value
        p = in(reg) mem_addr,
        a = in(reg) a,
        options(nostack, preserves_flags),
    );
}
1463
/// Returns a vector where the low element is extracted from `a` and its upper
/// element is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movd on msvc, movd on i686
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_move_epi64(a: __m128i) -> __m128i {
    unsafe {
        // Shuffle index 0 selects lane 0 of `a`; index 2 selects lane 0 of
        // the zero vector, clearing the upper half of the result.
        let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
        transmute(r)
    }
}
1480
/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packsswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
    // Delegates to the raw `packsswb` intrinsic; saturation behavior comes
    // from the instruction itself.
    unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
}
1492
/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Delegates to the raw `packssdw` intrinsic; saturation behavior comes
    // from the instruction itself.
    unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
}
1504
/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using unsigned saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
    // Note the inputs are treated as *signed* 16-bit values (as the hardware
    // does) and saturated into the *unsigned* u8 range.
    unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
}
1516
/// Returns the `imm8` element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
    // Only 3 bits of the immediate are valid: there are 8 16-bit lanes.
    static_assert_uimm_bits!(IMM8, 3);
    // Extracting as `u16` gives the zero-extended result mandated by PEXTRW.
    unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
}
1530
/// Returns a new vector where the `imm8` element of `a` is replaced with `i`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    // Only 3 bits of the immediate are valid: there are 8 16-bit lanes.
    static_assert_uimm_bits!(IMM8, 3);
    // Only the low 16 bits of `i` are used (`i as i16`), matching PINSRW.
    unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
}
1544
/// Returns a mask of the most significant bit of each element in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_movemask_epi8(a: __m128i) -> i32 {
    unsafe {
        let z = i8x16::ZERO;
        // A signed `< 0` comparison is true exactly when the lane's MSB is
        // set, so the comparison mask captures the sign bits.
        let m: i8x16 = simd_lt(a.as_i8x16(), z);
        // Collapse the 16 lane masks into one bit each; widen via `u32` so
        // the upper 16 bits of the `i32` result are zero.
        simd_bitmask::<_, u16>(m) as u32 as i32
    }
}
1560
/// Shuffles 32-bit integers in `a` using the control in `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i32x4();
        // Each 2-bit field of the immediate selects the source lane for one
        // destination lane, lowest bits controlling lane 0.
        let x: i32x4 = simd_shuffle!(
            a,
            a,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        );
        transmute(x)
    }
}
1587
/// Shuffles 16-bit integers in the high 64 bits of `a` using the control in
/// `IMM8`.
///
/// Put the results in the high 64 bits of the returned vector, with the low 64
/// bits being copied from `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflehi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x8();
        // Lanes 0-3 pass through untouched; each 2-bit immediate field picks
        // one of lanes 4-7 (hence the `+ 4` bias) for the high half.
        let x: i16x8 = simd_shuffle!(
            a,
            a,
            [
                0,
                1,
                2,
                3,
                (IMM8 as u32 & 0b11) + 4,
                ((IMM8 as u32 >> 2) & 0b11) + 4,
                ((IMM8 as u32 >> 4) & 0b11) + 4,
                ((IMM8 as u32 >> 6) & 0b11) + 4,
            ],
        );
        transmute(x)
    }
}
1622
/// Shuffles 16-bit integers in the low 64 bits of `a` using the control in
/// `IMM8`.
///
/// Put the results in the low 64 bits of the returned vector, with the high 64
/// bits being copied from `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shufflelo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x8();
        // Each 2-bit immediate field picks one of lanes 0-3 for the low
        // half; lanes 4-7 pass through untouched.
        let x: i16x8 = simd_shuffle!(
            a,
            a,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
                4,
                5,
                6,
                7,
            ],
        );
        transmute(x)
    }
}
1657
/// Unpacks and interleave 8-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Shuffle indices 0-15 address `a`, 16-31 address `b`; alternating
        // a[8..], b[8..] interleaves the high halves byte by byte.
        transmute::<i8x16, _>(simd_shuffle!(
            a.as_i8x16(),
            b.as_i8x16(),
            [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
        ))
    }
}
1675
/// Unpacks and interleave 16-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Indices 0-7 address `a`, 8-15 address `b`; interleave lanes 4-7.
        let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
        transmute::<i16x8, _>(x)
    }
}
1690
/// Unpacks and interleave 32-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// Note: the float-domain `unpckhps` is asserted rather than `punpckhdq`; the
// two have identical bit-level behavior for this shuffle.
#[cfg_attr(test, assert_instr(unpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Indices 0-3 address `a`, 4-7 address `b`; interleave lanes 2 and 3.
    unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
}
1702
/// Unpacks and interleave 64-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// Note: the float-domain `unpckhpd` is asserted rather than `punpckhqdq`; the
// two have identical bit-level behavior for this shuffle.
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
    // Index 1 is the high lane of `a`, index 3 the high lane of `b`.
    unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
}
1714
/// Unpacks and interleave 8-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Shuffle indices 0-15 address `a`, 16-31 address `b`; alternating
        // a[0..8], b[0..8] interleaves the low halves byte by byte.
        transmute::<i8x16, _>(simd_shuffle!(
            a.as_i8x16(),
            b.as_i8x16(),
            [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
        ))
    }
}
1732
/// Unpacks and interleave 16-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Indices 0-7 address `a`, 8-15 address `b`; interleave lanes 0-3.
        let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
        transmute::<i16x8, _>(x)
    }
}
1747
/// Unpacks and interleave 32-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// Note: the float-domain `unpcklps` is asserted rather than `punpckldq`; the
// two have identical bit-level behavior for this shuffle.
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Indices 0-3 address `a`, 4-7 address `b`; interleave lanes 0 and 1.
    unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
}
1759
/// Unpacks and interleave 64-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// Note: `movlhps` is asserted; it is bit-identical to `punpcklqdq` here.
#[cfg_attr(test, assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
    // Index 0 is the low lane of `a`, index 2 the low lane of `b`.
    unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
}
1771
/// Returns a new vector with the low element of `a` replaced by the sum of the
/// low elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar-add the low lanes and write the result back into lane 0 of `a`;
    // the upper lane of `a` is preserved by `simd_insert!`.
    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
}
1784
/// Adds packed double-precision (64-bit) floating-point elements in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
    // Lane-wise addition; the `assert_instr` above checks it lowers to ADDPD.
    unsafe { simd_add(a, b) }
}
1797
/// Returns a new vector with the low element of `a` replaced by the result of
/// dividing the lower element of `a` by the lower element of `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar-divide the low lanes; the upper lane of `a` is preserved.
    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
}
1810
/// Divide packed double-precision (64-bit) floating-point elements in `a` by
/// packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_div_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
    // Lane-wise division; the `assert_instr` above checks it lowers to DIVPD.
    unsafe { simd_div(a, b) }
}
1823
/// Returns a new vector with the low element of `a` replaced by the maximum
/// of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
    // Delegates to the MAXSD binding to keep the instruction's exact
    // NaN/zero-handling semantics (which differ from a plain `f64::max`).
    unsafe { maxsd(a, b) }
}
1835
/// Returns a new vector with the maximum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
    // Delegates to the MAXPD binding to keep the instruction's exact
    // NaN/zero-handling semantics.
    unsafe { maxpd(a, b) }
}
1847
/// Returns a new vector with the low element of `a` replaced by the minimum
/// of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
    // Delegates to the MINSD binding to keep the instruction's exact
    // NaN/zero-handling semantics (which differ from a plain `f64::min`).
    unsafe { minsd(a, b) }
}
1859
/// Returns a new vector with the minimum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
    // Delegates to the MINPD binding to keep the instruction's exact
    // NaN/zero-handling semantics.
    unsafe { minpd(a, b) }
}
1871
/// Returns a new vector with the low element of `a` replaced by multiplying the
/// low elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar-multiply the low lanes; the upper lane of `a` is preserved.
    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
}
1884
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
/// and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
    // Lane-wise multiply; the `assert_instr` above checks it lowers to MULPD.
    unsafe { simd_mul(a, b) }
}
1897
/// Returns a new vector with the low element of `a` replaced by the square
/// root of the lower element `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
    // Note the square root is taken of `b`'s low lane, then inserted into
    // lane 0 of `a`; the upper lane of `a` is preserved.
    unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
}
1909
/// Returns a new vector with the square root of each of the values in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
    // Lane-wise square root; the `assert_instr` checks it lowers to SQRTPD.
    unsafe { simd_fsqrt(a) }
}
1920
/// Returns a new vector with the low element of `a` replaced by the result of
/// subtracting the low element of `b` from the low element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar-subtract the low lanes (a - b); the upper lane of `a` is kept.
    unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
}
1933
/// Subtract packed double-precision (64-bit) floating-point elements in `b`
/// from `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sub_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
    // Lane-wise subtract; the `assert_instr` above checks it lowers to SUBPD.
    unsafe { simd_sub(a, b) }
}
1946
/// Computes the bitwise AND of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_and_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
    // Bitwise ops have no float form in the compiler intrinsics, so go
    // through the integer domain and reuse `_mm_and_si128`.
    unsafe {
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_and_si128(a, b))
    }
}
1963
/// Computes the bitwise NOT of `a` and then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_andnot_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
    // Computed as `(!a) & b` via the integer-domain `_mm_andnot_si128`.
    unsafe {
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_andnot_si128(a, b))
    }
}
1979
/// Computes the bitwise OR of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
    // Reuses the integer-domain `_mm_or_si128` via bit-casts.
    unsafe {
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_or_si128(a, b))
    }
}
1995
/// Computes the bitwise XOR of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
    // Reuses the integer-domain `_mm_xor_si128` via bit-casts.
    unsafe {
        let a: __m128i = transmute(a);
        let b: __m128i = transmute(b);
        transmute(_mm_xor_si128(a, b))
    }
}
2011
/// Returns a new vector with the low element of `a` replaced by the equality
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
    // CMPSD predicate 0 = EQ (ordered).
    unsafe { cmpsd(a, b, 0) }
}
2023
/// Returns a new vector with the low element of `a` replaced by the less-than
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
    // CMPSD predicate 1 = LT (ordered).
    unsafe { cmpsd(a, b, 1) }
}
2035
/// Returns a new vector with the low element of `a` replaced by the
/// less-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
    // CMPSD predicate 2 = LE (ordered).
    unsafe { cmpsd(a, b, 2) }
}
2047
/// Returns a new vector with the low element of `a` replaced by the
/// greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_sd)
#[inline]
#[target_feature(enable = "sse2")]
// There is no CMPGTSD encoding; GT is implemented as LT with swapped
// operands, hence `cmpltsd` is the expected instruction.
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
    // `a > b` == `b < a`, but the swapped call would return `b`'s upper
    // lane, so lane 1 of `a` is re-inserted to match the _sd contract.
    unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
}
2059
/// Returns a new vector with the low element of `a` replaced by the
/// greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_sd)
#[inline]
#[target_feature(enable = "sse2")]
// GE is implemented as LE with swapped operands, hence `cmplesd`.
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
    // `a >= b` == `b <= a`; re-insert lane 1 of `a` since the swapped call
    // carries `b`'s upper lane through.
    unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
}
2071
/// Returns a new vector with the low element of `a` replaced by the result
/// of comparing both of the lower elements of `a` and `b` to `NaN`. If
/// neither are equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
/// otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
    // CMPSD predicate 7 = ORD (true when neither operand is NaN).
    unsafe { cmpsd(a, b, 7) }
}
2085
/// Returns a new vector with the low element of `a` replaced by the result of
/// comparing both of the lower elements of `a` and `b` to `NaN`. If either is
/// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
    // CMPSD predicate 3 = UNORD (true when either operand is NaN).
    unsafe { cmpsd(a, b, 3) }
}
2098
/// Returns a new vector with the low element of `a` replaced by the not-equal
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
    // CMPSD predicate 4 = NEQ (unordered: also true when either is NaN).
    unsafe { cmpsd(a, b, 4) }
}
2110
/// Returns a new vector with the low element of `a` replaced by the
/// not-less-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
    // CMPSD predicate 5 = NLT (unordered: also true when either is NaN).
    unsafe { cmpsd(a, b, 5) }
}
2122
/// Returns a new vector with the low element of `a` replaced by the
/// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
    // CMPSD predicate 6 = NLE (unordered: also true when either is NaN).
    unsafe { cmpsd(a, b, 6) }
}
2134
/// Returns a new vector with the low element of `a` replaced by the
/// not-greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_sd)
#[inline]
#[target_feature(enable = "sse2")]
// NGT is implemented as NLT with swapped operands, hence `cmpnltsd`.
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
    // `!(a > b)` == `!(b < a)`; re-insert lane 1 of `a` since the swapped
    // call carries `b`'s upper lane through.
    unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
}
2146
/// Returns a new vector with the low element of `a` replaced by the
/// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_sd)
#[inline]
#[target_feature(enable = "sse2")]
// NGE is implemented as NLE with swapped operands, hence `cmpnlesd`.
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
    // `!(a >= b)` == `!(b <= a)`; re-insert lane 1 of `a` since the swapped
    // call carries `b`'s upper lane through.
    unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
}
2158
/// Compares corresponding elements in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
    // CMPPD predicate 0 = EQ (ordered).
    unsafe { cmppd(a, b, 0) }
}
2169
/// Compares corresponding elements in `a` and `b` for less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
    // CMPPD predicate 1 = LT (ordered).
    unsafe { cmppd(a, b, 1) }
}
2180
/// Compares corresponding elements in `a` and `b` for less-than-or-equal
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
    // CMPPD predicate 2 = LE (ordered).
    unsafe { cmppd(a, b, 2) }
}
2191
/// Compares corresponding elements in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_pd)
#[inline]
#[target_feature(enable = "sse2")]
// There is no CMPGTPD encoding; GT is LT with swapped operands.
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
    // Unlike the _sd variant, no lane fix-up is needed: the packed compare
    // operates on every lane.
    _mm_cmplt_pd(b, a)
}
2202
/// Compares corresponding elements in `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_pd)
#[inline]
#[target_feature(enable = "sse2")]
// GE is LE with swapped operands, hence `cmplepd`.
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
    // `a >= b` == `b <= a` on every lane.
    _mm_cmple_pd(b, a)
}
2213
/// Compares corresponding elements in `a` and `b` to see if neither is `NaN`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpord_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
    // CMPPD predicate 7 = ORD (true when neither lane operand is NaN).
    unsafe { cmppd(a, b, 7) }
}
2224
/// Compares corresponding elements in `a` and `b` to see if either is `NaN`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpunord_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
    // CMPPD predicate 3 = UNORD (true when either lane operand is NaN).
    unsafe { cmppd(a, b, 3) }
}
2235
/// Compares corresponding elements in `a` and `b` for not-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
    // imm8 = 4 selects the CMPPD "not-equal" (NEQ, unordered) predicate:
    // NaN lanes compare as not-equal (all-ones).
    unsafe { cmppd(a, b, 4) }
}
2246
/// Compares corresponding elements in `a` and `b` for not-less-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnlt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
    // imm8 = 5 selects the CMPPD "not-less-than" (NLT, unordered) predicate:
    // NaN lanes yield all-ones (NaN is "not less than" anything).
    unsafe { cmppd(a, b, 5) }
}
2257
/// Compares corresponding elements in `a` and `b` for not-less-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnle_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
    // imm8 = 6 selects the CMPPD "not-less-than-or-equal" (NLE, unordered)
    // predicate: NaN lanes yield all-ones.
    unsafe { cmppd(a, b, 6) }
}
2268
/// Compares corresponding elements in `a` and `b` for not-greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpngt_pd)
#[inline]
#[target_feature(enable = "sse2")]
// No NGT predicate exists in CMPPD; NGT is CMPNLTPD with swapped operands.
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
    // !(a > b)  ==  !(b < a) (elementwise).
    _mm_cmpnlt_pd(b, a)
}
2279
/// Compares corresponding elements in `a` and `b` for
/// not-greater-than-or-equal.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpnge_pd)
#[inline]
#[target_feature(enable = "sse2")]
// No NGE predicate exists in CMPPD; NGE is CMPNLEPD with swapped operands.
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
    // !(a >= b)  ==  !(b <= a) (elementwise).
    _mm_cmpnle_pd(b, a)
}
2291
/// Compares the lower element of `a` and `b` for equality.
///
/// Returns `1` if the comparison holds, `0` otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comieq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
    // Delegates to the LLVM intrinsic for the COMISD-based ordered compare.
    unsafe { comieqsd(a, b) }
}
2302
/// Compares the lower element of `a` and `b` for less-than.
///
/// Returns `1` if the comparison holds, `0` otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comilt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
    // Delegates to the LLVM intrinsic for the COMISD-based ordered compare.
    unsafe { comiltsd(a, b) }
}
2313
/// Compares the lower element of `a` and `b` for less-than-or-equal.
///
/// Returns `1` if the comparison holds, `0` otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comile_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
    // Delegates to the LLVM intrinsic for the COMISD-based ordered compare.
    unsafe { comilesd(a, b) }
}
2324
/// Compares the lower element of `a` and `b` for greater-than.
///
/// Returns `1` if the comparison holds, `0` otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comigt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
    // Delegates to the LLVM intrinsic for the COMISD-based ordered compare.
    unsafe { comigtsd(a, b) }
}
2335
/// Compares the lower element of `a` and `b` for greater-than-or-equal.
///
/// Returns `1` if the comparison holds, `0` otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comige_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
    // Delegates to the LLVM intrinsic for the COMISD-based ordered compare.
    unsafe { comigesd(a, b) }
}
2346
/// Compares the lower element of `a` and `b` for not-equal.
///
/// Returns `1` if the comparison holds, `0` otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comineq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
    // Delegates to the LLVM intrinsic for the COMISD-based ordered compare.
    unsafe { comineqsd(a, b) }
}
2357
/// Compares the lower element of `a` and `b` for equality.
///
/// Returns `1` if the comparison holds, `0` otherwise. Uses the unordered
/// (UCOMISD) variant, which differs from `_mm_comieq_sd` only in its FP
/// exception behavior on NaN inputs.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomieq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomieqsd(a, b) }
}
2368
/// Compares the lower element of `a` and `b` for less-than.
///
/// Returns `1` if the comparison holds, `0` otherwise (unordered/UCOMISD
/// variant).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomilt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomiltsd(a, b) }
}
2379
/// Compares the lower element of `a` and `b` for less-than-or-equal.
///
/// Returns `1` if the comparison holds, `0` otherwise (unordered/UCOMISD
/// variant).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomile_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomilesd(a, b) }
}
2390
/// Compares the lower element of `a` and `b` for greater-than.
///
/// Returns `1` if the comparison holds, `0` otherwise (unordered/UCOMISD
/// variant).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomigt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomigtsd(a, b) }
}
2401
/// Compares the lower element of `a` and `b` for greater-than-or-equal.
///
/// Returns `1` if the comparison holds, `0` otherwise (unordered/UCOMISD
/// variant).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomige_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomigesd(a, b) }
}
2412
/// Compares the lower element of `a` and `b` for not-equal.
///
/// Returns `1` if the comparison holds, `0` otherwise (unordered/UCOMISD
/// variant).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ucomineq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
    unsafe { ucomineqsd(a, b) }
}
2423
/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed single-precision (32-bit) floating-point elements
///
/// The two converted values land in the low two lanes of the result; the
/// upper two lanes are zeroed (matching CVTPD2PS).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
    unsafe {
        // Narrow the two f64 lanes to f32 ...
        let r = simd_cast::<_, f32x2>(a.as_f64x2());
        let zero = f32x2::ZERO;
        // ... then widen back to four lanes, zero-filling lanes 2 and 3
        // (shuffle indices 2 and 3 select from `zero`).
        transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
    }
}
2440
/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed
/// double-precision (64-bit) floating-point elements.
///
/// Only the low two lanes of `a` are converted (matching CVTPS2PD).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtps_pd(a: __m128) -> __m128d {
    unsafe {
        let a = a.as_f32x4();
        // Extract lanes 0 and 1, then widen f32 -> f64 (lossless).
        transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
    }
}
2457
/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
    // Delegates to the LLVM intrinsic so the MXCSR rounding mode and the
    // out-of-range behavior of CVTPD2DQ are preserved exactly.
    unsafe { transmute(cvtpd2dq(a)) }
}
2469
/// Converts the lower double-precision (64-bit) floating-point element in a to
/// a 32-bit integer.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
    // Delegates to the LLVM intrinsic so CVTSD2SI's rounding (per MXCSR) and
    // out-of-range behavior are preserved exactly.
    unsafe { cvtsd2si(a) }
}
2481
/// Converts the lower double-precision (64-bit) floating-point element in `b`
/// to a single-precision (32-bit) floating-point element, store the result in
/// the lower element of the return value, and copies the upper element from `a`
/// to the upper element the return value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_ss)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
    // Delegates to the LLVM intrinsic so the narrowing conversion rounds per
    // the current MXCSR mode, matching CVTSD2SS.
    unsafe { cvtsd2ss(a, b) }
}
2495
/// Returns the lower double-precision (64-bit) floating-point element of `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsd_f64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtsd_f64(a: __m128d) -> f64 {
    // Pure lane extraction; no conversion or rounding takes place.
    unsafe { simd_extract!(a, 0) }
}
2506
/// Converts the lower single-precision (32-bit) floating-point element in `b`
/// to a double-precision (64-bit) floating-point element, store the result in
/// the lower element of the return value, and copies the upper element from `a`
/// to the upper element the return value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtss_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtss2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
    unsafe {
        // f32 -> f64 widening is exact, so a plain `as` cast is sufficient
        // (no MXCSR-dependent rounding) and stays usable in const context.
        let elt: f32 = simd_extract!(b, 0);
        simd_insert!(a, 0, elt as f64)
    }
}
2524
/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
    // Delegates to the LLVM intrinsic so CVTTPD2DQ's truncation and
    // out-of-range behavior are preserved exactly.
    unsafe { transmute(cvttpd2dq(a)) }
}
2536
/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer with truncation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
    // Delegates to the LLVM intrinsic so CVTTSD2SI's truncation and
    // out-of-range behavior are preserved exactly.
    unsafe { cvttsd2si(a) }
}
2548
/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
    // Delegates to the LLVM intrinsic so CVTTPS2DQ's truncation and
    // out-of-range behavior are preserved exactly.
    unsafe { transmute(cvttps2dq(a)) }
}
2560
/// Copies double-precision (64-bit) floating-point element `a` to the lower
/// element of the packed 64-bit return value.
///
/// The upper element of the result is zeroed.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set_sd(a: f64) -> __m128d {
    // _mm_set_pd takes (high, low), so this is [a, 0.0] in memory order.
    _mm_set_pd(0.0, a)
}
2572
/// Broadcasts double-precision (64-bit) floating-point value a to all elements
/// of the return value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set1_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set1_pd(a: f64) -> __m128d {
    // Both lanes receive `a`, so argument order is irrelevant here.
    _mm_set_pd(a, a)
}
2584
/// Broadcasts double-precision (64-bit) floating-point value a to all elements
/// of the return value.
///
/// Identical to [`_mm_set1_pd`]; both names exist for historical Intel
/// intrinsics compatibility.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set_pd1(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}
2596
/// Sets packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values.
///
/// `a` becomes the high (index 1) element and `b` the low (index 0) element,
/// following Intel's argument convention.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_set_pd(a: f64, b: f64) -> __m128d {
    // The array is in memory (low-to-high lane) order, hence [b, a].
    __m128d([b, a])
}
2608
/// Sets packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values in reverse order.
///
/// `a` becomes the low (index 0) element and `b` the high (index 1) element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setr_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
    _mm_set_pd(b, a)
}
2620
/// Returns packed double-precision (64-bit) floating-point elements with all
/// zeros.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_setzero_pd() -> __m128d {
    // SAFETY: the all-zero bit pattern is a valid __m128d (two +0.0 lanes).
    // The `const` block lets this fold to a constant even pre-optimization.
    const { unsafe { mem::zeroed() } }
}
2633
/// Returns a mask of the most significant bit of each element in `a`.
///
/// The mask is stored in the 2 least significant bits of the return value.
/// All other bits are set to `0`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movemask_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_movemask_pd(a: __m128d) -> i32 {
    // Propagate the highest bit to the rest, because simd_bitmask
    // requires all-1 or all-0.
    unsafe {
        // `x < 0` on the i64 reinterpretation is true exactly when the sign
        // (most significant) bit of the f64 lane is set.
        let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
        // simd_bitmask packs one bit per lane into the low bits of a u8.
        simd_bitmask::<i64x2, u8>(mask) as i32
    }
}
2653
/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
    // An aligned 16-byte read: the cast raises the pointee's alignment
    // requirement from 8 to 16 bytes, which the caller must guarantee.
    *(mem_addr as *const __m128d)
}
2672
/// Loads a 64-bit double-precision value to the low element of a
/// 128-bit floating-point vector and clears the upper element.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
    // Result is [*mem_addr, 0.0] in lane order (low lane loaded, high zeroed).
    _mm_setr_pd(*mem_addr, 0.)
}
2685
/// Loads a double-precision value into the high-order bits of a 128-bit
/// vector of `[2 x double]`. The low-order bits are copied from the low-order
/// bits of the first operand.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    // Keep lane 0 of `a`, replace lane 1 with the loaded value.
    _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
}
2699
/// Loads a double-precision value into the low-order bits of a 128-bit
/// vector of `[2 x double]`. The high-order bits are copied from the
/// high-order bits of the first operand.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadl_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    // Replace lane 0 with the loaded value, keep lane 1 of `a`.
    _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
}
2713
/// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit
/// aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_stream_pd)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    // Inline asm guarantees the MOVNTPD instruction (and thus its weakly
    // ordered, non-temporal semantics) is actually emitted, which a plain
    // store through LLVM's `!nontemporal` metadata would not guarantee.
    crate::arch::asm!(
        vps!("movntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
2743
/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// `mem_addr` does not need any alignment beyond that of `f64`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
    // Plain scalar store of lane 0.
    *mem_addr = simd_extract!(a, 0)
}
2756
/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
/// on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
    // Aligned 16-byte store; the cast raises the alignment requirement from
    // 8 to 16 bytes, which the caller must guarantee.
    *(mem_addr as *mut __m128d) = a;
}
2774
/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
    // `write_unaligned` permits any address, unlike the aligned `_mm_store_pd`.
    mem_addr.cast::<__m128d>().write_unaligned(a);
}
2788
/// Store 16-bit integer from the first element of a into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si16)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
    // Extract the lowest 16-bit lane and store it unaligned.
    ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
}
2801
/// Store 32-bit integer from the first element of a into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
    // Extract the lowest 32-bit lane and store it unaligned.
    ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
}
2814
/// Store 64-bit integer from the first element of a into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_si64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
    // Extract the lowest 64-bit lane and store it unaligned.
    ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
}
2827
/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store1_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
    // Broadcast lane 0 into both lanes, then do one aligned 16-byte store.
    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}
2842
/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// Identical to [`_mm_store1_pd`]; both names exist for historical Intel
/// intrinsics compatibility.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
    // Broadcast lane 0 into both lanes, then do one aligned 16-byte store.
    let b: __m128d = simd_shuffle!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}
2857
/// Stores 2 double-precision (64-bit) floating-point elements from `a` into
/// memory in reverse order.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storer_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
    // Swap the two lanes, then do one aligned 16-byte store.
    let b: __m128d = simd_shuffle!(a, a, [1, 0]);
    *(mem_addr as *mut __m128d) = b;
}
2873
/// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// `mem_addr` does not need any alignment beyond that of `f64`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
    // Plain scalar store of lane 1 (the high lane).
    *mem_addr = simd_extract!(a, 1);
}
2886
/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// `mem_addr` does not need any alignment beyond that of `f64`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storel_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
    // Plain scalar store of lane 0 (the low lane).
    *mem_addr = simd_extract!(a, 0);
}
2899
/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of returned vector.
///
/// `mem_addr` does not need any alignment beyond that of `f64`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load1_pd)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
    // One scalar load, broadcast to both lanes.
    let d = *mem_addr;
    _mm_setr_pd(d, d)
}
2913
/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of returned vector.
///
/// Identical to [`_mm_load1_pd`]; both names exist for historical Intel
/// intrinsics compatibility.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_pd1)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
    _mm_load1_pd(mem_addr)
}
2926
/// Loads 2 double-precision (64-bit) floating-point elements from memory into
/// the returned vector in reverse order. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadr_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(movaps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
    // Aligned load (inherits _mm_load_pd's 16-byte requirement), then swap
    // the two lanes.
    let a = _mm_load_pd(mem_addr);
    simd_shuffle!(a, a, [1, 0])
}
2944
/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
    // A bytewise copy carries no alignment requirement on the source,
    // which is what permits `mem_addr` to be arbitrarily aligned.
    let mut dst = _mm_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m128d>(),
    );
    dst
}
2964
2965/// Loads unaligned 16-bits of integer data from memory into new vector.
2966///
2967/// `mem_addr` does not need to be aligned on any particular boundary.
2968///
2969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si16)
2970#[inline]
2971#[target_feature(enable = "sse2")]
2972#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2973#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2974pub const unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
2975    transmute(i16x8::new(
2976        ptr::read_unaligned(mem_addr as *const i16),
2977        0,
2978        0,
2979        0,
2980        0,
2981        0,
2982        0,
2983        0,
2984    ))
2985}
2986
2987/// Loads unaligned 32-bits of integer data from memory into new vector.
2988///
2989/// `mem_addr` does not need to be aligned on any particular boundary.
2990///
2991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si32)
2992#[inline]
2993#[target_feature(enable = "sse2")]
2994#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2995#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2996pub const unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
2997    transmute(i32x4::new(
2998        ptr::read_unaligned(mem_addr as *const i32),
2999        0,
3000        0,
3001        0,
3002    ))
3003}
3004
3005/// Loads unaligned 64-bits of integer data from memory into new vector.
3006///
3007/// `mem_addr` does not need to be aligned on any particular boundary.
3008///
3009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_si64)
3010#[inline]
3011#[target_feature(enable = "sse2")]
3012#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
3013#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3014pub const unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
3015    transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
3016}
3017
/// Constructs a 128-bit floating-point vector of `[2 x double]` from two
/// 128-bit vector parameters of `[2 x double]`, using the immediate-value
/// parameter as a specifier.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(MASK, 8);
    // In `simd_shuffle!` indices 0-1 address lanes of `a` and 2-3 address
    // lanes of `b`: bit 0 of MASK picks the `a` lane for the low result lane,
    // bit 1 picks the `b` lane for the high result lane (hence the `+ 2`).
    unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
}
3033
/// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
/// 64 bits are set to the lower 64 bits of the second parameter. The upper
/// 64 bits are set to the upper 64 bits of the first parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_move_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
    // Low lane comes from `b`, high lane is kept from `a`.
    unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
}
3047
/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
/// floating-point vector of `[4 x float]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_castpd_ps(a: __m128d) -> __m128 {
    // SAFETY: plain bit reinterpretation between two 128-bit vector types.
    unsafe { transmute(a) }
}

/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
/// integer vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castpd_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_castpd_si128(a: __m128d) -> __m128i {
    // SAFETY: plain bit reinterpretation between two 128-bit vector types.
    unsafe { transmute(a) }
}

/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
/// floating-point vector of `[2 x double]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_castps_pd(a: __m128) -> __m128d {
    // SAFETY: plain bit reinterpretation between two 128-bit vector types.
    unsafe { transmute(a) }
}

/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
/// integer vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castps_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_castps_si128(a: __m128) -> __m128i {
    // SAFETY: plain bit reinterpretation between two 128-bit vector types.
    unsafe { transmute(a) }
}

/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[2 x double]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_castsi128_pd(a: __m128i) -> __m128d {
    // SAFETY: plain bit reinterpretation between two 128-bit vector types.
    unsafe { transmute(a) }
}

/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[4 x float]`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_castsi128_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_castsi128_ps(a: __m128i) -> __m128 {
    // SAFETY: plain bit reinterpretation between two 128-bit vector types.
    unsafe { transmute(a) }
}
3119
/// Returns vector of type __m128d with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_undefined_pd() -> __m128d {
    // Evaluated at compile time; an all-zero bit pattern is always valid here.
    const { unsafe { mem::zeroed() } }
}
3133
/// Returns vector of type __m128i with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_undefined_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_undefined_si128() -> __m128i {
    // Evaluated at compile time; an all-zero bit pattern is always valid here.
    const { unsafe { mem::zeroed() } }
}
3147
/// The resulting `__m128d` element is composed by the high-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first input
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpackhi_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    // Indices 1 (high lane of `a`) and 3 (high lane of `b`).
    unsafe { simd_shuffle!(a, b, [1, 3]) }
}
3163
/// The resulting `__m128d` element is composed by the low-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input
/// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_unpacklo_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    // Indices 0 (low lane of `a`) and 2 (low lane of `b`).
    unsafe { simd_shuffle!(a, b, [0, 2]) }
}
3179
// Declarations of the LLVM intrinsics that back the SSE2 functions above.
// The `link_name` strings must match LLVM's intrinsic names exactly.
#[allow(improper_ctypes)]
unsafe extern "C" {
    #[link_name = "llvm.x86.sse2.pause"]
    fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    fn clflush(p: *const u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    fn mfence();
    #[link_name = "llvm.x86.sse2.psad.bw"]
    fn psadbw(a: u8x16, b: u8x16) -> u64x2;
    // Packed shifts where the count is supplied in a vector register.
    #[link_name = "llvm.x86.sse2.psll.w"]
    fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.d"]
    fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.q"]
    fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psra.w"]
    fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.d"]
    fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    fn psrlq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    fn cvtps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
    // Saturating pack operations.
    #[link_name = "llvm.x86.sse2.packsswb.128"]
    fn packsswb(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.x86.sse2.packssdw.128"]
    fn packssdw(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.x86.sse2.packuswb.128"]
    fn packuswb(a: i16x8, b: i16x8) -> u8x16;
    // Scalar/packed double-precision min/max and comparisons.
    #[link_name = "llvm.x86.sse2.max.sd"]
    fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    fn minpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    fn comineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
    // Conversions between floating-point and integer vectors/scalars.
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    fn cvttps2dq(a: __m128) -> i32x4;
}
3267
3268#[cfg(test)]
3269mod tests {
3270    use crate::core_arch::assert_eq_const as assert_eq;
3271    use crate::{
3272        core_arch::{simd::*, x86::*},
3273        hint::black_box,
3274    };
3275    use std::{
3276        boxed, f32, f64,
3277        mem::{self, transmute},
3278        ptr,
3279    };
3280    use stdarch_test::simd_test;
3281
    const NAN: f64 = f64::NAN;

    // Smoke test: `pause` has no observable result, so returning without
    // faulting is the whole test.
    #[test]
    fn test_mm_pause() {
        _mm_pause()
    }

    // Flush a live stack byte to exercise the pointer-taking path.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_clflush() {
        let x = 0_u8;
        _mm_clflush(ptr::addr_of!(x));
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_lfence() {
        _mm_lfence();
    }

    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_mfence() {
        _mm_mfence();
    }
3308
    // Lane-wise wrapping addition of packed i8.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_add_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_add_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    // i8::MAX + 1 wraps around to i8::MIN (non-saturating add).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8_overflow() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_add_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-128));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_add_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_add_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_add_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_add_epi32(a, b);
        let e = _mm_setr_epi32(4, 6, 8, 10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_add_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_add_epi64(a, b);
        let e = _mm_setr_epi64x(2, 4);
        assert_eq_m128i(r, e);
    }
3358
    // Saturating i8 addition; none of these lanes saturate.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_adds_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    // i8::MAX + 1 saturates and stays at i8::MAX.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_positive() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    // i8::MIN + (-1) saturates and stays at i8::MIN.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_negative() {
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(-1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_adds_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(-1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_adds_epu8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    // u8::MAX (`!0`) + 1 saturates and stays at u8::MAX.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8_saturate() {
        let a = _mm_set1_epi8(!0);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_adds_epu16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epu16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16_saturate() {
        let a = _mm_set1_epi16(!0);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epu16(a, b);
        assert_eq_m128i(r, a);
    }
3454
    // Rounding unsigned average: (3 + 9 + 1) >> 1 == 6.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_avg_epu8() {
        let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
        let r = _mm_avg_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_avg_epu16() {
        let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
        let r = _mm_avg_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(6));
    }

    // Multiply adjacent i16 pairs and add each pair into an i32 lane.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_madd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm_madd_epi16(a, b);
        let e = _mm_setr_epi32(29, 81, 149, 233);
        assert_eq_m128i(r, e);

        // Test large values.
        // MIN*MIN+MIN*MIN will overflow into i32::MIN.
        let a = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MIN,
            i16::MAX,
            0,
            0,
        );
        let b = _mm_setr_epi16(
            i16::MAX,
            i16::MAX,
            i16::MIN,
            i16::MIN,
            i16::MAX,
            i16::MIN,
            0,
            0,
        );
        let r = _mm_madd_epi16(a, b);
        let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
        assert_eq_m128i(r, e);
    }
3503
    // Signed i16 max picks the positive operand.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_max_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_max_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    // Unsigned u8 max: `!0` (255) beats 1.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_max_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_max_epu8(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_min_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_min_epi16(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_min_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_min_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    // High 16 bits of the signed 32-bit product 1000 * -1001.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_mulhi_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mulhi_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-16));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_mulhi_epu16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
        let r = _mm_mulhi_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(15));
    }

    // Low 16 bits of the 32-bit product.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_mullo_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mullo_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-17960));
    }

    // Only the low 32 bits of each 64-bit lane are multiplied, so the
    // second lane (1 << 34, 1 << 35) produces zero.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_mul_epu32() {
        let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
        let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
        let r = _mm_mul_epu32(a, b);
        let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
        assert_eq_m128i(r, e);
    }
3565
    // Sum of absolute byte differences, accumulated per 8-byte half.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sad_epu8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
            1, 2, 3, 4,
            155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
            1, 2, 3, 4,
        );
        let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
        let r = _mm_sad_epu8(a, b);
        let e = _mm_setr_epi64x(1020, 614);
        assert_eq_m128i(r, e);
    }

    // Lane-wise wrapping subtraction across the four element widths.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_sub_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
        let r = _mm_sub_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-1));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_sub_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
        let r = _mm_sub_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-1));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_sub_epi32() {
        let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
        let r = _mm_sub_epi32(a, b);
        assert_eq_m128i(r, _mm_set1_epi32(-1));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_sub_epi64() {
        let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
        let r = _mm_sub_epi64(a, b);
        assert_eq_m128i(r, _mm_set1_epi64x(-1));
    }
3608
    // Saturating i8 subtraction; these lanes do not saturate.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_subs_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    // i8::MAX - (-1) saturates at i8::MAX.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_positive() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(-1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    // i8::MIN - 1 saturates at i8::MIN.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_negative() {
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_subs_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(-1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_subs_epu8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    // Unsigned 0 - 1 saturates at 0.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8_saturate() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_subs_epu16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16_saturate() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, a);
    }
3684
    // Whole-register byte shift, checked at 1, 15, and 16 (all-zero) bytes.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_slli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<1>(a);
        let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<15>(a);
        let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    // Immediate-count i16 left shift; a count of 16 zeroes every lane.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_slli_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_slli_epi16::<4>(a);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
        );
        let r = _mm_slli_epi16::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }

    // Vector-count i16 left shift: only the low 64 bits of the count vector
    // are used, and out-of-range counts (16, i64::MAX) zero the result.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi16() {
        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
        );
        let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
        assert_eq_m128i(r, a);
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
        assert_eq_m128i(r, _mm_set1_epi16(0));
        let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
        assert_eq_m128i(r, _mm_set1_epi16(0));
    }
3738
3739    #[simd_test(enable = "sse2")]
3740    const unsafe fn test_mm_slli_epi32() {
3741        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3742        let r = _mm_slli_epi32::<4>(a);
3743        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3744        let r = _mm_slli_epi32::<32>(a);
3745        assert_eq_m128i(r, _mm_set1_epi32(0));
3746    }
3747
3748    #[simd_test(enable = "sse2")]
3749    unsafe fn test_mm_sll_epi32() {
3750        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3751        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3752        assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3753        let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3754        assert_eq_m128i(r, a);
3755        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3756        assert_eq_m128i(r, _mm_set1_epi32(0));
3757        let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3758        assert_eq_m128i(r, _mm_set1_epi32(0));
3759    }
3760
3761    #[simd_test(enable = "sse2")]
3762    const unsafe fn test_mm_slli_epi64() {
3763        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3764        let r = _mm_slli_epi64::<4>(a);
3765        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3766        let r = _mm_slli_epi64::<64>(a);
3767        assert_eq_m128i(r, _mm_set1_epi64x(0));
3768    }
3769
3770    #[simd_test(enable = "sse2")]
3771    unsafe fn test_mm_sll_epi64() {
3772        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3773        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3774        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3775        let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3776        assert_eq_m128i(r, a);
3777        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3778        assert_eq_m128i(r, _mm_set1_epi64x(0));
3779        let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3780        assert_eq_m128i(r, _mm_set1_epi64x(0));
3781    }
3782
3783    #[simd_test(enable = "sse2")]
3784    const unsafe fn test_mm_srai_epi16() {
3785        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3786        let r = _mm_srai_epi16::<4>(a);
3787        assert_eq_m128i(
3788            r,
3789            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3790        );
3791        let r = _mm_srai_epi16::<16>(a);
3792        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3793    }
3794
3795    #[simd_test(enable = "sse2")]
3796    unsafe fn test_mm_sra_epi16() {
3797        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3798        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3799        assert_eq_m128i(
3800            r,
3801            _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3802        );
3803        let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3804        assert_eq_m128i(r, a);
3805        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3806        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3807        let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3808        assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3809    }
3810
3811    #[simd_test(enable = "sse2")]
3812    const unsafe fn test_mm_srai_epi32() {
3813        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3814        let r = _mm_srai_epi32::<4>(a);
3815        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3816        let r = _mm_srai_epi32::<32>(a);
3817        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3818    }
3819
3820    #[simd_test(enable = "sse2")]
3821    unsafe fn test_mm_sra_epi32() {
3822        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3823        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3824        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3825        let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3826        assert_eq_m128i(r, a);
3827        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3828        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3829        let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3830        assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3831    }
3832
3833    #[simd_test(enable = "sse2")]
3834    const unsafe fn test_mm_srli_si128() {
3835        #[rustfmt::skip]
3836        let a = _mm_setr_epi8(
3837            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3838        );
3839        let r = _mm_srli_si128::<1>(a);
3840        #[rustfmt::skip]
3841        let e = _mm_setr_epi8(
3842            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3843        );
3844        assert_eq_m128i(r, e);
3845
3846        #[rustfmt::skip]
3847        let a = _mm_setr_epi8(
3848            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3849        );
3850        let r = _mm_srli_si128::<15>(a);
3851        let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3852        assert_eq_m128i(r, e);
3853
3854        #[rustfmt::skip]
3855        let a = _mm_setr_epi8(
3856            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3857        );
3858        let r = _mm_srli_si128::<16>(a);
3859        assert_eq_m128i(r, _mm_set1_epi8(0));
3860    }
3861
3862    #[simd_test(enable = "sse2")]
3863    const unsafe fn test_mm_srli_epi16() {
3864        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3865        let r = _mm_srli_epi16::<4>(a);
3866        assert_eq_m128i(
3867            r,
3868            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3869        );
3870        let r = _mm_srli_epi16::<16>(a);
3871        assert_eq_m128i(r, _mm_set1_epi16(0));
3872    }
3873
3874    #[simd_test(enable = "sse2")]
3875    unsafe fn test_mm_srl_epi16() {
3876        let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3877        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3878        assert_eq_m128i(
3879            r,
3880            _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3881        );
3882        let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3883        assert_eq_m128i(r, a);
3884        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3885        assert_eq_m128i(r, _mm_set1_epi16(0));
3886        let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3887        assert_eq_m128i(r, _mm_set1_epi16(0));
3888    }
3889
3890    #[simd_test(enable = "sse2")]
3891    const unsafe fn test_mm_srli_epi32() {
3892        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3893        let r = _mm_srli_epi32::<4>(a);
3894        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3895        let r = _mm_srli_epi32::<32>(a);
3896        assert_eq_m128i(r, _mm_set1_epi32(0));
3897    }
3898
3899    #[simd_test(enable = "sse2")]
3900    unsafe fn test_mm_srl_epi32() {
3901        let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3902        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3903        assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3904        let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3905        assert_eq_m128i(r, a);
3906        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3907        assert_eq_m128i(r, _mm_set1_epi32(0));
3908        let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3909        assert_eq_m128i(r, _mm_set1_epi32(0));
3910    }
3911
3912    #[simd_test(enable = "sse2")]
3913    const unsafe fn test_mm_srli_epi64() {
3914        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3915        let r = _mm_srli_epi64::<4>(a);
3916        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3917        let r = _mm_srli_epi64::<64>(a);
3918        assert_eq_m128i(r, _mm_set1_epi64x(0));
3919    }
3920
3921    #[simd_test(enable = "sse2")]
3922    unsafe fn test_mm_srl_epi64() {
3923        let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3924        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3925        assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3926        let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3927        assert_eq_m128i(r, a);
3928        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3929        assert_eq_m128i(r, _mm_set1_epi64x(0));
3930        let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3931        assert_eq_m128i(r, _mm_set1_epi64x(0));
3932    }
3933
    // Bitwise ops on splat bytes 5 (0b101) and 3 (0b011):
    // AND -> 1, ANDNOT(!a & b) -> 2, OR -> 7, XOR -> 6.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_and_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_and_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(1));
    }

    // `andnot` complements the FIRST operand: !5 & 3 == 2.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_andnot_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_andnot_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(2));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_or_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_or_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(7));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_xor_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_xor_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }
3965
    // Lane-wise equality: `b` is `a` reversed except lane 2, which matches,
    // so only that lane becomes an all-ones mask.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cmpeq_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi8(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            )
        );
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cmpeq_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi16(a, b);
        assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cmpeq_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(3, 2, 2, 0);
        let r = _mm_cmpeq_epi32(a, b);
        assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
    }

    // Signed greater-than: only the lane holding 5 > 0 sets its mask.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cmpgt_epi8() {
        let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi8(0);
        let r = _mm_cmpgt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cmpgt_epi16() {
        let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi16(0);
        let r = _mm_cmpgt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cmpgt_epi32() {
        let a = _mm_set_epi32(5, 0, 0, 0);
        let b = _mm_set1_epi32(0);
        let r = _mm_cmpgt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }

    // Signed less-than: mirror of the cmpgt tests with operands swapped.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cmplt_epi8() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cmplt_epi16() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cmplt_epi32() {
        let a = _mm_set1_epi32(0);
        let b = _mm_set_epi32(5, 0, 0, 0);
        let r = _mm_cmplt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }
4047
    // i32 -> f64 conversion widens, so only the LOW two i32 lanes (5, 15)
    // appear in the f64 result; the high lanes are dropped.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cvtepi32_pd() {
        let a = _mm_set_epi32(35, 25, 15, 5);
        let r = _mm_cvtepi32_pd(a);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
    }

    // Converts the scalar into the low f64 lane; the high lane of `a`
    // passes through unchanged.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cvtsi32_sd() {
        let a = _mm_set1_pd(3.5);
        let r = _mm_cvtsi32_sd(a, 5);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
    }

    // Lane-for-lane i32 -> f32 conversion.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cvtepi32_ps() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let r = _mm_cvtepi32_ps(a);
        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
    }

    // Lane-for-lane f32 -> i32 conversion (exact integers, no rounding
    // ambiguity).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_epi32() {
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvtps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
    }

    // Scalar -> vector: value lands in lane 0, upper lanes zeroed.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cvtsi32_si128() {
        let r = _mm_cvtsi32_si128(5);
        assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
    }

    // Vector -> scalar: extracts lane 0.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cvtsi128_si32() {
        let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
        assert_eq!(r, 5);
    }
4087
    // `set` takes arguments highest lane first; each test cross-checks it
    // against `setr` (lowest lane first) with the argument order reversed.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_set_epi64x() {
        let r = _mm_set_epi64x(0, 1);
        assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_set_epi32() {
        let r = _mm_set_epi32(0, 1, 2, 3);
        assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_set_epi16() {
        let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_set_epi8() {
        #[rustfmt::skip]
        let r = _mm_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        assert_eq_m128i(r, e);
    }
4119
4120    #[simd_test(enable = "sse2")]
4121    const unsafe fn test_mm_set1_epi64x() {
4122        let r = _mm_set1_epi64x(1);
4123        assert_eq_m128i(r, _mm_set1_epi64x(1));
4124    }
4125
4126    #[simd_test(enable = "sse2")]
4127    const unsafe fn test_mm_set1_epi32() {
4128        let r = _mm_set1_epi32(1);
4129        assert_eq_m128i(r, _mm_set1_epi32(1));
4130    }
4131
4132    #[simd_test(enable = "sse2")]
4133    const unsafe fn test_mm_set1_epi16() {
4134        let r = _mm_set1_epi16(1);
4135        assert_eq_m128i(r, _mm_set1_epi16(1));
4136    }
4137
4138    #[simd_test(enable = "sse2")]
4139    const unsafe fn test_mm_set1_epi8() {
4140        let r = _mm_set1_epi8(1);
4141        assert_eq_m128i(r, _mm_set1_epi8(1));
4142    }
4143
4144    #[simd_test(enable = "sse2")]
4145    const unsafe fn test_mm_setr_epi32() {
4146        let r = _mm_setr_epi32(0, 1, 2, 3);
4147        assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
4148    }
4149
4150    #[simd_test(enable = "sse2")]
4151    const unsafe fn test_mm_setr_epi16() {
4152        let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4153        assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
4154    }
4155
4156    #[simd_test(enable = "sse2")]
4157    const unsafe fn test_mm_setr_epi8() {
4158        #[rustfmt::skip]
4159        let r = _mm_setr_epi8(
4160            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4161        );
4162        #[rustfmt::skip]
4163        let e = _mm_setr_epi8(
4164            0, 1, 2, 3, 4, 5, 6, 7,
4165            8, 9, 10, 11, 12, 13, 14, 15,
4166        );
4167        assert_eq_m128i(r, e);
4168    }
4169
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_setzero_si128() {
        let r = _mm_setzero_si128();
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    // Loads only the low 64 bits; the upper quadword of the result is zeroed.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_loadl_epi64() {
        let a = _mm_setr_epi64x(6, 5);
        let r = _mm_loadl_epi64(ptr::addr_of!(a));
        assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
    }

    // Aligned 128-bit load round-trips the value (`a` is a stack __m128i,
    // which is 16-byte aligned).
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_load_si128() {
        let a = _mm_set_epi64x(5, 6);
        let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(a, r);
    }

    // Unaligned 128-bit load round-trips the value.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_loadu_si128() {
        let a = _mm_set_epi64x(5, 6);
        let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(a, r);
    }

    // Conditional byte store: only the byte whose mask lane has its high
    // bit (0x80) set is written; all other destination bytes stay 0.
    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_maskmoveu_si128() {
        let a = _mm_set1_epi8(9);
        #[rustfmt::skip]
        let mask = _mm_set_epi8(
            0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
        );
        let mut r = _mm_set1_epi8(0);
        _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
        // Fence makes the weakly-ordered store visible before the check.
        _mm_sfence();
        let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_store_si128() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi8(0);
        _mm_store_si128(&mut r, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_storeu_si128() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi8(0);
        _mm_storeu_si128(&mut r, a);
        assert_eq_m128i(r, a);
    }

    // Stores only the low 64 bits; the destination's upper quadword keeps
    // its prior contents (zero here).
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_storel_epi64() {
        let a = _mm_setr_epi64x(2, 9);
        let mut r = _mm_set1_epi8(0);
        _mm_storel_epi64(&mut r, a);
        assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
    }

    // Non-temporal 128-bit store; `_mm_sfence` orders it before the read-back.
    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_si128() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut r = _mm_undefined_si128();
        _mm_stream_si128(ptr::addr_of_mut!(r), a);
        _mm_sfence();
        assert_eq_m128i(r, a);
    }

    // Non-temporal 32-bit scalar store into heap memory.
    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_si32() {
        let a: i32 = 7;
        let mut mem = boxed::Box::<i32>::new(-1);
        _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
        _mm_sfence();
        assert_eq!(a, *mem);
    }
4262
    // Copies the low 64-bit lane and zeroes the upper one.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_move_epi64() {
        let a = _mm_setr_epi64x(5, 6);
        let r = _mm_move_epi64(a);
        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
    }

    // Pack i16 -> i8 with signed saturation: 0x80 clamps to 0x7F and
    // -0x81 clamps to -0x80; `a` fills the low 8 bytes, `b` the high 8.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packs_epi16() {
        let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
        let r = _mm_packs_epi16(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
            )
        );
    }

    // Pack i32 -> i16 with signed saturation to [-0x8000, 0x7FFF].
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packs_epi32() {
        let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
        let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
        let r = _mm_packs_epi32(a, b);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
        );
    }

    // Pack i16 -> u8 with unsigned saturation: 0x100 clamps to 0xFF (!0)
    // and -1 clamps to 0.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packus_epi16() {
        let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
        let r = _mm_packus_epi16(a, b);
        assert_eq_m128i(
            r,
            _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
        );
    }

    // Extract zero-extends the 16-bit lane into i32, so lane value -1
    // reads back as 0xFFFF, not -1.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_extract_epi16() {
        let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
        let r1 = _mm_extract_epi16::<0>(a);
        let r2 = _mm_extract_epi16::<3>(a);
        assert_eq!(r1, 0xFFFF);
        assert_eq!(r2, 3);
    }

    // Insert replaces a single 16-bit lane, leaving the others untouched.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_insert_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_insert_epi16::<0>(a, 9);
        let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    // Gathers the top bit of each byte into a 16-bit mask; byte 0 maps to
    // mask bit 0.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_movemask_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
            0b0101, 0b1111_0000u8 as i8, 0, 0,
            0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
            0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
        );
        let r = _mm_movemask_epi8(a);
        assert_eq!(r, 0b10100110_00100101);
    }
4335
    // Immediate 0b00_01_01_11 selects source lanes (3, 1, 1, 0) for result
    // lanes (0, 1, 2, 3), read two bits at a time from the low end.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_shuffle_epi32() {
        let a = _mm_setr_epi32(5, 10, 15, 20);
        let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
        let e = _mm_setr_epi32(20, 10, 10, 5);
        assert_eq_m128i(r, e);
    }

    // Same selector applied only to the HIGH four 16-bit lanes; the low
    // four pass through unchanged.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_shufflehi_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
        let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
        let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
        assert_eq_m128i(r, e);
    }

    // Same selector applied only to the LOW four 16-bit lanes; the high
    // four pass through unchanged.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_shufflelo_epi16() {
        let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
        let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
        let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }
4359
    // Unpack-high interleaves the UPPER halves of `a` and `b`:
    // a[8], b[8], a[9], b[9], ...
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_unpackhi_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_unpackhi_epi16(a, b);
        let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_unpackhi_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_unpackhi_epi32(a, b);
        let e = _mm_setr_epi32(2, 6, 3, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_unpackhi_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_unpackhi_epi64(a, b);
        let e = _mm_setr_epi64x(1, 3);
        assert_eq_m128i(r, e);
    }

    // Unpack-low interleaves the LOWER halves of `a` and `b`:
    // a[0], b[0], a[1], b[1], ...
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_unpacklo_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            0, 16, 1, 17, 2, 18, 3, 19,
            4, 20, 5, 21, 6, 22, 7, 23,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_unpacklo_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_unpacklo_epi16(a, b);
        let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_unpacklo_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_unpacklo_epi32(a, b);
        let e = _mm_setr_epi32(0, 4, 1, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_unpacklo_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_unpacklo_epi64(a, b);
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }
4452
    // The `_sd` (scalar double) intrinsics operate on the LOW lane only and
    // pass the high lane of the first operand through; the `_pd` variants
    // operate on both lanes.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_add_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_add_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_add_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_add_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_div_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_div_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_div_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_div_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_max_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_max_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));

        // Check SSE(2)-specific semantics for -0.0 handling.
        // `max(-0.0, 0.0)` returns the SECOND operand bit-for-bit, so the
        // comparison must be on raw bytes, not float equality.
        let a = _mm_setr_pd(-0.0, 0.0);
        let b = _mm_setr_pd(0.0, 0.0);
        let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
        let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
        let a: [u8; 16] = transmute(a);
        let b: [u8; 16] = transmute(b);
        assert_eq!(r1, b);
        assert_eq!(r2, a);
        assert_ne!(a, b); // sanity check that -0.0 is actually present
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_min_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_min_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));

        // Check SSE(2)-specific semantics for -0.0 handling.
        // Like `max`, `min` returns the SECOND operand when the inputs
        // compare equal (-0.0 == 0.0), so raw-byte comparison is required.
        let a = _mm_setr_pd(-0.0, 0.0);
        let b = _mm_setr_pd(0.0, 0.0);
        let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
        let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
        let a: [u8; 16] = transmute(a);
        let b: [u8; 16] = transmute(b);
        assert_eq!(r1, b);
        assert_eq!(r2, a);
        assert_ne!(a, b); // sanity check that -0.0 is actually present
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_mul_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_mul_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_mul_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_mul_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
    }

    // `sqrt_sd` takes the square root of the low lane of `b` and the high
    // lane from `a`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sqrt_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sqrt_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sqrt_pd() {
        let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
        assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_sub_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sub_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_sub_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sub_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
    }

    // Bitwise ops on f64 vectors: operands are built from integer bit
    // patterns (5 = 0b101, 3 = 0b011) via transmute, same truth tables as
    // the si128 tests above.
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_and_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_and_pd(a, b);
        let e = transmute(u64x2::splat(1));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_andnot_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_andnot_pd(a, b);
        let e = transmute(u64x2::splat(2));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_or_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_or_pd(a, b);
        let e = transmute(u64x2::splat(7));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_xor_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_xor_pd(a, b);
        let e = transmute(u64x2::splat(6));
        assert_eq_m128d(r, e);
    }

    // Scalar compare: the low lane becomes an all-ones mask (equal) and the
    // high lane is copied from `a` (2.0's bit pattern).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
        let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
        assert_eq_m128i(r, e);
    }
4628
4629    #[simd_test(enable = "sse2")]
4630    unsafe fn test_mm_cmplt_sd() {
4631        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4632        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4633        let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
4634        assert_eq_m128i(r, e);
4635    }
4636
4637    #[simd_test(enable = "sse2")]
4638    unsafe fn test_mm_cmple_sd() {
4639        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4640        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4641        let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
4642        assert_eq_m128i(r, e);
4643    }
4644
4645    #[simd_test(enable = "sse2")]
4646    unsafe fn test_mm_cmpgt_sd() {
4647        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4648        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4649        let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
4650        assert_eq_m128i(r, e);
4651    }
4652
4653    #[simd_test(enable = "sse2")]
4654    unsafe fn test_mm_cmpge_sd() {
4655        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4656        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4657        let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
4658        assert_eq_m128i(r, e);
4659    }
4660
4661    #[simd_test(enable = "sse2")]
4662    unsafe fn test_mm_cmpord_sd() {
4663        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4664        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4665        let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
4666        assert_eq_m128i(r, e);
4667    }
4668
4669    #[simd_test(enable = "sse2")]
4670    unsafe fn test_mm_cmpunord_sd() {
4671        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4672        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4673        let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
4674        assert_eq_m128i(r, e);
4675    }
4676
4677    #[simd_test(enable = "sse2")]
4678    unsafe fn test_mm_cmpneq_sd() {
4679        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4680        let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4681        let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
4682        assert_eq_m128i(r, e);
4683    }
4684
4685    #[simd_test(enable = "sse2")]
4686    unsafe fn test_mm_cmpnlt_sd() {
4687        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4688        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4689        let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
4690        assert_eq_m128i(r, e);
4691    }
4692
4693    #[simd_test(enable = "sse2")]
4694    unsafe fn test_mm_cmpnle_sd() {
4695        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4696        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4697        let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
4698        assert_eq_m128i(r, e);
4699    }
4700
4701    #[simd_test(enable = "sse2")]
4702    unsafe fn test_mm_cmpngt_sd() {
4703        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4704        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4705        let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
4706        assert_eq_m128i(r, e);
4707    }
4708
4709    #[simd_test(enable = "sse2")]
4710    unsafe fn test_mm_cmpnge_sd() {
4711        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4712        let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4713        let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
4714        assert_eq_m128i(r, e);
4715    }
4716
4717    #[simd_test(enable = "sse2")]
4718    unsafe fn test_mm_cmpeq_pd() {
4719        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4720        let e = _mm_setr_epi64x(!0, 0);
4721        let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
4722        assert_eq_m128i(r, e);
4723    }
4724
4725    #[simd_test(enable = "sse2")]
4726    unsafe fn test_mm_cmplt_pd() {
4727        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4728        let e = _mm_setr_epi64x(0, !0);
4729        let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
4730        assert_eq_m128i(r, e);
4731    }
4732
4733    #[simd_test(enable = "sse2")]
4734    unsafe fn test_mm_cmple_pd() {
4735        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4736        let e = _mm_setr_epi64x(!0, !0);
4737        let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
4738        assert_eq_m128i(r, e);
4739    }
4740
4741    #[simd_test(enable = "sse2")]
4742    unsafe fn test_mm_cmpgt_pd() {
4743        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4744        let e = _mm_setr_epi64x(0, 0);
4745        let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
4746        assert_eq_m128i(r, e);
4747    }
4748
4749    #[simd_test(enable = "sse2")]
4750    unsafe fn test_mm_cmpge_pd() {
4751        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4752        let e = _mm_setr_epi64x(!0, 0);
4753        let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
4754        assert_eq_m128i(r, e);
4755    }
4756
4757    #[simd_test(enable = "sse2")]
4758    unsafe fn test_mm_cmpord_pd() {
4759        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4760        let e = _mm_setr_epi64x(0, !0);
4761        let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
4762        assert_eq_m128i(r, e);
4763    }
4764
4765    #[simd_test(enable = "sse2")]
4766    unsafe fn test_mm_cmpunord_pd() {
4767        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4768        let e = _mm_setr_epi64x(!0, 0);
4769        let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
4770        assert_eq_m128i(r, e);
4771    }
4772
4773    #[simd_test(enable = "sse2")]
4774    unsafe fn test_mm_cmpneq_pd() {
4775        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4776        let e = _mm_setr_epi64x(!0, !0);
4777        let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
4778        assert_eq_m128i(r, e);
4779    }
4780
4781    #[simd_test(enable = "sse2")]
4782    unsafe fn test_mm_cmpnlt_pd() {
4783        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4784        let e = _mm_setr_epi64x(0, 0);
4785        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
4786        assert_eq_m128i(r, e);
4787    }
4788
4789    #[simd_test(enable = "sse2")]
4790    unsafe fn test_mm_cmpnle_pd() {
4791        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4792        let e = _mm_setr_epi64x(0, 0);
4793        let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
4794        assert_eq_m128i(r, e);
4795    }
4796
4797    #[simd_test(enable = "sse2")]
4798    unsafe fn test_mm_cmpngt_pd() {
4799        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4800        let e = _mm_setr_epi64x(0, !0);
4801        let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
4802        assert_eq_m128i(r, e);
4803    }
4804
4805    #[simd_test(enable = "sse2")]
4806    unsafe fn test_mm_cmpnge_pd() {
4807        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4808        let e = _mm_setr_epi64x(0, !0);
4809        let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
4810        assert_eq_m128i(r, e);
4811    }
4812
4813    #[simd_test(enable = "sse2")]
4814    unsafe fn test_mm_comieq_sd() {
4815        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4816        assert!(_mm_comieq_sd(a, b) != 0);
4817
4818        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
4819        assert!(_mm_comieq_sd(a, b) == 0);
4820    }
4821
4822    #[simd_test(enable = "sse2")]
4823    unsafe fn test_mm_comilt_sd() {
4824        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4825        assert!(_mm_comilt_sd(a, b) == 0);
4826    }
4827
4828    #[simd_test(enable = "sse2")]
4829    unsafe fn test_mm_comile_sd() {
4830        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4831        assert!(_mm_comile_sd(a, b) != 0);
4832    }
4833
4834    #[simd_test(enable = "sse2")]
4835    unsafe fn test_mm_comigt_sd() {
4836        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4837        assert!(_mm_comigt_sd(a, b) == 0);
4838    }
4839
4840    #[simd_test(enable = "sse2")]
4841    unsafe fn test_mm_comige_sd() {
4842        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4843        assert!(_mm_comige_sd(a, b) != 0);
4844    }
4845
4846    #[simd_test(enable = "sse2")]
4847    unsafe fn test_mm_comineq_sd() {
4848        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4849        assert!(_mm_comineq_sd(a, b) == 0);
4850    }
4851
4852    #[simd_test(enable = "sse2")]
4853    unsafe fn test_mm_ucomieq_sd() {
4854        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4855        assert!(_mm_ucomieq_sd(a, b) != 0);
4856
4857        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
4858        assert!(_mm_ucomieq_sd(a, b) == 0);
4859    }
4860
4861    #[simd_test(enable = "sse2")]
4862    unsafe fn test_mm_ucomilt_sd() {
4863        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4864        assert!(_mm_ucomilt_sd(a, b) == 0);
4865    }
4866
4867    #[simd_test(enable = "sse2")]
4868    unsafe fn test_mm_ucomile_sd() {
4869        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4870        assert!(_mm_ucomile_sd(a, b) != 0);
4871    }
4872
4873    #[simd_test(enable = "sse2")]
4874    unsafe fn test_mm_ucomigt_sd() {
4875        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4876        assert!(_mm_ucomigt_sd(a, b) == 0);
4877    }
4878
4879    #[simd_test(enable = "sse2")]
4880    unsafe fn test_mm_ucomige_sd() {
4881        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4882        assert!(_mm_ucomige_sd(a, b) != 0);
4883    }
4884
4885    #[simd_test(enable = "sse2")]
4886    unsafe fn test_mm_ucomineq_sd() {
4887        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4888        assert!(_mm_ucomineq_sd(a, b) == 0);
4889    }
4890
4891    #[simd_test(enable = "sse2")]
4892    const unsafe fn test_mm_movemask_pd() {
4893        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4894        assert_eq!(r, 0b01);
4895
4896        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4897        assert_eq!(r, 0b11);
4898    }
4899
    // 16-byte-aligned backing storage so the aligned load/store intrinsics
    // (`_mm_load_pd`, `_mm_store_pd`, ...) can be exercised safely.
    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }
4904
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_load_pd() {
        // Aligned load: `Memory` is `#[repr(align(16))]`, so `d` meets the
        // 16-byte alignment requirement of `_mm_load_pd`.
        let mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mem.data;
        let d = vals.as_ptr();

        let r = _mm_load_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }
4916
4917    #[simd_test(enable = "sse2")]
4918    const unsafe fn test_mm_load_sd() {
4919        let a = 1.;
4920        let expected = _mm_setr_pd(a, 0.);
4921        let r = _mm_load_sd(&a);
4922        assert_eq_m128d(r, expected);
4923    }
4924
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_loadh_pd() {
        // Replaces only the high lane with the value read from `&b`;
        // the low lane is kept from `a`.
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
        let r = _mm_loadh_pd(a, &b);
        assert_eq_m128d(r, expected);
    }
4933
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_loadl_pd() {
        // Replaces only the low lane with the value read from `&b`;
        // the high lane is kept from `a`.
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(3., get_m128d(a, 1));
        let r = _mm_loadl_pd(a, &b);
        assert_eq_m128d(r, expected);
    }
4942
    #[simd_test(enable = "sse2")]
    // Miri cannot support this until it is clear how it fits in the Rust memory model
    // (non-temporal store)
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm_stream_pd() {
        // Over-aligned buffer (128 bytes) comfortably satisfies the 16-byte
        // alignment `_mm_stream_pd` requires.
        #[repr(align(128))]
        struct Memory {
            pub data: [f64; 2],
        }
        let a = _mm_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 2] };

        // Non-temporal store followed by `sfence` so the streamed data is
        // globally visible before it is read back below.
        _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
        _mm_sfence();
        for i in 0..2 {
            assert_eq!(mem.data[i], get_m128d(a, i));
        }
    }
4961
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_store_sd() {
        // Stores only the low lane of `a` to `dest`.
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_store_sd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }
4969
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_store_pd() {
        // Aligned store of both lanes; `Memory` guarantees 16-byte alignment.
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        // `black_box` hides `a` from the optimizer so the store is not folded away.
        _mm_store_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 2.0);
    }
4981
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_storeu_pd() {
        // guaranteed to be aligned to 16 bytes
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);

        // so p is *not* aligned to 16 bytes
        let p = vals.as_mut_ptr().offset(1);
        _mm_storeu_pd(p, *black_box(&a));

        // Elements 0 and 3 must be untouched by the unaligned store.
        assert_eq!(*vals, [0.0, 1.0, 2.0, 0.0]);
    }
4995
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_storeu_si16() {
        // Stores only the low 16 bits of `a`; the rest of `r` must be untouched.
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
        _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
        let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m128i(r, e);
    }
5004
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_storeu_si32() {
        // Stores only the low 32 bits of `a`; the rest of `r` must be untouched.
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut r = _mm_setr_epi32(5, 6, 7, 8);
        _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
        let e = _mm_setr_epi32(1, 6, 7, 8);
        assert_eq_m128i(r, e);
    }
5013
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_storeu_si64() {
        // Stores only the low 64 bits of `a`; the high half of `r` must be untouched.
        let a = _mm_setr_epi64x(1, 2);
        let mut r = _mm_setr_epi64x(3, 4);
        _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
        let e = _mm_setr_epi64x(1, 4);
        assert_eq_m128i(r, e);
    }
5022
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_store1_pd() {
        // Broadcast-store: the low lane of `a` is written to both elements.
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store1_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }
5034
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_store_pd1() {
        // Alias of `_mm_store1_pd`: broadcasts the low lane to both elements.
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd1(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }
5046
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_storer_pd() {
        // Reversed store: lanes are written to memory in swapped order.
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_storer_pd(d, *black_box(&a));
        assert_eq!(vals[0], 2.0);
        assert_eq!(vals[1], 1.0);
    }
5058
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_storeh_pd() {
        // Stores only the high lane of `a` to `dest`.
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storeh_pd(&mut dest, a);
        assert_eq!(dest, get_m128d(a, 1));
    }
5066
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_storel_pd() {
        // Stores only the low lane of `a` to `dest`.
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storel_pd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }
5074
5075    #[simd_test(enable = "sse2")]
5076    const unsafe fn test_mm_loadr_pd() {
5077        let mut mem = Memory {
5078            data: [1.0f64, 2.0, 3.0, 4.0],
5079        };
5080        let vals = &mut mem.data;
5081        let d = vals.as_ptr();
5082
5083        let r = _mm_loadr_pd(d);
5084        assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
5085    }
5086
5087    #[simd_test(enable = "sse2")]
5088    const unsafe fn test_mm_loadu_pd() {
5089        // guaranteed to be aligned to 16 bytes
5090        let mut mem = Memory {
5091            data: [1.0f64, 2.0, 3.0, 4.0],
5092        };
5093        let vals = &mut mem.data;
5094
5095        // so this will *not* be aligned to 16 bytes
5096        let d = vals.as_ptr().offset(1);
5097
5098        let r = _mm_loadu_pd(d);
5099        let e = _mm_setr_pd(2.0, 3.0);
5100        assert_eq_m128d(r, e);
5101    }
5102
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_loadu_si16() {
        // Loads 16 bits into the low element; all other elements are zeroed.
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
    }
5109
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_loadu_si32() {
        // Loads 32 bits into the low element; all other elements are zeroed.
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
    }
5116
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_loadu_si64() {
        // Loads 64 bits into the low element; the high element is zeroed.
        let a = _mm_setr_epi64x(5, 6);
        let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
    }
5123
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cvtpd_ps() {
        // Narrows two f64 lanes to f32 in the low half; the high half is zeroed.
        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));

        // Values outside the f32 range overflow to the infinities.
        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
    }
5138
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cvtps_pd() {
        // Widens the two low f32 lanes to f64; the two high lanes are ignored.
        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));

        // Widening is exact; infinities carry through.
        let r = _mm_cvtps_pd(_mm_setr_ps(
            f32::MAX,
            f32::INFINITY,
            f32::NEG_INFINITY,
            f32::MIN,
        ));
        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
    }
5152
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_epi32() {
        // Converts two f64 lanes to i32 in the low half; the high half is zeroed.
        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));

        // Out-of-range, infinite, and NaN inputs all produce the
        // "integer indefinite" value i32::MIN.
        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }
5170
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si32() {
        // Converts the low f64 lane to i32.
        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
        assert_eq!(r, -2);

        // Out-of-range and NaN inputs produce the "integer indefinite" i32::MIN.
        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq!(r, i32::MIN);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq!(r, i32::MIN);
    }
5182
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_ss() {
        // Narrows the low f64 lane of `b` into the low f32 lane of the result;
        // the three upper lanes are taken from `a`.
        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
        let b = _mm_setr_pd(2.0, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));

        // Infinity narrows to f32 infinity; upper lanes still pass through.
        let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
        let b = _mm_setr_pd(f64::INFINITY, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(
            r,
            _mm_setr_ps(
                f32::INFINITY,
                f32::NEG_INFINITY,
                f32::MAX,
                f32::NEG_INFINITY,
            ),
        );
    }
5207
5208    #[simd_test(enable = "sse2")]
5209    const unsafe fn test_mm_cvtsd_f64() {
5210        let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
5211        assert_eq!(r, -1.1);
5212    }
5213
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_cvtss_sd() {
        // Widens the low f32 lane of `b` into the low f64 lane of the result;
        // the high lane is taken from `a`.
        let a = _mm_setr_pd(-1.1, 2.2);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));

        // Infinity widens to f64 infinity; the high lane still comes from `a`.
        let a = _mm_setr_pd(-1.1, f64::INFINITY);
        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
    }
5228
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttpd_epi32() {
        // Truncating (round-toward-zero) conversion of two f64 lanes to i32.
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));

        // Infinite and NaN inputs produce the "integer indefinite" i32::MIN.
        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }
5239
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si32() {
        // Truncating conversion of the low f64 lane to i32.
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, -1);

        // Infinite (and NaN) inputs produce the "integer indefinite" i32::MIN.
        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, i32::MIN);
    }
5250
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttps_epi32() {
        // Truncating conversion of four f32 lanes to i32.
        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));

        // Infinite and out-of-range inputs produce the "integer indefinite" i32::MIN.
        let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
    }
5261
5262    #[simd_test(enable = "sse2")]
5263    const unsafe fn test_mm_set_sd() {
5264        let r = _mm_set_sd(-1.0_f64);
5265        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5266    }
5267
5268    #[simd_test(enable = "sse2")]
5269    const unsafe fn test_mm_set1_pd() {
5270        let r = _mm_set1_pd(-1.0_f64);
5271        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5272    }
5273
5274    #[simd_test(enable = "sse2")]
5275    const unsafe fn test_mm_set_pd1() {
5276        let r = _mm_set_pd1(-2.0_f64);
5277        assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5278    }
5279
5280    #[simd_test(enable = "sse2")]
5281    const unsafe fn test_mm_set_pd() {
5282        let r = _mm_set_pd(1.0_f64, 5.0_f64);
5283        assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5284    }
5285
5286    #[simd_test(enable = "sse2")]
5287    const unsafe fn test_mm_setr_pd() {
5288        let r = _mm_setr_pd(1.0_f64, -5.0_f64);
5289        assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
5290    }
5291
5292    #[simd_test(enable = "sse2")]
5293    const unsafe fn test_mm_setzero_pd() {
5294        let r = _mm_setzero_pd();
5295        assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5296    }
5297
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_load1_pd() {
        // Loads one f64 and duplicates it into both lanes.
        let d = -5.0;
        let r = _mm_load1_pd(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }
5304
    #[simd_test(enable = "sse2")]
    const unsafe fn test_mm_load_pd1() {
        // Alias of `_mm_load1_pd`: loads one f64 into both lanes.
        let d = -5.0;
        let r = _mm_load_pd1(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }
5311
5312    #[simd_test(enable = "sse2")]
5313    const unsafe fn test_mm_unpackhi_pd() {
5314        let a = _mm_setr_pd(1.0, 2.0);
5315        let b = _mm_setr_pd(3.0, 4.0);
5316        let r = _mm_unpackhi_pd(a, b);
5317        assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5318    }
5319
5320    #[simd_test(enable = "sse2")]
5321    const unsafe fn test_mm_unpacklo_pd() {
5322        let a = _mm_setr_pd(1.0, 2.0);
5323        let b = _mm_setr_pd(3.0, 4.0);
5324        let r = _mm_unpacklo_pd(a, b);
5325        assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5326    }
5327
5328    #[simd_test(enable = "sse2")]
5329    const unsafe fn test_mm_shuffle_pd() {
5330        let a = _mm_setr_pd(1., 2.);
5331        let b = _mm_setr_pd(3., 4.);
5332        let expected = _mm_setr_pd(1., 3.);
5333        let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5334        assert_eq_m128d(r, expected);
5335    }
5336
5337    #[simd_test(enable = "sse2")]
5338    const unsafe fn test_mm_move_sd() {
5339        let a = _mm_setr_pd(1., 2.);
5340        let b = _mm_setr_pd(3., 4.);
5341        let expected = _mm_setr_pd(3., 2.);
5342        let r = _mm_move_sd(a, b);
5343        assert_eq_m128d(r, expected);
5344    }
5345
5346    #[simd_test(enable = "sse2")]
5347    const unsafe fn test_mm_castpd_ps() {
5348        let a = _mm_set1_pd(0.);
5349        let expected = _mm_set1_ps(0.);
5350        let r = _mm_castpd_ps(a);
5351        assert_eq_m128(r, expected);
5352    }
5353
5354    #[simd_test(enable = "sse2")]
5355    const unsafe fn test_mm_castpd_si128() {
5356        let a = _mm_set1_pd(0.);
5357        let expected = _mm_set1_epi64x(0);
5358        let r = _mm_castpd_si128(a);
5359        assert_eq_m128i(r, expected);
5360    }
5361
5362    #[simd_test(enable = "sse2")]
5363    const unsafe fn test_mm_castps_pd() {
5364        let a = _mm_set1_ps(0.);
5365        let expected = _mm_set1_pd(0.);
5366        let r = _mm_castps_pd(a);
5367        assert_eq_m128d(r, expected);
5368    }
5369
5370    #[simd_test(enable = "sse2")]
5371    const unsafe fn test_mm_castps_si128() {
5372        let a = _mm_set1_ps(0.);
5373        let expected = _mm_set1_epi32(0);
5374        let r = _mm_castps_si128(a);
5375        assert_eq_m128i(r, expected);
5376    }
5377
5378    #[simd_test(enable = "sse2")]
5379    const unsafe fn test_mm_castsi128_pd() {
5380        let a = _mm_set1_epi64x(0);
5381        let expected = _mm_set1_pd(0.);
5382        let r = _mm_castsi128_pd(a);
5383        assert_eq_m128d(r, expected);
5384    }
5385
5386    #[simd_test(enable = "sse2")]
5387    const unsafe fn test_mm_castsi128_ps() {
5388        let a = _mm_set1_epi32(0);
5389        let expected = _mm_set1_ps(0.);
5390        let r = _mm_castsi128_ps(a);
5391        assert_eq_m128(r, expected);
5392    }
5393}