1#[cfg(test)]
4use stdarch_test::assert_instr;
5
6use crate::{
7 core_arch::{simd::*, x86::*},
8 intrinsics::simd::*,
9 intrinsics::sqrtf64,
10 mem, ptr,
11};
12
/// Invokes the `pause` intrinsic.
///
/// The test-only `assert_instr` attribute checks that this lowers to the
/// `pause` instruction when the build has `sse2` enabled.
///
/// NOTE(review): conventionally used as a spin-wait loop hint; confirm the
/// exact semantics against Intel's documentation for `_mm_pause`.
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_pause() {
    // NOTE(review): no `target_feature` gate is applied to this function, so the
    // intrinsic is presumably sound to call on every x86 target — confirm.
    unsafe { pause() }
}
27
/// Invokes the `clflush` intrinsic on the address `p`.
///
/// NOTE(review): presumably invalidates and flushes the cache line containing
/// `p`; confirm against Intel's documentation for `_mm_clflush`.
///
/// # Safety
///
/// This function is declared `unsafe`; the precise requirements on `p` are not
/// visible here. NOTE(review): presumably `p` must be an address that may be
/// legally flushed — confirm.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}
39
/// Invokes the `lfence` intrinsic.
///
/// The test-only `assert_instr` attribute checks that this lowers to the
/// `lfence` instruction.
///
/// NOTE(review): presumably a load fence (serializes prior load operations);
/// confirm against Intel's documentation for `_mm_lfence`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_lfence() {
    // The intrinsic takes no arguments; it is only callable from an `unsafe` context.
    unsafe { lfence() }
}
55
/// Invokes the `mfence` intrinsic.
///
/// The test-only `assert_instr` attribute checks that this lowers to the
/// `mfence` instruction.
///
/// NOTE(review): presumably a full memory fence (serializes prior loads and
/// stores); confirm against Intel's documentation for `_mm_mfence`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_mfence() {
    // The intrinsic takes no arguments; it is only callable from an `unsafe` context.
    unsafe { mfence() }
}
71
72#[inline]
76#[target_feature(enable = "sse2")]
77#[cfg_attr(test, assert_instr(paddb))]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
80pub const fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
81 unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
82}
83
84#[inline]
88#[target_feature(enable = "sse2")]
89#[cfg_attr(test, assert_instr(paddw))]
90#[stable(feature = "simd_x86", since = "1.27.0")]
91#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
92pub const fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
93 unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
94}
95
96#[inline]
100#[target_feature(enable = "sse2")]
101#[cfg_attr(test, assert_instr(paddd))]
102#[stable(feature = "simd_x86", since = "1.27.0")]
103#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
104pub const fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
105 unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
106}
107
108#[inline]
112#[target_feature(enable = "sse2")]
113#[cfg_attr(test, assert_instr(paddq))]
114#[stable(feature = "simd_x86", since = "1.27.0")]
115#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
116pub const fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
117 unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
118}
119
120#[inline]
124#[target_feature(enable = "sse2")]
125#[cfg_attr(test, assert_instr(paddsb))]
126#[stable(feature = "simd_x86", since = "1.27.0")]
127#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
128pub const fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
129 unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
130}
131
132#[inline]
136#[target_feature(enable = "sse2")]
137#[cfg_attr(test, assert_instr(paddsw))]
138#[stable(feature = "simd_x86", since = "1.27.0")]
139#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
140pub const fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
141 unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
142}
143
144#[inline]
148#[target_feature(enable = "sse2")]
149#[cfg_attr(test, assert_instr(paddusb))]
150#[stable(feature = "simd_x86", since = "1.27.0")]
151#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
152pub const fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
153 unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
154}
155
156#[inline]
160#[target_feature(enable = "sse2")]
161#[cfg_attr(test, assert_instr(paddusw))]
162#[stable(feature = "simd_x86", since = "1.27.0")]
163#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
164pub const fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
165 unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
166}
167
168#[inline]
172#[target_feature(enable = "sse2")]
173#[cfg_attr(test, assert_instr(pavgb))]
174#[stable(feature = "simd_x86", since = "1.27.0")]
175#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
176pub const fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
177 unsafe {
178 let a = simd_cast::<_, u16x16>(a.as_u8x16());
179 let b = simd_cast::<_, u16x16>(b.as_u8x16());
180 let r = simd_shr(simd_add(simd_add(a, b), u16x16::splat(1)), u16x16::splat(1));
181 transmute(simd_cast::<_, u8x16>(r))
182 }
183}
184
185#[inline]
189#[target_feature(enable = "sse2")]
190#[cfg_attr(test, assert_instr(pavgw))]
191#[stable(feature = "simd_x86", since = "1.27.0")]
192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
193pub const fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
194 unsafe {
195 let a = simd_cast::<_, u32x8>(a.as_u16x8());
196 let b = simd_cast::<_, u32x8>(b.as_u16x8());
197 let r = simd_shr(simd_add(simd_add(a, b), u32x8::splat(1)), u32x8::splat(1));
198 transmute(simd_cast::<_, u16x8>(r))
199 }
200}
201
202#[inline]
210#[target_feature(enable = "sse2")]
211#[cfg_attr(test, assert_instr(pmaddwd))]
212#[stable(feature = "simd_x86", since = "1.27.0")]
213pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
214 unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
227}
228
229#[inline]
234#[target_feature(enable = "sse2")]
235#[cfg_attr(test, assert_instr(pmaxsw))]
236#[stable(feature = "simd_x86", since = "1.27.0")]
237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
238pub const fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
239 unsafe { simd_imax(a.as_i16x8(), b.as_i16x8()).as_m128i() }
240}
241
242#[inline]
247#[target_feature(enable = "sse2")]
248#[cfg_attr(test, assert_instr(pmaxub))]
249#[stable(feature = "simd_x86", since = "1.27.0")]
250#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
251pub const fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
252 unsafe { simd_imax(a.as_u8x16(), b.as_u8x16()).as_m128i() }
253}
254
255#[inline]
260#[target_feature(enable = "sse2")]
261#[cfg_attr(test, assert_instr(pminsw))]
262#[stable(feature = "simd_x86", since = "1.27.0")]
263#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
264pub const fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
265 unsafe { simd_imin(a.as_i16x8(), b.as_i16x8()).as_m128i() }
266}
267
268#[inline]
273#[target_feature(enable = "sse2")]
274#[cfg_attr(test, assert_instr(pminub))]
275#[stable(feature = "simd_x86", since = "1.27.0")]
276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
277pub const fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
278 unsafe { simd_imin(a.as_u8x16(), b.as_u8x16()).as_m128i() }
279}
280
281#[inline]
288#[target_feature(enable = "sse2")]
289#[cfg_attr(test, assert_instr(pmulhw))]
290#[stable(feature = "simd_x86", since = "1.27.0")]
291#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
292pub const fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
293 unsafe {
294 let a = simd_cast::<_, i32x8>(a.as_i16x8());
295 let b = simd_cast::<_, i32x8>(b.as_i16x8());
296 let r = simd_shr(simd_mul(a, b), i32x8::splat(16));
297 transmute(simd_cast::<i32x8, i16x8>(r))
298 }
299}
300
301#[inline]
308#[target_feature(enable = "sse2")]
309#[cfg_attr(test, assert_instr(pmulhuw))]
310#[stable(feature = "simd_x86", since = "1.27.0")]
311#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
312pub const fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
313 unsafe {
314 let a = simd_cast::<_, u32x8>(a.as_u16x8());
315 let b = simd_cast::<_, u32x8>(b.as_u16x8());
316 let r = simd_shr(simd_mul(a, b), u32x8::splat(16));
317 transmute(simd_cast::<u32x8, u16x8>(r))
318 }
319}
320
321#[inline]
328#[target_feature(enable = "sse2")]
329#[cfg_attr(test, assert_instr(pmullw))]
330#[stable(feature = "simd_x86", since = "1.27.0")]
331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
332pub const fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
333 unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
334}
335
336#[inline]
343#[target_feature(enable = "sse2")]
344#[cfg_attr(test, assert_instr(pmuludq))]
345#[stable(feature = "simd_x86", since = "1.27.0")]
346#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
347pub const fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
348 unsafe {
349 let a = a.as_u64x2();
350 let b = b.as_u64x2();
351 let mask = u64x2::splat(u32::MAX as u64);
352 transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
353 }
354}
355
356#[inline]
365#[target_feature(enable = "sse2")]
366#[cfg_attr(test, assert_instr(psadbw))]
367#[stable(feature = "simd_x86", since = "1.27.0")]
368pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
369 unsafe { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) }
370}
371
372#[inline]
376#[target_feature(enable = "sse2")]
377#[cfg_attr(test, assert_instr(psubb))]
378#[stable(feature = "simd_x86", since = "1.27.0")]
379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
380pub const fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
381 unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
382}
383
384#[inline]
388#[target_feature(enable = "sse2")]
389#[cfg_attr(test, assert_instr(psubw))]
390#[stable(feature = "simd_x86", since = "1.27.0")]
391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
392pub const fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
393 unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
394}
395
396#[inline]
400#[target_feature(enable = "sse2")]
401#[cfg_attr(test, assert_instr(psubd))]
402#[stable(feature = "simd_x86", since = "1.27.0")]
403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
404pub const fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
405 unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
406}
407
408#[inline]
412#[target_feature(enable = "sse2")]
413#[cfg_attr(test, assert_instr(psubq))]
414#[stable(feature = "simd_x86", since = "1.27.0")]
415#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
416pub const fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
417 unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
418}
419
420#[inline]
425#[target_feature(enable = "sse2")]
426#[cfg_attr(test, assert_instr(psubsb))]
427#[stable(feature = "simd_x86", since = "1.27.0")]
428#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
429pub const fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
430 unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
431}
432
433#[inline]
438#[target_feature(enable = "sse2")]
439#[cfg_attr(test, assert_instr(psubsw))]
440#[stable(feature = "simd_x86", since = "1.27.0")]
441#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
442pub const fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
443 unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
444}
445
446#[inline]
451#[target_feature(enable = "sse2")]
452#[cfg_attr(test, assert_instr(psubusb))]
453#[stable(feature = "simd_x86", since = "1.27.0")]
454#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
455pub const fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
456 unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
457}
458
459#[inline]
464#[target_feature(enable = "sse2")]
465#[cfg_attr(test, assert_instr(psubusw))]
466#[stable(feature = "simd_x86", since = "1.27.0")]
467#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
468pub const fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
469 unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
470}
471
/// Shifts `a` left by `IMM8` bytes, delegating to `_mm_slli_si128_impl`.
///
/// `IMM8` is validated at compile time by `static_assert_uimm_bits!(IMM8, 8)`
/// before the shift is performed.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    // The implementation helper is an `unsafe fn` gated on the same `sse2` feature.
    unsafe { _mm_slli_si128_impl::<IMM8>(a) }
}
485
486#[inline]
489#[target_feature(enable = "sse2")]
490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
491const unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
492 const fn mask(shift: i32, i: u32) -> u32 {
493 let shift = shift as u32 & 0xff;
494 if shift > 15 { i } else { 16 - shift + i }
495 }
496 transmute::<i8x16, _>(simd_shuffle!(
497 i8x16::ZERO,
498 a.as_i8x16(),
499 [
500 mask(IMM8, 0),
501 mask(IMM8, 1),
502 mask(IMM8, 2),
503 mask(IMM8, 3),
504 mask(IMM8, 4),
505 mask(IMM8, 5),
506 mask(IMM8, 6),
507 mask(IMM8, 7),
508 mask(IMM8, 8),
509 mask(IMM8, 9),
510 mask(IMM8, 10),
511 mask(IMM8, 11),
512 mask(IMM8, 12),
513 mask(IMM8, 13),
514 mask(IMM8, 14),
515 mask(IMM8, 15),
516 ],
517 ))
518}
519
520#[inline]
524#[target_feature(enable = "sse2")]
525#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
526#[rustc_legacy_const_generics(1)]
527#[stable(feature = "simd_x86", since = "1.27.0")]
528#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
529pub const fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
530 unsafe {
531 static_assert_uimm_bits!(IMM8, 8);
532 _mm_slli_si128_impl::<IMM8>(a)
533 }
534}
535
536#[inline]
540#[target_feature(enable = "sse2")]
541#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
542#[rustc_legacy_const_generics(1)]
543#[stable(feature = "simd_x86", since = "1.27.0")]
544#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
545pub const fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
546 unsafe {
547 static_assert_uimm_bits!(IMM8, 8);
548 _mm_srli_si128_impl::<IMM8>(a)
549 }
550}
551
552#[inline]
556#[target_feature(enable = "sse2")]
557#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
558#[rustc_legacy_const_generics(1)]
559#[stable(feature = "simd_x86", since = "1.27.0")]
560#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
561pub const fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
562 static_assert_uimm_bits!(IMM8, 8);
563 unsafe {
564 if IMM8 >= 16 {
565 _mm_setzero_si128()
566 } else {
567 transmute(simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
568 }
569 }
570}
571
572#[inline]
577#[target_feature(enable = "sse2")]
578#[cfg_attr(test, assert_instr(psllw))]
579#[stable(feature = "simd_x86", since = "1.27.0")]
580pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
581 unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
582}
583
584#[inline]
588#[target_feature(enable = "sse2")]
589#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
590#[rustc_legacy_const_generics(1)]
591#[stable(feature = "simd_x86", since = "1.27.0")]
592#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
593pub const fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
594 static_assert_uimm_bits!(IMM8, 8);
595 unsafe {
596 if IMM8 >= 32 {
597 _mm_setzero_si128()
598 } else {
599 transmute(simd_shl(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
600 }
601 }
602}
603
604#[inline]
609#[target_feature(enable = "sse2")]
610#[cfg_attr(test, assert_instr(pslld))]
611#[stable(feature = "simd_x86", since = "1.27.0")]
612pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
613 unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
614}
615
616#[inline]
620#[target_feature(enable = "sse2")]
621#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
622#[rustc_legacy_const_generics(1)]
623#[stable(feature = "simd_x86", since = "1.27.0")]
624#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
625pub const fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
626 static_assert_uimm_bits!(IMM8, 8);
627 unsafe {
628 if IMM8 >= 64 {
629 _mm_setzero_si128()
630 } else {
631 transmute(simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
632 }
633 }
634}
635
636#[inline]
641#[target_feature(enable = "sse2")]
642#[cfg_attr(test, assert_instr(psllq))]
643#[stable(feature = "simd_x86", since = "1.27.0")]
644pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
645 unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
646}
647
648#[inline]
653#[target_feature(enable = "sse2")]
654#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
655#[rustc_legacy_const_generics(1)]
656#[stable(feature = "simd_x86", since = "1.27.0")]
657#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
658pub const fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
659 static_assert_uimm_bits!(IMM8, 8);
660 unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
661}
662
663#[inline]
668#[target_feature(enable = "sse2")]
669#[cfg_attr(test, assert_instr(psraw))]
670#[stable(feature = "simd_x86", since = "1.27.0")]
671pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
672 unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
673}
674
675#[inline]
680#[target_feature(enable = "sse2")]
681#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
682#[rustc_legacy_const_generics(1)]
683#[stable(feature = "simd_x86", since = "1.27.0")]
684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
685pub const fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
686 static_assert_uimm_bits!(IMM8, 8);
687 unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
688}
689
690#[inline]
695#[target_feature(enable = "sse2")]
696#[cfg_attr(test, assert_instr(psrad))]
697#[stable(feature = "simd_x86", since = "1.27.0")]
698pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
699 unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
700}
701
/// Shifts `a` right by `IMM8` bytes, delegating to `_mm_srli_si128_impl`.
///
/// `IMM8` is validated at compile time by `static_assert_uimm_bits!(IMM8, 8)`
/// before the shift is performed.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    // The implementation helper is an `unsafe fn` gated on the same `sse2` feature.
    unsafe { _mm_srli_si128_impl::<IMM8>(a) }
}
715
716#[inline]
719#[target_feature(enable = "sse2")]
720#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
721const unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
722 const fn mask(shift: i32, i: u32) -> u32 {
723 if (shift as u32) > 15 {
724 i + 16
725 } else {
726 i + (shift as u32)
727 }
728 }
729 let x: i8x16 = simd_shuffle!(
730 a.as_i8x16(),
731 i8x16::ZERO,
732 [
733 mask(IMM8, 0),
734 mask(IMM8, 1),
735 mask(IMM8, 2),
736 mask(IMM8, 3),
737 mask(IMM8, 4),
738 mask(IMM8, 5),
739 mask(IMM8, 6),
740 mask(IMM8, 7),
741 mask(IMM8, 8),
742 mask(IMM8, 9),
743 mask(IMM8, 10),
744 mask(IMM8, 11),
745 mask(IMM8, 12),
746 mask(IMM8, 13),
747 mask(IMM8, 14),
748 mask(IMM8, 15),
749 ],
750 );
751 transmute(x)
752}
753
754#[inline]
759#[target_feature(enable = "sse2")]
760#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
761#[rustc_legacy_const_generics(1)]
762#[stable(feature = "simd_x86", since = "1.27.0")]
763#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
764pub const fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
765 static_assert_uimm_bits!(IMM8, 8);
766 unsafe {
767 if IMM8 >= 16 {
768 _mm_setzero_si128()
769 } else {
770 transmute(simd_shr(a.as_u16x8(), u16x8::splat(IMM8 as u16)))
771 }
772 }
773}
774
775#[inline]
780#[target_feature(enable = "sse2")]
781#[cfg_attr(test, assert_instr(psrlw))]
782#[stable(feature = "simd_x86", since = "1.27.0")]
783pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
784 unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
785}
786
787#[inline]
792#[target_feature(enable = "sse2")]
793#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
794#[rustc_legacy_const_generics(1)]
795#[stable(feature = "simd_x86", since = "1.27.0")]
796#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
797pub const fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
798 static_assert_uimm_bits!(IMM8, 8);
799 unsafe {
800 if IMM8 >= 32 {
801 _mm_setzero_si128()
802 } else {
803 transmute(simd_shr(a.as_u32x4(), u32x4::splat(IMM8 as u32)))
804 }
805 }
806}
807
808#[inline]
813#[target_feature(enable = "sse2")]
814#[cfg_attr(test, assert_instr(psrld))]
815#[stable(feature = "simd_x86", since = "1.27.0")]
816pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
817 unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
818}
819
820#[inline]
825#[target_feature(enable = "sse2")]
826#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
827#[rustc_legacy_const_generics(1)]
828#[stable(feature = "simd_x86", since = "1.27.0")]
829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
830pub const fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
831 static_assert_uimm_bits!(IMM8, 8);
832 unsafe {
833 if IMM8 >= 64 {
834 _mm_setzero_si128()
835 } else {
836 transmute(simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)))
837 }
838 }
839}
840
841#[inline]
846#[target_feature(enable = "sse2")]
847#[cfg_attr(test, assert_instr(psrlq))]
848#[stable(feature = "simd_x86", since = "1.27.0")]
849pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
850 unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
851}
852
/// Computes the bitwise AND of `a` and `b`.
///
/// The operation is applied directly to the `__m128i` values via `simd_and`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    // Both operands already share the same vector type, so no reinterpretation is needed.
    unsafe { simd_and(a, b) }
}
865
866#[inline]
871#[target_feature(enable = "sse2")]
872#[cfg_attr(test, assert_instr(andnps))]
873#[stable(feature = "simd_x86", since = "1.27.0")]
874#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
875pub const fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
876 unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
877}
878
/// Computes the bitwise OR of `a` and `b`.
///
/// The operation is applied directly to the `__m128i` values via `simd_or`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    // Both operands already share the same vector type, so no reinterpretation is needed.
    unsafe { simd_or(a, b) }
}
891
/// Computes the bitwise XOR of `a` and `b`.
///
/// The operation is applied directly to the `__m128i` values via `simd_xor`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    // Both operands already share the same vector type, so no reinterpretation is needed.
    unsafe { simd_xor(a, b) }
}
904
905#[inline]
909#[target_feature(enable = "sse2")]
910#[cfg_attr(test, assert_instr(pcmpeqb))]
911#[stable(feature = "simd_x86", since = "1.27.0")]
912#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
913pub const fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
914 unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
915}
916
917#[inline]
921#[target_feature(enable = "sse2")]
922#[cfg_attr(test, assert_instr(pcmpeqw))]
923#[stable(feature = "simd_x86", since = "1.27.0")]
924#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
925pub const fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
926 unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
927}
928
929#[inline]
933#[target_feature(enable = "sse2")]
934#[cfg_attr(test, assert_instr(pcmpeqd))]
935#[stable(feature = "simd_x86", since = "1.27.0")]
936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
937pub const fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
938 unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
939}
940
941#[inline]
945#[target_feature(enable = "sse2")]
946#[cfg_attr(test, assert_instr(pcmpgtb))]
947#[stable(feature = "simd_x86", since = "1.27.0")]
948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
949pub const fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
950 unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
951}
952
953#[inline]
957#[target_feature(enable = "sse2")]
958#[cfg_attr(test, assert_instr(pcmpgtw))]
959#[stable(feature = "simd_x86", since = "1.27.0")]
960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
961pub const fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
962 unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
963}
964
965#[inline]
969#[target_feature(enable = "sse2")]
970#[cfg_attr(test, assert_instr(pcmpgtd))]
971#[stable(feature = "simd_x86", since = "1.27.0")]
972#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
973pub const fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
974 unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
975}
976
977#[inline]
981#[target_feature(enable = "sse2")]
982#[cfg_attr(test, assert_instr(pcmpgtb))]
983#[stable(feature = "simd_x86", since = "1.27.0")]
984#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
985pub const fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
986 unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
987}
988
989#[inline]
993#[target_feature(enable = "sse2")]
994#[cfg_attr(test, assert_instr(pcmpgtw))]
995#[stable(feature = "simd_x86", since = "1.27.0")]
996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
997pub const fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
998 unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
999}
1000
1001#[inline]
1005#[target_feature(enable = "sse2")]
1006#[cfg_attr(test, assert_instr(pcmpgtd))]
1007#[stable(feature = "simd_x86", since = "1.27.0")]
1008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1009pub const fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
1010 unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
1011}
1012
1013#[inline]
1018#[target_feature(enable = "sse2")]
1019#[cfg_attr(test, assert_instr(cvtdq2pd))]
1020#[stable(feature = "simd_x86", since = "1.27.0")]
1021#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1022pub const fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
1023 unsafe {
1024 let a = a.as_i32x4();
1025 simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
1026 }
1027}
1028
1029#[inline]
1034#[target_feature(enable = "sse2")]
1035#[cfg_attr(test, assert_instr(cvtsi2sd))]
1036#[stable(feature = "simd_x86", since = "1.27.0")]
1037#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1038pub const fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
1039 unsafe { simd_insert!(a, 0, b as f64) }
1040}
1041
1042#[inline]
1047#[target_feature(enable = "sse2")]
1048#[cfg_attr(test, assert_instr(cvtdq2ps))]
1049#[stable(feature = "simd_x86", since = "1.27.0")]
1050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1051pub const fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
1052 unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
1053}
1054
1055#[inline]
1060#[target_feature(enable = "sse2")]
1061#[cfg_attr(test, assert_instr(cvtps2dq))]
1062#[stable(feature = "simd_x86", since = "1.27.0")]
1063pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
1064 unsafe { transmute(cvtps2dq(a)) }
1065}
1066
1067#[inline]
1072#[target_feature(enable = "sse2")]
1073#[stable(feature = "simd_x86", since = "1.27.0")]
1074#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1075pub const fn _mm_cvtsi32_si128(a: i32) -> __m128i {
1076 unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
1077}
1078
1079#[inline]
1083#[target_feature(enable = "sse2")]
1084#[stable(feature = "simd_x86", since = "1.27.0")]
1085#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1086pub const fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
1087 unsafe { simd_extract!(a.as_i32x4(), 0) }
1088}
1089
1090#[inline]
1095#[target_feature(enable = "sse2")]
1096#[stable(feature = "simd_x86", since = "1.27.0")]
1098#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1099pub const fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
1100 unsafe { transmute(i64x2::new(e0, e1)) }
1101}
1102
1103#[inline]
1107#[target_feature(enable = "sse2")]
1108#[stable(feature = "simd_x86", since = "1.27.0")]
1110#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1111pub const fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1112 unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
1113}
1114
1115#[inline]
1119#[target_feature(enable = "sse2")]
1120#[stable(feature = "simd_x86", since = "1.27.0")]
1122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1123pub const fn _mm_set_epi16(
1124 e7: i16,
1125 e6: i16,
1126 e5: i16,
1127 e4: i16,
1128 e3: i16,
1129 e2: i16,
1130 e1: i16,
1131 e0: i16,
1132) -> __m128i {
1133 unsafe { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
1134}
1135
1136#[inline]
1140#[target_feature(enable = "sse2")]
1141#[stable(feature = "simd_x86", since = "1.27.0")]
1143#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1144pub const fn _mm_set_epi8(
1145 e15: i8,
1146 e14: i8,
1147 e13: i8,
1148 e12: i8,
1149 e11: i8,
1150 e10: i8,
1151 e9: i8,
1152 e8: i8,
1153 e7: i8,
1154 e6: i8,
1155 e5: i8,
1156 e4: i8,
1157 e3: i8,
1158 e2: i8,
1159 e1: i8,
1160 e0: i8,
1161) -> __m128i {
1162 unsafe {
1163 #[rustfmt::skip]
1164 transmute(i8x16::new(
1165 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1166 ))
1167 }
1168}
1169
1170#[inline]
1174#[target_feature(enable = "sse2")]
1175#[stable(feature = "simd_x86", since = "1.27.0")]
1177#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1178pub const fn _mm_set1_epi64x(a: i64) -> __m128i {
1179 i64x2::splat(a).as_m128i()
1180}
1181
1182#[inline]
1186#[target_feature(enable = "sse2")]
1187#[stable(feature = "simd_x86", since = "1.27.0")]
1189#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1190pub const fn _mm_set1_epi32(a: i32) -> __m128i {
1191 i32x4::splat(a).as_m128i()
1192}
1193
1194#[inline]
1198#[target_feature(enable = "sse2")]
1199#[stable(feature = "simd_x86", since = "1.27.0")]
1201#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1202pub const fn _mm_set1_epi16(a: i16) -> __m128i {
1203 i16x8::splat(a).as_m128i()
1204}
1205
1206#[inline]
1210#[target_feature(enable = "sse2")]
1211#[stable(feature = "simd_x86", since = "1.27.0")]
1213#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1214pub const fn _mm_set1_epi8(a: i8) -> __m128i {
1215 i8x16::splat(a).as_m128i()
1216}
1217
1218#[inline]
1222#[target_feature(enable = "sse2")]
1223#[stable(feature = "simd_x86", since = "1.27.0")]
1225#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1226pub const fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
1227 _mm_set_epi32(e0, e1, e2, e3)
1228}
1229
1230#[inline]
1234#[target_feature(enable = "sse2")]
1235#[stable(feature = "simd_x86", since = "1.27.0")]
1237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1238pub const fn _mm_setr_epi16(
1239 e7: i16,
1240 e6: i16,
1241 e5: i16,
1242 e4: i16,
1243 e3: i16,
1244 e2: i16,
1245 e1: i16,
1246 e0: i16,
1247) -> __m128i {
1248 _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
1249}
1250
1251#[inline]
1255#[target_feature(enable = "sse2")]
1256#[stable(feature = "simd_x86", since = "1.27.0")]
1258#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1259pub const fn _mm_setr_epi8(
1260 e15: i8,
1261 e14: i8,
1262 e13: i8,
1263 e12: i8,
1264 e11: i8,
1265 e10: i8,
1266 e9: i8,
1267 e8: i8,
1268 e7: i8,
1269 e6: i8,
1270 e5: i8,
1271 e4: i8,
1272 e3: i8,
1273 e2: i8,
1274 e1: i8,
1275 e0: i8,
1276) -> __m128i {
1277 #[rustfmt::skip]
1278 _mm_set_epi8(
1279 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
1280 )
1281}
1282
1283#[inline]
1287#[target_feature(enable = "sse2")]
1288#[cfg_attr(test, assert_instr(xorps))]
1289#[stable(feature = "simd_x86", since = "1.27.0")]
1290#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1291pub const fn _mm_setzero_si128() -> __m128i {
1292 const { unsafe { mem::zeroed() } }
1293}
1294
1295#[inline]
1299#[target_feature(enable = "sse2")]
1300#[stable(feature = "simd_x86", since = "1.27.0")]
1301#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1302pub const unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
1303 _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
1304}
1305
1306#[inline]
1312#[target_feature(enable = "sse2")]
1313#[cfg_attr(
1314 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1315 assert_instr(movaps)
1316)]
1317#[stable(feature = "simd_x86", since = "1.27.0")]
1318#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1319pub const unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
1320 *mem_addr
1321}
1322
1323#[inline]
1329#[target_feature(enable = "sse2")]
1330#[cfg_attr(test, assert_instr(movups))]
1331#[stable(feature = "simd_x86", since = "1.27.0")]
1332#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1333pub const unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
1334 let mut dst: __m128i = _mm_undefined_si128();
1335 ptr::copy_nonoverlapping(
1336 mem_addr as *const u8,
1337 ptr::addr_of_mut!(dst) as *mut u8,
1338 mem::size_of::<__m128i>(),
1339 );
1340 dst
1341}
1342
1343#[inline]
1363#[target_feature(enable = "sse2")]
1364#[cfg_attr(test, assert_instr(maskmovdqu))]
1365#[stable(feature = "simd_x86", since = "1.27.0")]
1366pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
1367 maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
1368}
1369
1370#[inline]
1376#[target_feature(enable = "sse2")]
1377#[cfg_attr(
1378 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1379 assert_instr(movaps)
1380)]
1381#[stable(feature = "simd_x86", since = "1.27.0")]
1382#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1383pub const unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
1384 *mem_addr = a;
1385}
1386
1387#[inline]
1393#[target_feature(enable = "sse2")]
1394#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
1396#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1397pub const unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
1398 mem_addr.write_unaligned(a);
1399}
1400
1401#[inline]
1407#[target_feature(enable = "sse2")]
1408#[stable(feature = "simd_x86", since = "1.27.0")]
1409#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1410pub const unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
1411 ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
1412}
1413
1414#[inline]
1429#[target_feature(enable = "sse2")]
1430#[cfg_attr(test, assert_instr(movntdq))]
1431#[stable(feature = "simd_x86", since = "1.27.0")]
1432pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
1433 crate::arch::asm!(
1435 vps!("movntdq", ",{a}"),
1436 p = in(reg) mem_addr,
1437 a = in(xmm_reg) a,
1438 options(nostack, preserves_flags),
1439 );
1440}
1441
1442#[inline]
1457#[target_feature(enable = "sse2")]
1458#[cfg_attr(test, assert_instr(movnti))]
1459#[stable(feature = "simd_x86", since = "1.27.0")]
1460pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
1461 crate::arch::asm!(
1463 vps!("movnti", ",{a:e}"), p = in(reg) mem_addr,
1465 a = in(reg) a,
1466 options(nostack, preserves_flags),
1467 );
1468}
1469
1470#[inline]
1475#[target_feature(enable = "sse2")]
1476#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))]
1478#[stable(feature = "simd_x86", since = "1.27.0")]
1479#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1480pub const fn _mm_move_epi64(a: __m128i) -> __m128i {
1481 unsafe {
1482 let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
1483 transmute(r)
1484 }
1485}
1486
1487#[inline]
1492#[target_feature(enable = "sse2")]
1493#[cfg_attr(test, assert_instr(packsswb))]
1494#[stable(feature = "simd_x86", since = "1.27.0")]
1495pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
1496 unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
1497}
1498
1499#[inline]
1504#[target_feature(enable = "sse2")]
1505#[cfg_attr(test, assert_instr(packssdw))]
1506#[stable(feature = "simd_x86", since = "1.27.0")]
1507pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
1508 unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
1509}
1510
1511#[inline]
1516#[target_feature(enable = "sse2")]
1517#[cfg_attr(test, assert_instr(packuswb))]
1518#[stable(feature = "simd_x86", since = "1.27.0")]
1519pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
1520 unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
1521}
1522
1523#[inline]
1527#[target_feature(enable = "sse2")]
1528#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1529#[rustc_legacy_const_generics(1)]
1530#[stable(feature = "simd_x86", since = "1.27.0")]
1531#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1532pub const fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
1533 static_assert_uimm_bits!(IMM8, 3);
1534 unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
1535}
1536
1537#[inline]
1541#[target_feature(enable = "sse2")]
1542#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1543#[rustc_legacy_const_generics(2)]
1544#[stable(feature = "simd_x86", since = "1.27.0")]
1545#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1546pub const fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
1547 static_assert_uimm_bits!(IMM8, 3);
1548 unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
1549}
1550
1551#[inline]
1555#[target_feature(enable = "sse2")]
1556#[cfg_attr(test, assert_instr(pmovmskb))]
1557#[stable(feature = "simd_x86", since = "1.27.0")]
1558#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1559pub const fn _mm_movemask_epi8(a: __m128i) -> i32 {
1560 unsafe {
1561 let z = i8x16::ZERO;
1562 let m: i8x16 = simd_lt(a.as_i8x16(), z);
1563 simd_bitmask::<_, u16>(m) as u32 as i32
1564 }
1565}
1566
1567#[inline]
1571#[target_feature(enable = "sse2")]
1572#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1573#[rustc_legacy_const_generics(1)]
1574#[stable(feature = "simd_x86", since = "1.27.0")]
1575#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1576pub const fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1577 static_assert_uimm_bits!(IMM8, 8);
1578 unsafe {
1579 let a = a.as_i32x4();
1580 let x: i32x4 = simd_shuffle!(
1581 a,
1582 a,
1583 [
1584 IMM8 as u32 & 0b11,
1585 (IMM8 as u32 >> 2) & 0b11,
1586 (IMM8 as u32 >> 4) & 0b11,
1587 (IMM8 as u32 >> 6) & 0b11,
1588 ],
1589 );
1590 transmute(x)
1591 }
1592}
1593
1594#[inline]
1602#[target_feature(enable = "sse2")]
1603#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1604#[rustc_legacy_const_generics(1)]
1605#[stable(feature = "simd_x86", since = "1.27.0")]
1606#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1607pub const fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1608 static_assert_uimm_bits!(IMM8, 8);
1609 unsafe {
1610 let a = a.as_i16x8();
1611 let x: i16x8 = simd_shuffle!(
1612 a,
1613 a,
1614 [
1615 0,
1616 1,
1617 2,
1618 3,
1619 (IMM8 as u32 & 0b11) + 4,
1620 ((IMM8 as u32 >> 2) & 0b11) + 4,
1621 ((IMM8 as u32 >> 4) & 0b11) + 4,
1622 ((IMM8 as u32 >> 6) & 0b11) + 4,
1623 ],
1624 );
1625 transmute(x)
1626 }
1627}
1628
1629#[inline]
1637#[target_feature(enable = "sse2")]
1638#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1639#[rustc_legacy_const_generics(1)]
1640#[stable(feature = "simd_x86", since = "1.27.0")]
1641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1642pub const fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1643 static_assert_uimm_bits!(IMM8, 8);
1644 unsafe {
1645 let a = a.as_i16x8();
1646 let x: i16x8 = simd_shuffle!(
1647 a,
1648 a,
1649 [
1650 IMM8 as u32 & 0b11,
1651 (IMM8 as u32 >> 2) & 0b11,
1652 (IMM8 as u32 >> 4) & 0b11,
1653 (IMM8 as u32 >> 6) & 0b11,
1654 4,
1655 5,
1656 6,
1657 7,
1658 ],
1659 );
1660 transmute(x)
1661 }
1662}
1663
1664#[inline]
1668#[target_feature(enable = "sse2")]
1669#[cfg_attr(test, assert_instr(punpckhbw))]
1670#[stable(feature = "simd_x86", since = "1.27.0")]
1671#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1672pub const fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1673 unsafe {
1674 transmute::<i8x16, _>(simd_shuffle!(
1675 a.as_i8x16(),
1676 b.as_i8x16(),
1677 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1678 ))
1679 }
1680}
1681
1682#[inline]
1686#[target_feature(enable = "sse2")]
1687#[cfg_attr(test, assert_instr(punpckhwd))]
1688#[stable(feature = "simd_x86", since = "1.27.0")]
1689#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1690pub const fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1691 unsafe {
1692 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1693 transmute::<i16x8, _>(x)
1694 }
1695}
1696
1697#[inline]
1701#[target_feature(enable = "sse2")]
1702#[cfg_attr(test, assert_instr(unpckhps))]
1703#[stable(feature = "simd_x86", since = "1.27.0")]
1704#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1705pub const fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1706 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
1707}
1708
1709#[inline]
1713#[target_feature(enable = "sse2")]
1714#[cfg_attr(test, assert_instr(unpckhpd))]
1715#[stable(feature = "simd_x86", since = "1.27.0")]
1716#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1717pub const fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1718 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
1719}
1720
1721#[inline]
1725#[target_feature(enable = "sse2")]
1726#[cfg_attr(test, assert_instr(punpcklbw))]
1727#[stable(feature = "simd_x86", since = "1.27.0")]
1728#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1729pub const fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1730 unsafe {
1731 transmute::<i8x16, _>(simd_shuffle!(
1732 a.as_i8x16(),
1733 b.as_i8x16(),
1734 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1735 ))
1736 }
1737}
1738
1739#[inline]
1743#[target_feature(enable = "sse2")]
1744#[cfg_attr(test, assert_instr(punpcklwd))]
1745#[stable(feature = "simd_x86", since = "1.27.0")]
1746#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1747pub const fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1748 unsafe {
1749 let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1750 transmute::<i16x8, _>(x)
1751 }
1752}
1753
1754#[inline]
1758#[target_feature(enable = "sse2")]
1759#[cfg_attr(test, assert_instr(unpcklps))]
1760#[stable(feature = "simd_x86", since = "1.27.0")]
1761#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1762pub const fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1763 unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
1764}
1765
1766#[inline]
1770#[target_feature(enable = "sse2")]
1771#[cfg_attr(test, assert_instr(movlhps))]
1772#[stable(feature = "simd_x86", since = "1.27.0")]
1773#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1774pub const fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1775 unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
1776}
1777
1778#[inline]
1783#[target_feature(enable = "sse2")]
1784#[cfg_attr(test, assert_instr(addsd))]
1785#[stable(feature = "simd_x86", since = "1.27.0")]
1786#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1787pub const fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1788 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
1789}
1790
1791#[inline]
1796#[target_feature(enable = "sse2")]
1797#[cfg_attr(test, assert_instr(addpd))]
1798#[stable(feature = "simd_x86", since = "1.27.0")]
1799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1800pub const fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
1801 unsafe { simd_add(a, b) }
1802}
1803
1804#[inline]
1809#[target_feature(enable = "sse2")]
1810#[cfg_attr(test, assert_instr(divsd))]
1811#[stable(feature = "simd_x86", since = "1.27.0")]
1812#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1813pub const fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1814 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
1815}
1816
1817#[inline]
1822#[target_feature(enable = "sse2")]
1823#[cfg_attr(test, assert_instr(divpd))]
1824#[stable(feature = "simd_x86", since = "1.27.0")]
1825#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1826pub const fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
1827 unsafe { simd_div(a, b) }
1828}
1829
1830#[inline]
1835#[target_feature(enable = "sse2")]
1836#[cfg_attr(test, assert_instr(maxsd))]
1837#[stable(feature = "simd_x86", since = "1.27.0")]
1838pub fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
1839 unsafe { maxsd(a, b) }
1840}
1841
1842#[inline]
1847#[target_feature(enable = "sse2")]
1848#[cfg_attr(test, assert_instr(maxpd))]
1849#[stable(feature = "simd_x86", since = "1.27.0")]
1850pub fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
1851 unsafe { maxpd(a, b) }
1852}
1853
1854#[inline]
1859#[target_feature(enable = "sse2")]
1860#[cfg_attr(test, assert_instr(minsd))]
1861#[stable(feature = "simd_x86", since = "1.27.0")]
1862pub fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
1863 unsafe { minsd(a, b) }
1864}
1865
1866#[inline]
1871#[target_feature(enable = "sse2")]
1872#[cfg_attr(test, assert_instr(minpd))]
1873#[stable(feature = "simd_x86", since = "1.27.0")]
1874pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
1875 unsafe { minpd(a, b) }
1876}
1877
1878#[inline]
1883#[target_feature(enable = "sse2")]
1884#[cfg_attr(test, assert_instr(mulsd))]
1885#[stable(feature = "simd_x86", since = "1.27.0")]
1886#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1887pub const fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1888 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
1889}
1890
1891#[inline]
1896#[target_feature(enable = "sse2")]
1897#[cfg_attr(test, assert_instr(mulpd))]
1898#[stable(feature = "simd_x86", since = "1.27.0")]
1899#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1900pub const fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
1901 unsafe { simd_mul(a, b) }
1902}
1903
1904#[inline]
1909#[target_feature(enable = "sse2")]
1910#[cfg_attr(test, assert_instr(sqrtsd))]
1911#[stable(feature = "simd_x86", since = "1.27.0")]
1912pub fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1913 unsafe { simd_insert!(a, 0, sqrtf64(_mm_cvtsd_f64(b))) }
1914}
1915
1916#[inline]
1920#[target_feature(enable = "sse2")]
1921#[cfg_attr(test, assert_instr(sqrtpd))]
1922#[stable(feature = "simd_x86", since = "1.27.0")]
1923pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
1924 unsafe { simd_fsqrt(a) }
1925}
1926
1927#[inline]
1932#[target_feature(enable = "sse2")]
1933#[cfg_attr(test, assert_instr(subsd))]
1934#[stable(feature = "simd_x86", since = "1.27.0")]
1935#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1936pub const fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1937 unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
1938}
1939
1940#[inline]
1945#[target_feature(enable = "sse2")]
1946#[cfg_attr(test, assert_instr(subpd))]
1947#[stable(feature = "simd_x86", since = "1.27.0")]
1948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1949pub const fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
1950 unsafe { simd_sub(a, b) }
1951}
1952
1953#[inline]
1958#[target_feature(enable = "sse2")]
1959#[cfg_attr(test, assert_instr(andps))]
1960#[stable(feature = "simd_x86", since = "1.27.0")]
1961#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1962pub const fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1963 unsafe {
1964 let a: __m128i = transmute(a);
1965 let b: __m128i = transmute(b);
1966 transmute(_mm_and_si128(a, b))
1967 }
1968}
1969
1970#[inline]
1974#[target_feature(enable = "sse2")]
1975#[cfg_attr(test, assert_instr(andnps))]
1976#[stable(feature = "simd_x86", since = "1.27.0")]
1977#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1978pub const fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1979 unsafe {
1980 let a: __m128i = transmute(a);
1981 let b: __m128i = transmute(b);
1982 transmute(_mm_andnot_si128(a, b))
1983 }
1984}
1985
1986#[inline]
1990#[target_feature(enable = "sse2")]
1991#[cfg_attr(test, assert_instr(orps))]
1992#[stable(feature = "simd_x86", since = "1.27.0")]
1993#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1994pub const fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1995 unsafe {
1996 let a: __m128i = transmute(a);
1997 let b: __m128i = transmute(b);
1998 transmute(_mm_or_si128(a, b))
1999 }
2000}
2001
2002#[inline]
2006#[target_feature(enable = "sse2")]
2007#[cfg_attr(test, assert_instr(xorps))]
2008#[stable(feature = "simd_x86", since = "1.27.0")]
2009#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2010pub const fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
2011 unsafe {
2012 let a: __m128i = transmute(a);
2013 let b: __m128i = transmute(b);
2014 transmute(_mm_xor_si128(a, b))
2015 }
2016}
2017
2018#[inline]
2023#[target_feature(enable = "sse2")]
2024#[cfg_attr(test, assert_instr(cmpeqsd))]
2025#[stable(feature = "simd_x86", since = "1.27.0")]
2026pub fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
2027 unsafe { cmpsd(a, b, 0) }
2028}
2029
2030#[inline]
2035#[target_feature(enable = "sse2")]
2036#[cfg_attr(test, assert_instr(cmpltsd))]
2037#[stable(feature = "simd_x86", since = "1.27.0")]
2038pub fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
2039 unsafe { cmpsd(a, b, 1) }
2040}
2041
2042#[inline]
2047#[target_feature(enable = "sse2")]
2048#[cfg_attr(test, assert_instr(cmplesd))]
2049#[stable(feature = "simd_x86", since = "1.27.0")]
2050pub fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
2051 unsafe { cmpsd(a, b, 2) }
2052}
2053
2054#[inline]
2059#[target_feature(enable = "sse2")]
2060#[cfg_attr(test, assert_instr(cmpltsd))]
2061#[stable(feature = "simd_x86", since = "1.27.0")]
2062pub fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
2063 unsafe { simd_insert!(_mm_cmplt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2064}
2065
2066#[inline]
2071#[target_feature(enable = "sse2")]
2072#[cfg_attr(test, assert_instr(cmplesd))]
2073#[stable(feature = "simd_x86", since = "1.27.0")]
2074pub fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
2075 unsafe { simd_insert!(_mm_cmple_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2076}
2077
2078#[inline]
2085#[target_feature(enable = "sse2")]
2086#[cfg_attr(test, assert_instr(cmpordsd))]
2087#[stable(feature = "simd_x86", since = "1.27.0")]
2088pub fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
2089 unsafe { cmpsd(a, b, 7) }
2090}
2091
2092#[inline]
2098#[target_feature(enable = "sse2")]
2099#[cfg_attr(test, assert_instr(cmpunordsd))]
2100#[stable(feature = "simd_x86", since = "1.27.0")]
2101pub fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
2102 unsafe { cmpsd(a, b, 3) }
2103}
2104
2105#[inline]
2110#[target_feature(enable = "sse2")]
2111#[cfg_attr(test, assert_instr(cmpneqsd))]
2112#[stable(feature = "simd_x86", since = "1.27.0")]
2113pub fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
2114 unsafe { cmpsd(a, b, 4) }
2115}
2116
2117#[inline]
2122#[target_feature(enable = "sse2")]
2123#[cfg_attr(test, assert_instr(cmpnltsd))]
2124#[stable(feature = "simd_x86", since = "1.27.0")]
2125pub fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
2126 unsafe { cmpsd(a, b, 5) }
2127}
2128
2129#[inline]
2134#[target_feature(enable = "sse2")]
2135#[cfg_attr(test, assert_instr(cmpnlesd))]
2136#[stable(feature = "simd_x86", since = "1.27.0")]
2137pub fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
2138 unsafe { cmpsd(a, b, 6) }
2139}
2140
2141#[inline]
2146#[target_feature(enable = "sse2")]
2147#[cfg_attr(test, assert_instr(cmpnltsd))]
2148#[stable(feature = "simd_x86", since = "1.27.0")]
2149pub fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
2150 unsafe { simd_insert!(_mm_cmpnlt_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2151}
2152
2153#[inline]
2158#[target_feature(enable = "sse2")]
2159#[cfg_attr(test, assert_instr(cmpnlesd))]
2160#[stable(feature = "simd_x86", since = "1.27.0")]
2161pub fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
2162 unsafe { simd_insert!(_mm_cmpnle_sd(b, a), 1, simd_extract!(a, 1, f64)) }
2163}
2164
2165#[inline]
2169#[target_feature(enable = "sse2")]
2170#[cfg_attr(test, assert_instr(cmpeqpd))]
2171#[stable(feature = "simd_x86", since = "1.27.0")]
2172pub fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
2173 unsafe { cmppd(a, b, 0) }
2174}
2175
2176#[inline]
2180#[target_feature(enable = "sse2")]
2181#[cfg_attr(test, assert_instr(cmpltpd))]
2182#[stable(feature = "simd_x86", since = "1.27.0")]
2183pub fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
2184 unsafe { cmppd(a, b, 1) }
2185}
2186
2187#[inline]
2191#[target_feature(enable = "sse2")]
2192#[cfg_attr(test, assert_instr(cmplepd))]
2193#[stable(feature = "simd_x86", since = "1.27.0")]
2194pub fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
2195 unsafe { cmppd(a, b, 2) }
2196}
2197
2198#[inline]
2202#[target_feature(enable = "sse2")]
2203#[cfg_attr(test, assert_instr(cmpltpd))]
2204#[stable(feature = "simd_x86", since = "1.27.0")]
2205pub fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
2206 _mm_cmplt_pd(b, a)
2207}
2208
2209#[inline]
2213#[target_feature(enable = "sse2")]
2214#[cfg_attr(test, assert_instr(cmplepd))]
2215#[stable(feature = "simd_x86", since = "1.27.0")]
2216pub fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
2217 _mm_cmple_pd(b, a)
2218}
2219
2220#[inline]
2224#[target_feature(enable = "sse2")]
2225#[cfg_attr(test, assert_instr(cmpordpd))]
2226#[stable(feature = "simd_x86", since = "1.27.0")]
2227pub fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
2228 unsafe { cmppd(a, b, 7) }
2229}
2230
2231#[inline]
2235#[target_feature(enable = "sse2")]
2236#[cfg_attr(test, assert_instr(cmpunordpd))]
2237#[stable(feature = "simd_x86", since = "1.27.0")]
2238pub fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
2239 unsafe { cmppd(a, b, 3) }
2240}
2241
2242#[inline]
2246#[target_feature(enable = "sse2")]
2247#[cfg_attr(test, assert_instr(cmpneqpd))]
2248#[stable(feature = "simd_x86", since = "1.27.0")]
2249pub fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
2250 unsafe { cmppd(a, b, 4) }
2251}
2252
2253#[inline]
2257#[target_feature(enable = "sse2")]
2258#[cfg_attr(test, assert_instr(cmpnltpd))]
2259#[stable(feature = "simd_x86", since = "1.27.0")]
2260pub fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
2261 unsafe { cmppd(a, b, 5) }
2262}
2263
2264#[inline]
2268#[target_feature(enable = "sse2")]
2269#[cfg_attr(test, assert_instr(cmpnlepd))]
2270#[stable(feature = "simd_x86", since = "1.27.0")]
2271pub fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
2272 unsafe { cmppd(a, b, 6) }
2273}
2274
2275#[inline]
2279#[target_feature(enable = "sse2")]
2280#[cfg_attr(test, assert_instr(cmpnltpd))]
2281#[stable(feature = "simd_x86", since = "1.27.0")]
2282pub fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
2283 _mm_cmpnlt_pd(b, a)
2284}
2285
2286#[inline]
2291#[target_feature(enable = "sse2")]
2292#[cfg_attr(test, assert_instr(cmpnlepd))]
2293#[stable(feature = "simd_x86", since = "1.27.0")]
2294pub fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
2295 _mm_cmpnle_pd(b, a)
2296}
2297
2298#[inline]
2302#[target_feature(enable = "sse2")]
2303#[cfg_attr(test, assert_instr(comisd))]
2304#[stable(feature = "simd_x86", since = "1.27.0")]
2305pub fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
2306 unsafe { comieqsd(a, b) }
2307}
2308
2309#[inline]
2313#[target_feature(enable = "sse2")]
2314#[cfg_attr(test, assert_instr(comisd))]
2315#[stable(feature = "simd_x86", since = "1.27.0")]
2316pub fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
2317 unsafe { comiltsd(a, b) }
2318}
2319
2320#[inline]
2324#[target_feature(enable = "sse2")]
2325#[cfg_attr(test, assert_instr(comisd))]
2326#[stable(feature = "simd_x86", since = "1.27.0")]
2327pub fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
2328 unsafe { comilesd(a, b) }
2329}
2330
2331#[inline]
2335#[target_feature(enable = "sse2")]
2336#[cfg_attr(test, assert_instr(comisd))]
2337#[stable(feature = "simd_x86", since = "1.27.0")]
2338pub fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
2339 unsafe { comigtsd(a, b) }
2340}
2341
2342#[inline]
2346#[target_feature(enable = "sse2")]
2347#[cfg_attr(test, assert_instr(comisd))]
2348#[stable(feature = "simd_x86", since = "1.27.0")]
2349pub fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
2350 unsafe { comigesd(a, b) }
2351}
2352
2353#[inline]
2357#[target_feature(enable = "sse2")]
2358#[cfg_attr(test, assert_instr(comisd))]
2359#[stable(feature = "simd_x86", since = "1.27.0")]
2360pub fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
2361 unsafe { comineqsd(a, b) }
2362}
2363
2364#[inline]
2368#[target_feature(enable = "sse2")]
2369#[cfg_attr(test, assert_instr(ucomisd))]
2370#[stable(feature = "simd_x86", since = "1.27.0")]
2371pub fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
2372 unsafe { ucomieqsd(a, b) }
2373}
2374
2375#[inline]
2379#[target_feature(enable = "sse2")]
2380#[cfg_attr(test, assert_instr(ucomisd))]
2381#[stable(feature = "simd_x86", since = "1.27.0")]
2382pub fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
2383 unsafe { ucomiltsd(a, b) }
2384}
2385
2386#[inline]
2390#[target_feature(enable = "sse2")]
2391#[cfg_attr(test, assert_instr(ucomisd))]
2392#[stable(feature = "simd_x86", since = "1.27.0")]
2393pub fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
2394 unsafe { ucomilesd(a, b) }
2395}
2396
2397#[inline]
2401#[target_feature(enable = "sse2")]
2402#[cfg_attr(test, assert_instr(ucomisd))]
2403#[stable(feature = "simd_x86", since = "1.27.0")]
2404pub fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
2405 unsafe { ucomigtsd(a, b) }
2406}
2407
2408#[inline]
2412#[target_feature(enable = "sse2")]
2413#[cfg_attr(test, assert_instr(ucomisd))]
2414#[stable(feature = "simd_x86", since = "1.27.0")]
2415pub fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
2416 unsafe { ucomigesd(a, b) }
2417}
2418
2419#[inline]
2423#[target_feature(enable = "sse2")]
2424#[cfg_attr(test, assert_instr(ucomisd))]
2425#[stable(feature = "simd_x86", since = "1.27.0")]
2426pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
2427 unsafe { ucomineqsd(a, b) }
2428}
2429
2430#[inline]
2435#[target_feature(enable = "sse2")]
2436#[cfg_attr(test, assert_instr(cvtpd2ps))]
2437#[stable(feature = "simd_x86", since = "1.27.0")]
2438#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2439pub const fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2440 unsafe {
2441 let r = simd_cast::<_, f32x2>(a.as_f64x2());
2442 let zero = f32x2::ZERO;
2443 transmute::<f32x4, _>(simd_shuffle!(r, zero, [0, 1, 2, 3]))
2444 }
2445}
2446
2447#[inline]
2453#[target_feature(enable = "sse2")]
2454#[cfg_attr(test, assert_instr(cvtps2pd))]
2455#[stable(feature = "simd_x86", since = "1.27.0")]
2456#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2457pub const fn _mm_cvtps_pd(a: __m128) -> __m128d {
2458 unsafe {
2459 let a = a.as_f32x4();
2460 transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
2461 }
2462}
2463
2464#[inline]
2469#[target_feature(enable = "sse2")]
2470#[cfg_attr(test, assert_instr(cvtpd2dq))]
2471#[stable(feature = "simd_x86", since = "1.27.0")]
2472pub fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
2473 unsafe { transmute(cvtpd2dq(a)) }
2474}
2475
2476#[inline]
2481#[target_feature(enable = "sse2")]
2482#[cfg_attr(test, assert_instr(cvtsd2si))]
2483#[stable(feature = "simd_x86", since = "1.27.0")]
2484pub fn _mm_cvtsd_si32(a: __m128d) -> i32 {
2485 unsafe { cvtsd2si(a) }
2486}
2487
2488#[inline]
2495#[target_feature(enable = "sse2")]
2496#[cfg_attr(test, assert_instr(cvtsd2ss))]
2497#[stable(feature = "simd_x86", since = "1.27.0")]
2498pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
2499 unsafe { cvtsd2ss(a, b) }
2500}
2501
2502#[inline]
2506#[target_feature(enable = "sse2")]
2507#[stable(feature = "simd_x86", since = "1.27.0")]
2508#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2509pub const fn _mm_cvtsd_f64(a: __m128d) -> f64 {
2510 unsafe { simd_extract!(a, 0) }
2511}
2512
2513#[inline]
2520#[target_feature(enable = "sse2")]
2521#[cfg_attr(test, assert_instr(cvtss2sd))]
2522#[stable(feature = "simd_x86", since = "1.27.0")]
2523#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2524pub const fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
2525 unsafe {
2526 let elt: f32 = simd_extract!(b, 0);
2527 simd_insert!(a, 0, elt as f64)
2528 }
2529}
2530
2531#[inline]
2536#[target_feature(enable = "sse2")]
2537#[cfg_attr(test, assert_instr(cvttpd2dq))]
2538#[stable(feature = "simd_x86", since = "1.27.0")]
2539pub fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
2540 unsafe { transmute(cvttpd2dq(a)) }
2541}
2542
2543#[inline]
2548#[target_feature(enable = "sse2")]
2549#[cfg_attr(test, assert_instr(cvttsd2si))]
2550#[stable(feature = "simd_x86", since = "1.27.0")]
2551pub fn _mm_cvttsd_si32(a: __m128d) -> i32 {
2552 unsafe { cvttsd2si(a) }
2553}
2554
2555#[inline]
2560#[target_feature(enable = "sse2")]
2561#[cfg_attr(test, assert_instr(cvttps2dq))]
2562#[stable(feature = "simd_x86", since = "1.27.0")]
2563pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
2564 unsafe { transmute(cvttps2dq(a)) }
2565}
2566
2567#[inline]
2572#[target_feature(enable = "sse2")]
2573#[stable(feature = "simd_x86", since = "1.27.0")]
2574#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2575pub const fn _mm_set_sd(a: f64) -> __m128d {
2576 _mm_set_pd(0.0, a)
2577}
2578
2579#[inline]
2584#[target_feature(enable = "sse2")]
2585#[stable(feature = "simd_x86", since = "1.27.0")]
2586#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2587pub const fn _mm_set1_pd(a: f64) -> __m128d {
2588 _mm_set_pd(a, a)
2589}
2590
2591#[inline]
2596#[target_feature(enable = "sse2")]
2597#[stable(feature = "simd_x86", since = "1.27.0")]
2598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2599pub const fn _mm_set_pd1(a: f64) -> __m128d {
2600 _mm_set_pd(a, a)
2601}
2602
2603#[inline]
2608#[target_feature(enable = "sse2")]
2609#[stable(feature = "simd_x86", since = "1.27.0")]
2610#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2611pub const fn _mm_set_pd(a: f64, b: f64) -> __m128d {
2612 __m128d([b, a])
2613}
2614
2615#[inline]
2620#[target_feature(enable = "sse2")]
2621#[stable(feature = "simd_x86", since = "1.27.0")]
2622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2623pub const fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
2624 _mm_set_pd(b, a)
2625}
2626
2627#[inline]
2632#[target_feature(enable = "sse2")]
2633#[cfg_attr(test, assert_instr(xorp))]
2634#[stable(feature = "simd_x86", since = "1.27.0")]
2635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2636pub const fn _mm_setzero_pd() -> __m128d {
2637 const { unsafe { mem::zeroed() } }
2638}
2639
2640#[inline]
2647#[target_feature(enable = "sse2")]
2648#[cfg_attr(test, assert_instr(movmskpd))]
2649#[stable(feature = "simd_x86", since = "1.27.0")]
2650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2651pub const fn _mm_movemask_pd(a: __m128d) -> i32 {
2652 unsafe {
2655 let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
2656 simd_bitmask::<i64x2, u8>(mask) as i32
2657 }
2658}
2659
2660#[inline]
2667#[target_feature(enable = "sse2")]
2668#[cfg_attr(
2669 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2670 assert_instr(movaps)
2671)]
2672#[stable(feature = "simd_x86", since = "1.27.0")]
2673#[allow(clippy::cast_ptr_alignment)]
2674#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2675pub const unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2676 *(mem_addr as *const __m128d)
2677}
2678
2679#[inline]
2684#[target_feature(enable = "sse2")]
2685#[cfg_attr(test, assert_instr(movsd))]
2686#[stable(feature = "simd_x86", since = "1.27.0")]
2687#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2688pub const unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2689 _mm_setr_pd(*mem_addr, 0.)
2690}
2691
2692#[inline]
2698#[target_feature(enable = "sse2")]
2699#[cfg_attr(test, assert_instr(movhps))]
2700#[stable(feature = "simd_x86", since = "1.27.0")]
2701#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2702pub const unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2703 _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
2704}
2705
2706#[inline]
2712#[target_feature(enable = "sse2")]
2713#[cfg_attr(test, assert_instr(movlps))]
2714#[stable(feature = "simd_x86", since = "1.27.0")]
2715#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2716pub const unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2717 _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
2718}
2719
/// Stores the 128-bit vector `a` to the memory at `mem_addr` using the
/// `movntpd` instruction (a non-temporal store), emitted as inline assembly.
///
/// # Safety
///
/// `mem_addr` is handed to the instruction as the destination address and
/// must be valid for a 16-byte write.
///
/// NOTE(review): per the instruction's documented semantics the address is
/// expected to be 16-byte aligned, and non-temporal stores are weakly
/// ordered, so callers usually need a store fence before other threads read
/// the data — confirm against the Intel documentation.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    crate::arch::asm!(
        // `vps!` builds the instruction string from the mnemonic and the
        // operand suffix; presumably it supplies the `{p}` memory operand.
        vps!("movntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}
2749
2750#[inline]
2755#[target_feature(enable = "sse2")]
2756#[cfg_attr(test, assert_instr(movlps))]
2757#[stable(feature = "simd_x86", since = "1.27.0")]
2758#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2759pub const unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2760 *mem_addr = simd_extract!(a, 0)
2761}
2762
2763#[inline]
2769#[target_feature(enable = "sse2")]
2770#[cfg_attr(
2771 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2772 assert_instr(movaps)
2773)]
2774#[stable(feature = "simd_x86", since = "1.27.0")]
2775#[allow(clippy::cast_ptr_alignment)]
2776#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2777pub const unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2778 *(mem_addr as *mut __m128d) = a;
2779}
2780
2781#[inline]
2787#[target_feature(enable = "sse2")]
2788#[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")]
2790#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2791pub const unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2792 mem_addr.cast::<__m128d>().write_unaligned(a);
2793}
2794
2795#[inline]
2801#[target_feature(enable = "sse2")]
2802#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2803#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2804pub const unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
2805 ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
2806}
2807
2808#[inline]
2814#[target_feature(enable = "sse2")]
2815#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2816#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2817pub const unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
2818 ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
2819}
2820
2821#[inline]
2827#[target_feature(enable = "sse2")]
2828#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2830pub const unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
2831 ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
2832}
2833
2834#[inline]
2840#[target_feature(enable = "sse2")]
2841#[stable(feature = "simd_x86", since = "1.27.0")]
2842#[allow(clippy::cast_ptr_alignment)]
2843#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2844pub const unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2845 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2846 *(mem_addr as *mut __m128d) = b;
2847}
2848
2849#[inline]
2855#[target_feature(enable = "sse2")]
2856#[stable(feature = "simd_x86", since = "1.27.0")]
2857#[allow(clippy::cast_ptr_alignment)]
2858#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2859pub const unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2860 let b: __m128d = simd_shuffle!(a, a, [0, 0]);
2861 *(mem_addr as *mut __m128d) = b;
2862}
2863
2864#[inline]
2871#[target_feature(enable = "sse2")]
2872#[stable(feature = "simd_x86", since = "1.27.0")]
2873#[allow(clippy::cast_ptr_alignment)]
2874#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2875pub const unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2876 let b: __m128d = simd_shuffle!(a, a, [1, 0]);
2877 *(mem_addr as *mut __m128d) = b;
2878}
2879
2880#[inline]
2885#[target_feature(enable = "sse2")]
2886#[cfg_attr(test, assert_instr(movhps))]
2887#[stable(feature = "simd_x86", since = "1.27.0")]
2888#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2889pub const unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2890 *mem_addr = simd_extract!(a, 1);
2891}
2892
2893#[inline]
2898#[target_feature(enable = "sse2")]
2899#[cfg_attr(test, assert_instr(movlps))]
2900#[stable(feature = "simd_x86", since = "1.27.0")]
2901#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2902pub const unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2903 *mem_addr = simd_extract!(a, 0);
2904}
2905
2906#[inline]
2911#[target_feature(enable = "sse2")]
2912#[stable(feature = "simd_x86", since = "1.27.0")]
2914#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2915pub const unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2916 let d = *mem_addr;
2917 _mm_setr_pd(d, d)
2918}
2919
2920#[inline]
2925#[target_feature(enable = "sse2")]
2926#[stable(feature = "simd_x86", since = "1.27.0")]
2928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2929pub const unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
2930 _mm_load1_pd(mem_addr)
2931}
2932
2933#[inline]
2939#[target_feature(enable = "sse2")]
2940#[cfg_attr(
2941 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
2942 assert_instr(movaps)
2943)]
2944#[stable(feature = "simd_x86", since = "1.27.0")]
2945#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2946pub const unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2947 let a = _mm_load_pd(mem_addr);
2948 simd_shuffle!(a, a, [1, 0])
2949}
2950
2951#[inline]
2957#[target_feature(enable = "sse2")]
2958#[cfg_attr(test, assert_instr(movups))]
2959#[stable(feature = "simd_x86", since = "1.27.0")]
2960#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2961pub const unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2962 let mut dst = _mm_undefined_pd();
2963 ptr::copy_nonoverlapping(
2964 mem_addr as *const u8,
2965 ptr::addr_of_mut!(dst) as *mut u8,
2966 mem::size_of::<__m128d>(),
2967 );
2968 dst
2969}
2970
2971#[inline]
2977#[target_feature(enable = "sse2")]
2978#[stable(feature = "simd_x86_updates", since = "1.82.0")]
2979#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2980pub const unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
2981 transmute(i16x8::new(
2982 ptr::read_unaligned(mem_addr as *const i16),
2983 0,
2984 0,
2985 0,
2986 0,
2987 0,
2988 0,
2989 0,
2990 ))
2991}
2992
2993#[inline]
2999#[target_feature(enable = "sse2")]
3000#[stable(feature = "simd_x86_updates", since = "1.82.0")]
3001#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3002pub const unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
3003 transmute(i32x4::new(
3004 ptr::read_unaligned(mem_addr as *const i32),
3005 0,
3006 0,
3007 0,
3008 ))
3009}
3010
3011#[inline]
3017#[target_feature(enable = "sse2")]
3018#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
3019#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3020pub const unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
3021 transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
3022}
3023
/// Constructs a 128-bit floating-point vector of two `f64` lanes from the
/// lanes of `a` and `b`, selected by the immediate `MASK`.
///
/// Bit 0 of `MASK` picks which lane of `a` becomes the lower lane of the
/// result; bit 1 picks which lane of `b` becomes the upper lane. The other
/// bits of `MASK` do not take part in the lane selection.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    // NOTE(review): presumably a compile-time check that `MASK` fits in 8
    // unsigned bits — confirm against the macro definition.
    static_assert_uimm_bits!(MASK, 8);
    // Shuffle indices 0..=1 address the lanes of `a`, 2..=3 the lanes of `b`,
    // hence the `+ 2` on the second index.
    unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
}
3039
3040#[inline]
3046#[target_feature(enable = "sse2")]
3047#[cfg_attr(test, assert_instr(movsd))]
3048#[stable(feature = "simd_x86", since = "1.27.0")]
3049#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3050pub const fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
3051 unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
3052}
3053
3054#[inline]
3059#[target_feature(enable = "sse2")]
3060#[stable(feature = "simd_x86", since = "1.27.0")]
3061#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3062pub const fn _mm_castpd_ps(a: __m128d) -> __m128 {
3063 unsafe { transmute(a) }
3064}
3065
3066#[inline]
3071#[target_feature(enable = "sse2")]
3072#[stable(feature = "simd_x86", since = "1.27.0")]
3073#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3074pub const fn _mm_castpd_si128(a: __m128d) -> __m128i {
3075 unsafe { transmute(a) }
3076}
3077
3078#[inline]
3083#[target_feature(enable = "sse2")]
3084#[stable(feature = "simd_x86", since = "1.27.0")]
3085#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3086pub const fn _mm_castps_pd(a: __m128) -> __m128d {
3087 unsafe { transmute(a) }
3088}
3089
3090#[inline]
3095#[target_feature(enable = "sse2")]
3096#[stable(feature = "simd_x86", since = "1.27.0")]
3097#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3098pub const fn _mm_castps_si128(a: __m128) -> __m128i {
3099 unsafe { transmute(a) }
3100}
3101
3102#[inline]
3107#[target_feature(enable = "sse2")]
3108#[stable(feature = "simd_x86", since = "1.27.0")]
3109#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3110pub const fn _mm_castsi128_pd(a: __m128i) -> __m128d {
3111 unsafe { transmute(a) }
3112}
3113
3114#[inline]
3119#[target_feature(enable = "sse2")]
3120#[stable(feature = "simd_x86", since = "1.27.0")]
3121#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3122pub const fn _mm_castsi128_ps(a: __m128i) -> __m128 {
3123 unsafe { transmute(a) }
3124}
3125
3126#[inline]
3133#[target_feature(enable = "sse2")]
3134#[stable(feature = "simd_x86", since = "1.27.0")]
3135#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3136pub const fn _mm_undefined_pd() -> __m128d {
3137 const { unsafe { mem::zeroed() } }
3138}
3139
3140#[inline]
3147#[target_feature(enable = "sse2")]
3148#[stable(feature = "simd_x86", since = "1.27.0")]
3149#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3150pub const fn _mm_undefined_si128() -> __m128i {
3151 const { unsafe { mem::zeroed() } }
3152}
3153
/// Interleaves the upper elements of `a` and `b`.
///
/// The result's lower element is the upper element of `a` and its upper
/// element is the upper element of `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    // Shuffle indices 0..=1 address `a` and 2..=3 address `b`, so `[1, 3]`
    // picks the upper lane of each input.
    unsafe { simd_shuffle!(a, b, [1, 3]) }
}
3169
/// Interleaves the lower elements of `a` and `b`.
///
/// The result's lower element is the lower element of `a` and its upper
/// element is the lower element of `b`.
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    // Shuffle indices 0..=1 address `a` and 2..=3 address `b`, so `[0, 2]`
    // picks the lower lane of each input.
    unsafe { simd_shuffle!(a, b, [0, 2]) }
}
3185
// Declarations of the LLVM SSE2 intrinsics used by the wrappers in this
// module. Each `link_name` names the LLVM intrinsic the Rust-side function is
// bound to; the vector types in the signatures are declared elsewhere in the
// crate, which is why `improper_ctypes` is silenced for the whole block.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Spin-wait hint, cache-line flush and memory fences.
    #[link_name = "llvm.x86.sse2.pause"]
    fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    fn clflush(p: *const u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    fn mfence();

    // Integer multiply-add and sum of absolute differences.
    #[link_name = "llvm.x86.sse2.pmadd.wd"]
    fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.sse2.psad.bw"]
    fn psadbw(a: u8x16, b: u8x16) -> u64x2;

    // Shifts whose count is supplied in a vector register.
    #[link_name = "llvm.x86.sse2.psll.w"]
    fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.d"]
    fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.q"]
    fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psra.w"]
    fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.d"]
    fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    fn psrlq(a: i64x2, count: i64x2) -> i64x2;

    // Float-to-integer conversion, masked store and packing.
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    fn cvtps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
    #[link_name = "llvm.x86.sse2.packsswb.128"]
    fn packsswb(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.x86.sse2.packssdw.128"]
    fn packssdw(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.x86.sse2.packuswb.128"]
    fn packuswb(a: i16x8, b: i16x8) -> u8x16;

    // Double-precision min/max, in scalar (`sd`) and packed (`pd`) forms.
    #[link_name = "llvm.x86.sse2.max.sd"]
    fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    fn minpd(a: __m128d, b: __m128d) -> __m128d;

    // Double-precision comparisons; `imm8` selects the comparison predicate.
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;

    // Scalar comparisons returning an integer result (`comi*` family).
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    fn comineqsd(a: __m128d, b: __m128d) -> i32;

    // Scalar comparisons returning an integer result (`ucomi*` family).
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    fn ucomineqsd(a: __m128d, b: __m128d) -> i32;

    // Floating-point conversions; the `cvtt*` variants truncate.
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    fn cvttps2dq(a: __m128) -> i32x4;
}
3275
3276#[cfg(test)]
3277mod tests {
3278 use crate::core_arch::assert_eq_const as assert_eq;
3279 use crate::{
3280 core_arch::{simd::*, x86::*},
3281 hint::black_box,
3282 };
3283 use std::{boxed, f32, f64, mem, ptr};
3284 use stdarch_test::simd_test;
3285
    // Shorthand for `f64::NAN`, available to the tests in this module.
    const NAN: f64 = f64::NAN;
3287
3288 #[test]
3289 fn test_mm_pause() {
3290 _mm_pause()
3291 }
3292
3293 #[simd_test(enable = "sse2")]
3294 unsafe fn test_mm_clflush() {
3295 let x = 0_u8;
3296 _mm_clflush(ptr::addr_of!(x));
3297 }
3298
    // Smoke test: `_mm_lfence` only has to execute without faulting.
    // NOTE(review): skipped under Miri, presumably because the fence
    // intrinsic is not supported there — confirm.
    #[simd_test(enable = "sse2")]
    #[cfg_attr(miri, ignore)]
    fn test_mm_lfence() {
        _mm_lfence();
    }
3305
    // Smoke test: `_mm_mfence` only has to execute without faulting.
    // NOTE(review): skipped under Miri, presumably because the fence
    // intrinsic is not supported there — confirm.
    #[simd_test(enable = "sse2")]
    #[cfg_attr(miri, ignore)]
    fn test_mm_mfence() {
        _mm_mfence();
    }
3312
3313 #[simd_test(enable = "sse2")]
3314 const fn test_mm_add_epi8() {
3315 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3316 #[rustfmt::skip]
3317 let b = _mm_setr_epi8(
3318 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3319 );
3320 let r = _mm_add_epi8(a, b);
3321 #[rustfmt::skip]
3322 let e = _mm_setr_epi8(
3323 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3324 );
3325 assert_eq_m128i(r, e);
3326 }
3327
3328 #[simd_test(enable = "sse2")]
3329 fn test_mm_add_epi8_overflow() {
3330 let a = _mm_set1_epi8(0x7F);
3331 let b = _mm_set1_epi8(1);
3332 let r = _mm_add_epi8(a, b);
3333 assert_eq_m128i(r, _mm_set1_epi8(-128));
3334 }
3335
3336 #[simd_test(enable = "sse2")]
3337 const fn test_mm_add_epi16() {
3338 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3339 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3340 let r = _mm_add_epi16(a, b);
3341 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3342 assert_eq_m128i(r, e);
3343 }
3344
3345 #[simd_test(enable = "sse2")]
3346 const fn test_mm_add_epi32() {
3347 let a = _mm_setr_epi32(0, 1, 2, 3);
3348 let b = _mm_setr_epi32(4, 5, 6, 7);
3349 let r = _mm_add_epi32(a, b);
3350 let e = _mm_setr_epi32(4, 6, 8, 10);
3351 assert_eq_m128i(r, e);
3352 }
3353
3354 #[simd_test(enable = "sse2")]
3355 const fn test_mm_add_epi64() {
3356 let a = _mm_setr_epi64x(0, 1);
3357 let b = _mm_setr_epi64x(2, 3);
3358 let r = _mm_add_epi64(a, b);
3359 let e = _mm_setr_epi64x(2, 4);
3360 assert_eq_m128i(r, e);
3361 }
3362
3363 #[simd_test(enable = "sse2")]
3364 const fn test_mm_adds_epi8() {
3365 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3366 #[rustfmt::skip]
3367 let b = _mm_setr_epi8(
3368 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3369 );
3370 let r = _mm_adds_epi8(a, b);
3371 #[rustfmt::skip]
3372 let e = _mm_setr_epi8(
3373 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3374 );
3375 assert_eq_m128i(r, e);
3376 }
3377
3378 #[simd_test(enable = "sse2")]
3379 fn test_mm_adds_epi8_saturate_positive() {
3380 let a = _mm_set1_epi8(0x7F);
3381 let b = _mm_set1_epi8(1);
3382 let r = _mm_adds_epi8(a, b);
3383 assert_eq_m128i(r, a);
3384 }
3385
3386 #[simd_test(enable = "sse2")]
3387 fn test_mm_adds_epi8_saturate_negative() {
3388 let a = _mm_set1_epi8(-0x80);
3389 let b = _mm_set1_epi8(-1);
3390 let r = _mm_adds_epi8(a, b);
3391 assert_eq_m128i(r, a);
3392 }
3393
3394 #[simd_test(enable = "sse2")]
3395 const fn test_mm_adds_epi16() {
3396 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3397 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3398 let r = _mm_adds_epi16(a, b);
3399 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3400 assert_eq_m128i(r, e);
3401 }
3402
3403 #[simd_test(enable = "sse2")]
3404 fn test_mm_adds_epi16_saturate_positive() {
3405 let a = _mm_set1_epi16(0x7FFF);
3406 let b = _mm_set1_epi16(1);
3407 let r = _mm_adds_epi16(a, b);
3408 assert_eq_m128i(r, a);
3409 }
3410
3411 #[simd_test(enable = "sse2")]
3412 fn test_mm_adds_epi16_saturate_negative() {
3413 let a = _mm_set1_epi16(-0x8000);
3414 let b = _mm_set1_epi16(-1);
3415 let r = _mm_adds_epi16(a, b);
3416 assert_eq_m128i(r, a);
3417 }
3418
3419 #[simd_test(enable = "sse2")]
3420 const fn test_mm_adds_epu8() {
3421 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3422 #[rustfmt::skip]
3423 let b = _mm_setr_epi8(
3424 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3425 );
3426 let r = _mm_adds_epu8(a, b);
3427 #[rustfmt::skip]
3428 let e = _mm_setr_epi8(
3429 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3430 );
3431 assert_eq_m128i(r, e);
3432 }
3433
3434 #[simd_test(enable = "sse2")]
3435 fn test_mm_adds_epu8_saturate() {
3436 let a = _mm_set1_epi8(!0);
3437 let b = _mm_set1_epi8(1);
3438 let r = _mm_adds_epu8(a, b);
3439 assert_eq_m128i(r, a);
3440 }
3441
3442 #[simd_test(enable = "sse2")]
3443 const fn test_mm_adds_epu16() {
3444 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3445 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3446 let r = _mm_adds_epu16(a, b);
3447 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3448 assert_eq_m128i(r, e);
3449 }
3450
3451 #[simd_test(enable = "sse2")]
3452 fn test_mm_adds_epu16_saturate() {
3453 let a = _mm_set1_epi16(!0);
3454 let b = _mm_set1_epi16(1);
3455 let r = _mm_adds_epu16(a, b);
3456 assert_eq_m128i(r, a);
3457 }
3458
3459 #[simd_test(enable = "sse2")]
3460 const fn test_mm_avg_epu8() {
3461 let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3462 let r = _mm_avg_epu8(a, b);
3463 assert_eq_m128i(r, _mm_set1_epi8(6));
3464 }
3465
3466 #[simd_test(enable = "sse2")]
3467 const fn test_mm_avg_epu16() {
3468 let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3469 let r = _mm_avg_epu16(a, b);
3470 assert_eq_m128i(r, _mm_set1_epi16(6));
3471 }
3472
3473 #[simd_test(enable = "sse2")]
3474 fn test_mm_madd_epi16() {
3475 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3476 let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3477 let r = _mm_madd_epi16(a, b);
3478 let e = _mm_setr_epi32(29, 81, 149, 233);
3479 assert_eq_m128i(r, e);
3480
3481 let a = _mm_setr_epi16(
3484 i16::MAX,
3485 i16::MAX,
3486 i16::MIN,
3487 i16::MIN,
3488 i16::MIN,
3489 i16::MAX,
3490 0,
3491 0,
3492 );
3493 let b = _mm_setr_epi16(
3494 i16::MAX,
3495 i16::MAX,
3496 i16::MIN,
3497 i16::MIN,
3498 i16::MAX,
3499 i16::MIN,
3500 0,
3501 0,
3502 );
3503 let r = _mm_madd_epi16(a, b);
3504 let e = _mm_setr_epi32(0x7FFE0002, i32::MIN, -0x7FFF0000, 0);
3505 assert_eq_m128i(r, e);
3506 }
3507
3508 #[simd_test(enable = "sse2")]
3509 const fn test_mm_max_epi16() {
3510 let a = _mm_set1_epi16(1);
3511 let b = _mm_set1_epi16(-1);
3512 let r = _mm_max_epi16(a, b);
3513 assert_eq_m128i(r, a);
3514 }
3515
3516 #[simd_test(enable = "sse2")]
3517 const fn test_mm_max_epu8() {
3518 let a = _mm_set1_epi8(1);
3519 let b = _mm_set1_epi8(!0);
3520 let r = _mm_max_epu8(a, b);
3521 assert_eq_m128i(r, b);
3522 }
3523
3524 #[simd_test(enable = "sse2")]
3525 const fn test_mm_min_epi16() {
3526 let a = _mm_set1_epi16(1);
3527 let b = _mm_set1_epi16(-1);
3528 let r = _mm_min_epi16(a, b);
3529 assert_eq_m128i(r, b);
3530 }
3531
3532 #[simd_test(enable = "sse2")]
3533 const fn test_mm_min_epu8() {
3534 let a = _mm_set1_epi8(1);
3535 let b = _mm_set1_epi8(!0);
3536 let r = _mm_min_epu8(a, b);
3537 assert_eq_m128i(r, a);
3538 }
3539
3540 #[simd_test(enable = "sse2")]
3541 const fn test_mm_mulhi_epi16() {
3542 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3543 let r = _mm_mulhi_epi16(a, b);
3544 assert_eq_m128i(r, _mm_set1_epi16(-16));
3545 }
3546
3547 #[simd_test(enable = "sse2")]
3548 const fn test_mm_mulhi_epu16() {
3549 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3550 let r = _mm_mulhi_epu16(a, b);
3551 assert_eq_m128i(r, _mm_set1_epi16(15));
3552 }
3553
3554 #[simd_test(enable = "sse2")]
3555 const fn test_mm_mullo_epi16() {
3556 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3557 let r = _mm_mullo_epi16(a, b);
3558 assert_eq_m128i(r, _mm_set1_epi16(-17960));
3559 }
3560
3561 #[simd_test(enable = "sse2")]
3562 const fn test_mm_mul_epu32() {
3563 let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3564 let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3565 let r = _mm_mul_epu32(a, b);
3566 let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3567 assert_eq_m128i(r, e);
3568 }
3569
3570 #[simd_test(enable = "sse2")]
3571 fn test_mm_sad_epu8() {
3572 #[rustfmt::skip]
3573 let a = _mm_setr_epi8(
3574 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3575 1, 2, 3, 4,
3576 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3577 1, 2, 3, 4,
3578 );
3579 let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3580 let r = _mm_sad_epu8(a, b);
3581 let e = _mm_setr_epi64x(1020, 614);
3582 assert_eq_m128i(r, e);
3583 }
3584
3585 #[simd_test(enable = "sse2")]
3586 const fn test_mm_sub_epi8() {
3587 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3588 let r = _mm_sub_epi8(a, b);
3589 assert_eq_m128i(r, _mm_set1_epi8(-1));
3590 }
3591
3592 #[simd_test(enable = "sse2")]
3593 const fn test_mm_sub_epi16() {
3594 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3595 let r = _mm_sub_epi16(a, b);
3596 assert_eq_m128i(r, _mm_set1_epi16(-1));
3597 }
3598
3599 #[simd_test(enable = "sse2")]
3600 const fn test_mm_sub_epi32() {
3601 let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3602 let r = _mm_sub_epi32(a, b);
3603 assert_eq_m128i(r, _mm_set1_epi32(-1));
3604 }
3605
3606 #[simd_test(enable = "sse2")]
3607 const fn test_mm_sub_epi64() {
3608 let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3609 let r = _mm_sub_epi64(a, b);
3610 assert_eq_m128i(r, _mm_set1_epi64x(-1));
3611 }
3612
3613 #[simd_test(enable = "sse2")]
3614 const fn test_mm_subs_epi8() {
3615 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3616 let r = _mm_subs_epi8(a, b);
3617 assert_eq_m128i(r, _mm_set1_epi8(3));
3618 }
3619
3620 #[simd_test(enable = "sse2")]
3621 fn test_mm_subs_epi8_saturate_positive() {
3622 let a = _mm_set1_epi8(0x7F);
3623 let b = _mm_set1_epi8(-1);
3624 let r = _mm_subs_epi8(a, b);
3625 assert_eq_m128i(r, a);
3626 }
3627
3628 #[simd_test(enable = "sse2")]
3629 fn test_mm_subs_epi8_saturate_negative() {
3630 let a = _mm_set1_epi8(-0x80);
3631 let b = _mm_set1_epi8(1);
3632 let r = _mm_subs_epi8(a, b);
3633 assert_eq_m128i(r, a);
3634 }
3635
3636 #[simd_test(enable = "sse2")]
3637 const fn test_mm_subs_epi16() {
3638 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3639 let r = _mm_subs_epi16(a, b);
3640 assert_eq_m128i(r, _mm_set1_epi16(3));
3641 }
3642
3643 #[simd_test(enable = "sse2")]
3644 fn test_mm_subs_epi16_saturate_positive() {
3645 let a = _mm_set1_epi16(0x7FFF);
3646 let b = _mm_set1_epi16(-1);
3647 let r = _mm_subs_epi16(a, b);
3648 assert_eq_m128i(r, a);
3649 }
3650
3651 #[simd_test(enable = "sse2")]
3652 fn test_mm_subs_epi16_saturate_negative() {
3653 let a = _mm_set1_epi16(-0x8000);
3654 let b = _mm_set1_epi16(1);
3655 let r = _mm_subs_epi16(a, b);
3656 assert_eq_m128i(r, a);
3657 }
3658
3659 #[simd_test(enable = "sse2")]
3660 const fn test_mm_subs_epu8() {
3661 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3662 let r = _mm_subs_epu8(a, b);
3663 assert_eq_m128i(r, _mm_set1_epi8(3));
3664 }
3665
3666 #[simd_test(enable = "sse2")]
3667 fn test_mm_subs_epu8_saturate() {
3668 let a = _mm_set1_epi8(0);
3669 let b = _mm_set1_epi8(1);
3670 let r = _mm_subs_epu8(a, b);
3671 assert_eq_m128i(r, a);
3672 }
3673
3674 #[simd_test(enable = "sse2")]
3675 const fn test_mm_subs_epu16() {
3676 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3677 let r = _mm_subs_epu16(a, b);
3678 assert_eq_m128i(r, _mm_set1_epi16(3));
3679 }
3680
3681 #[simd_test(enable = "sse2")]
3682 fn test_mm_subs_epu16_saturate() {
3683 let a = _mm_set1_epi16(0);
3684 let b = _mm_set1_epi16(1);
3685 let r = _mm_subs_epu16(a, b);
3686 assert_eq_m128i(r, a);
3687 }
3688
3689 #[simd_test(enable = "sse2")]
3690 const fn test_mm_slli_si128() {
3691 #[rustfmt::skip]
3692 let a = _mm_setr_epi8(
3693 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3694 );
3695 let r = _mm_slli_si128::<1>(a);
3696 let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3697 assert_eq_m128i(r, e);
3698
3699 #[rustfmt::skip]
3700 let a = _mm_setr_epi8(
3701 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3702 );
3703 let r = _mm_slli_si128::<15>(a);
3704 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3705 assert_eq_m128i(r, e);
3706
3707 #[rustfmt::skip]
3708 let a = _mm_setr_epi8(
3709 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3710 );
3711 let r = _mm_slli_si128::<16>(a);
3712 assert_eq_m128i(r, _mm_set1_epi8(0));
3713 }
3714
3715 #[simd_test(enable = "sse2")]
3716 const fn test_mm_slli_epi16() {
3717 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3718 let r = _mm_slli_epi16::<4>(a);
3719 assert_eq_m128i(
3720 r,
3721 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3722 );
3723 let r = _mm_slli_epi16::<16>(a);
3724 assert_eq_m128i(r, _mm_set1_epi16(0));
3725 }
3726
3727 #[simd_test(enable = "sse2")]
3728 unsafe fn test_mm_sll_epi16() {
3729 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3730 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
3731 assert_eq_m128i(
3732 r,
3733 _mm_setr_epi16(0xCC0, -0xCC0, 0xDD0, -0xDD0, 0xEE0, -0xEE0, 0xFF0, -0xFF0),
3734 );
3735 let r = _mm_sll_epi16(a, _mm_set_epi64x(4, 0));
3736 assert_eq_m128i(r, a);
3737 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 16));
3738 assert_eq_m128i(r, _mm_set1_epi16(0));
3739 let r = _mm_sll_epi16(a, _mm_set_epi64x(0, i64::MAX));
3740 assert_eq_m128i(r, _mm_set1_epi16(0));
3741 }
3742
3743 #[simd_test(enable = "sse2")]
3744 const fn test_mm_slli_epi32() {
3745 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3746 let r = _mm_slli_epi32::<4>(a);
3747 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3748 let r = _mm_slli_epi32::<32>(a);
3749 assert_eq_m128i(r, _mm_set1_epi32(0));
3750 }
3751
3752 #[simd_test(enable = "sse2")]
3753 fn test_mm_sll_epi32() {
3754 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3755 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
3756 assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
3757 let r = _mm_sll_epi32(a, _mm_set_epi64x(4, 0));
3758 assert_eq_m128i(r, a);
3759 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 32));
3760 assert_eq_m128i(r, _mm_set1_epi32(0));
3761 let r = _mm_sll_epi32(a, _mm_set_epi64x(0, i64::MAX));
3762 assert_eq_m128i(r, _mm_set1_epi32(0));
3763 }
3764
3765 #[simd_test(enable = "sse2")]
3766 const fn test_mm_slli_epi64() {
3767 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3768 let r = _mm_slli_epi64::<4>(a);
3769 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3770 let r = _mm_slli_epi64::<64>(a);
3771 assert_eq_m128i(r, _mm_set1_epi64x(0));
3772 }
3773
3774 #[simd_test(enable = "sse2")]
3775 fn test_mm_sll_epi64() {
3776 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3777 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
3778 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
3779 let r = _mm_sll_epi64(a, _mm_set_epi64x(4, 0));
3780 assert_eq_m128i(r, a);
3781 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 64));
3782 assert_eq_m128i(r, _mm_set1_epi64x(0));
3783 let r = _mm_sll_epi64(a, _mm_set_epi64x(0, i64::MAX));
3784 assert_eq_m128i(r, _mm_set1_epi64x(0));
3785 }
3786
3787 #[simd_test(enable = "sse2")]
3788 const fn test_mm_srai_epi16() {
3789 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3790 let r = _mm_srai_epi16::<4>(a);
3791 assert_eq_m128i(
3792 r,
3793 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3794 );
3795 let r = _mm_srai_epi16::<16>(a);
3796 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3797 }
3798
3799 #[simd_test(enable = "sse2")]
3800 fn test_mm_sra_epi16() {
3801 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3802 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
3803 assert_eq_m128i(
3804 r,
3805 _mm_setr_epi16(0xC, -0xD, 0xD, -0xE, 0xE, -0xF, 0xF, -0x10),
3806 );
3807 let r = _mm_sra_epi16(a, _mm_set_epi64x(4, 0));
3808 assert_eq_m128i(r, a);
3809 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 16));
3810 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3811 let r = _mm_sra_epi16(a, _mm_set_epi64x(0, i64::MAX));
3812 assert_eq_m128i(r, _mm_setr_epi16(0, -1, 0, -1, 0, -1, 0, -1));
3813 }
3814
3815 #[simd_test(enable = "sse2")]
3816 const fn test_mm_srai_epi32() {
3817 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3818 let r = _mm_srai_epi32::<4>(a);
3819 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3820 let r = _mm_srai_epi32::<32>(a);
3821 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3822 }
3823
3824 #[simd_test(enable = "sse2")]
3825 fn test_mm_sra_epi32() {
3826 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3827 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
3828 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
3829 let r = _mm_sra_epi32(a, _mm_set_epi64x(4, 0));
3830 assert_eq_m128i(r, a);
3831 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 32));
3832 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3833 let r = _mm_sra_epi32(a, _mm_set_epi64x(0, i64::MAX));
3834 assert_eq_m128i(r, _mm_setr_epi32(0, -1, 0, -1));
3835 }
3836
3837 #[simd_test(enable = "sse2")]
3838 const fn test_mm_srli_si128() {
3839 #[rustfmt::skip]
3840 let a = _mm_setr_epi8(
3841 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3842 );
3843 let r = _mm_srli_si128::<1>(a);
3844 #[rustfmt::skip]
3845 let e = _mm_setr_epi8(
3846 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3847 );
3848 assert_eq_m128i(r, e);
3849
3850 #[rustfmt::skip]
3851 let a = _mm_setr_epi8(
3852 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3853 );
3854 let r = _mm_srli_si128::<15>(a);
3855 let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3856 assert_eq_m128i(r, e);
3857
3858 #[rustfmt::skip]
3859 let a = _mm_setr_epi8(
3860 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3861 );
3862 let r = _mm_srli_si128::<16>(a);
3863 assert_eq_m128i(r, _mm_set1_epi8(0));
3864 }
3865
3866 #[simd_test(enable = "sse2")]
3867 const fn test_mm_srli_epi16() {
3868 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3869 let r = _mm_srli_epi16::<4>(a);
3870 assert_eq_m128i(
3871 r,
3872 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3873 );
3874 let r = _mm_srli_epi16::<16>(a);
3875 assert_eq_m128i(r, _mm_set1_epi16(0));
3876 }
3877
3878 #[simd_test(enable = "sse2")]
3879 fn test_mm_srl_epi16() {
3880 let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
3881 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
3882 assert_eq_m128i(
3883 r,
3884 _mm_setr_epi16(0xC, 0xFF3, 0xD, 0xFF2, 0xE, 0xFF1, 0xF, 0xFF0),
3885 );
3886 let r = _mm_srl_epi16(a, _mm_set_epi64x(4, 0));
3887 assert_eq_m128i(r, a);
3888 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 16));
3889 assert_eq_m128i(r, _mm_set1_epi16(0));
3890 let r = _mm_srl_epi16(a, _mm_set_epi64x(0, i64::MAX));
3891 assert_eq_m128i(r, _mm_set1_epi16(0));
3892 }
3893
3894 #[simd_test(enable = "sse2")]
3895 const fn test_mm_srli_epi32() {
3896 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3897 let r = _mm_srli_epi32::<4>(a);
3898 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3899 let r = _mm_srli_epi32::<32>(a);
3900 assert_eq_m128i(r, _mm_set1_epi32(0));
3901 }
3902
3903 #[simd_test(enable = "sse2")]
3904 fn test_mm_srl_epi32() {
3905 let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
3906 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
3907 assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
3908 let r = _mm_srl_epi32(a, _mm_set_epi64x(4, 0));
3909 assert_eq_m128i(r, a);
3910 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 32));
3911 assert_eq_m128i(r, _mm_set1_epi32(0));
3912 let r = _mm_srl_epi32(a, _mm_set_epi64x(0, i64::MAX));
3913 assert_eq_m128i(r, _mm_set1_epi32(0));
3914 }
3915
3916 #[simd_test(enable = "sse2")]
3917 const fn test_mm_srli_epi64() {
3918 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3919 let r = _mm_srli_epi64::<4>(a);
3920 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3921 let r = _mm_srli_epi64::<64>(a);
3922 assert_eq_m128i(r, _mm_set1_epi64x(0));
3923 }
3924
3925 #[simd_test(enable = "sse2")]
3926 fn test_mm_srl_epi64() {
3927 let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
3928 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
3929 assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
3930 let r = _mm_srl_epi64(a, _mm_set_epi64x(4, 0));
3931 assert_eq_m128i(r, a);
3932 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 64));
3933 assert_eq_m128i(r, _mm_set1_epi64x(0));
3934 let r = _mm_srl_epi64(a, _mm_set_epi64x(0, i64::MAX));
3935 assert_eq_m128i(r, _mm_set1_epi64x(0));
3936 }
3937
3938 #[simd_test(enable = "sse2")]
3939 const fn test_mm_and_si128() {
3940 let a = _mm_set1_epi8(5);
3941 let b = _mm_set1_epi8(3);
3942 let r = _mm_and_si128(a, b);
3943 assert_eq_m128i(r, _mm_set1_epi8(1));
3944 }
3945
3946 #[simd_test(enable = "sse2")]
3947 const fn test_mm_andnot_si128() {
3948 let a = _mm_set1_epi8(5);
3949 let b = _mm_set1_epi8(3);
3950 let r = _mm_andnot_si128(a, b);
3951 assert_eq_m128i(r, _mm_set1_epi8(2));
3952 }
3953
3954 #[simd_test(enable = "sse2")]
3955 const fn test_mm_or_si128() {
3956 let a = _mm_set1_epi8(5);
3957 let b = _mm_set1_epi8(3);
3958 let r = _mm_or_si128(a, b);
3959 assert_eq_m128i(r, _mm_set1_epi8(7));
3960 }
3961
3962 #[simd_test(enable = "sse2")]
3963 const fn test_mm_xor_si128() {
3964 let a = _mm_set1_epi8(5);
3965 let b = _mm_set1_epi8(3);
3966 let r = _mm_xor_si128(a, b);
3967 assert_eq_m128i(r, _mm_set1_epi8(6));
3968 }
3969
3970 #[simd_test(enable = "sse2")]
3971 const fn test_mm_cmpeq_epi8() {
3972 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3973 let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3974 let r = _mm_cmpeq_epi8(a, b);
3975 #[rustfmt::skip]
3976 assert_eq_m128i(
3977 r,
3978 _mm_setr_epi8(
3979 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3980 )
3981 );
3982 }
3983
3984 #[simd_test(enable = "sse2")]
3985 const fn test_mm_cmpeq_epi16() {
3986 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3987 let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3988 let r = _mm_cmpeq_epi16(a, b);
3989 assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3990 }
3991
3992 #[simd_test(enable = "sse2")]
3993 const fn test_mm_cmpeq_epi32() {
3994 let a = _mm_setr_epi32(0, 1, 2, 3);
3995 let b = _mm_setr_epi32(3, 2, 2, 0);
3996 let r = _mm_cmpeq_epi32(a, b);
3997 assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
3998 }
3999
4000 #[simd_test(enable = "sse2")]
4001 const fn test_mm_cmpgt_epi8() {
4002 let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4003 let b = _mm_set1_epi8(0);
4004 let r = _mm_cmpgt_epi8(a, b);
4005 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4006 assert_eq_m128i(r, e);
4007 }
4008
4009 #[simd_test(enable = "sse2")]
4010 const fn test_mm_cmpgt_epi16() {
4011 let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
4012 let b = _mm_set1_epi16(0);
4013 let r = _mm_cmpgt_epi16(a, b);
4014 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
4015 assert_eq_m128i(r, e);
4016 }
4017
4018 #[simd_test(enable = "sse2")]
4019 const fn test_mm_cmpgt_epi32() {
4020 let a = _mm_set_epi32(5, 0, 0, 0);
4021 let b = _mm_set1_epi32(0);
4022 let r = _mm_cmpgt_epi32(a, b);
4023 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
4024 }
4025
4026 #[simd_test(enable = "sse2")]
4027 const fn test_mm_cmplt_epi8() {
4028 let a = _mm_set1_epi8(0);
4029 let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4030 let r = _mm_cmplt_epi8(a, b);
4031 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4032 assert_eq_m128i(r, e);
4033 }
4034
4035 #[simd_test(enable = "sse2")]
4036 const fn test_mm_cmplt_epi16() {
4037 let a = _mm_set1_epi16(0);
4038 let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
4039 let r = _mm_cmplt_epi16(a, b);
4040 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
4041 assert_eq_m128i(r, e);
4042 }
4043
4044 #[simd_test(enable = "sse2")]
4045 const fn test_mm_cmplt_epi32() {
4046 let a = _mm_set1_epi32(0);
4047 let b = _mm_set_epi32(5, 0, 0, 0);
4048 let r = _mm_cmplt_epi32(a, b);
4049 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
4050 }
4051
4052 #[simd_test(enable = "sse2")]
4053 const fn test_mm_cvtepi32_pd() {
4054 let a = _mm_set_epi32(35, 25, 15, 5);
4055 let r = _mm_cvtepi32_pd(a);
4056 assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
4057 }
4058
4059 #[simd_test(enable = "sse2")]
4060 const fn test_mm_cvtsi32_sd() {
4061 let a = _mm_set1_pd(3.5);
4062 let r = _mm_cvtsi32_sd(a, 5);
4063 assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
4064 }
4065
4066 #[simd_test(enable = "sse2")]
4067 const fn test_mm_cvtepi32_ps() {
4068 let a = _mm_setr_epi32(1, 2, 3, 4);
4069 let r = _mm_cvtepi32_ps(a);
4070 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
4071 }
4072
4073 #[simd_test(enable = "sse2")]
4074 unsafe fn test_mm_cvtps_epi32() {
4075 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
4076 let r = _mm_cvtps_epi32(a);
4077 assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
4078 }
4079
4080 #[simd_test(enable = "sse2")]
4081 const fn test_mm_cvtsi32_si128() {
4082 let r = _mm_cvtsi32_si128(5);
4083 assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
4084 }
4085
4086 #[simd_test(enable = "sse2")]
4087 const fn test_mm_cvtsi128_si32() {
4088 let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
4089 assert_eq!(r, 5);
4090 }
4091
4092 #[simd_test(enable = "sse2")]
4093 const fn test_mm_set_epi64x() {
4094 let r = _mm_set_epi64x(0, 1);
4095 assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
4096 }
4097
4098 #[simd_test(enable = "sse2")]
4099 const fn test_mm_set_epi32() {
4100 let r = _mm_set_epi32(0, 1, 2, 3);
4101 assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
4102 }
4103
4104 #[simd_test(enable = "sse2")]
4105 const fn test_mm_set_epi16() {
4106 let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4107 assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
4108 }
4109
4110 #[simd_test(enable = "sse2")]
4111 const fn test_mm_set_epi8() {
4112 #[rustfmt::skip]
4113 let r = _mm_set_epi8(
4114 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4115 );
4116 #[rustfmt::skip]
4117 let e = _mm_setr_epi8(
4118 15, 14, 13, 12, 11, 10, 9, 8,
4119 7, 6, 5, 4, 3, 2, 1, 0,
4120 );
4121 assert_eq_m128i(r, e);
4122 }
4123
4124 #[simd_test(enable = "sse2")]
4125 const fn test_mm_set1_epi64x() {
4126 let r = _mm_set1_epi64x(1);
4127 assert_eq_m128i(r, _mm_set1_epi64x(1));
4128 }
4129
4130 #[simd_test(enable = "sse2")]
4131 const fn test_mm_set1_epi32() {
4132 let r = _mm_set1_epi32(1);
4133 assert_eq_m128i(r, _mm_set1_epi32(1));
4134 }
4135
4136 #[simd_test(enable = "sse2")]
4137 const fn test_mm_set1_epi16() {
4138 let r = _mm_set1_epi16(1);
4139 assert_eq_m128i(r, _mm_set1_epi16(1));
4140 }
4141
4142 #[simd_test(enable = "sse2")]
4143 const fn test_mm_set1_epi8() {
4144 let r = _mm_set1_epi8(1);
4145 assert_eq_m128i(r, _mm_set1_epi8(1));
4146 }
4147
4148 #[simd_test(enable = "sse2")]
4149 const fn test_mm_setr_epi32() {
4150 let r = _mm_setr_epi32(0, 1, 2, 3);
4151 assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
4152 }
4153
4154 #[simd_test(enable = "sse2")]
4155 const fn test_mm_setr_epi16() {
4156 let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4157 assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
4158 }
4159
4160 #[simd_test(enable = "sse2")]
4161 const fn test_mm_setr_epi8() {
4162 #[rustfmt::skip]
4163 let r = _mm_setr_epi8(
4164 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
4165 );
4166 #[rustfmt::skip]
4167 let e = _mm_setr_epi8(
4168 0, 1, 2, 3, 4, 5, 6, 7,
4169 8, 9, 10, 11, 12, 13, 14, 15,
4170 );
4171 assert_eq_m128i(r, e);
4172 }
4173
4174 #[simd_test(enable = "sse2")]
4175 const fn test_mm_setzero_si128() {
4176 let r = _mm_setzero_si128();
4177 assert_eq_m128i(r, _mm_set1_epi64x(0));
4178 }
4179
4180 #[simd_test(enable = "sse2")]
4181 const unsafe fn test_mm_loadl_epi64() {
4182 let a = _mm_setr_epi64x(6, 5);
4183 let r = _mm_loadl_epi64(ptr::addr_of!(a));
4184 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
4185 }
4186
4187 #[simd_test(enable = "sse2")]
4188 const unsafe fn test_mm_load_si128() {
4189 let a = _mm_set_epi64x(5, 6);
4190 let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
4191 assert_eq_m128i(a, r);
4192 }
4193
4194 #[simd_test(enable = "sse2")]
4195 const unsafe fn test_mm_loadu_si128() {
4196 let a = _mm_set_epi64x(5, 6);
4197 let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
4198 assert_eq_m128i(a, r);
4199 }
4200
4201 #[simd_test(enable = "sse2")]
4202 #[cfg_attr(miri, ignore)]
4205 unsafe fn test_mm_maskmoveu_si128() {
4206 let a = _mm_set1_epi8(9);
4207 #[rustfmt::skip]
4208 let mask = _mm_set_epi8(
4209 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
4210 0, 0, 0, 0, 0, 0, 0, 0,
4211 );
4212 let mut r = _mm_set1_epi8(0);
4213 _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
4214 _mm_sfence();
4215 let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
4216 assert_eq_m128i(r, e);
4217 }
4218
4219 #[simd_test(enable = "sse2")]
4220 const unsafe fn test_mm_store_si128() {
4221 let a = _mm_set1_epi8(9);
4222 let mut r = _mm_set1_epi8(0);
4223 _mm_store_si128(&mut r, a);
4224 assert_eq_m128i(r, a);
4225 }
4226
4227 #[simd_test(enable = "sse2")]
4228 const unsafe fn test_mm_storeu_si128() {
4229 let a = _mm_set1_epi8(9);
4230 let mut r = _mm_set1_epi8(0);
4231 _mm_storeu_si128(&mut r, a);
4232 assert_eq_m128i(r, a);
4233 }
4234
4235 #[simd_test(enable = "sse2")]
4236 const unsafe fn test_mm_storel_epi64() {
4237 let a = _mm_setr_epi64x(2, 9);
4238 let mut r = _mm_set1_epi8(0);
4239 _mm_storel_epi64(&mut r, a);
4240 assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
4241 }
4242
4243 #[simd_test(enable = "sse2")]
4244 #[cfg_attr(miri, ignore)]
4247 unsafe fn test_mm_stream_si128() {
4248 let a = _mm_setr_epi32(1, 2, 3, 4);
4249 let mut r = _mm_undefined_si128();
4250 _mm_stream_si128(ptr::addr_of_mut!(r), a);
4251 _mm_sfence();
4252 assert_eq_m128i(r, a);
4253 }
4254
4255 #[simd_test(enable = "sse2")]
4256 #[cfg_attr(miri, ignore)]
4259 unsafe fn test_mm_stream_si32() {
4260 let a: i32 = 7;
4261 let mut mem = boxed::Box::<i32>::new(-1);
4262 _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
4263 _mm_sfence();
4264 assert_eq!(a, *mem);
4265 }
4266
4267 #[simd_test(enable = "sse2")]
4268 const fn test_mm_move_epi64() {
4269 let a = _mm_setr_epi64x(5, 6);
4270 let r = _mm_move_epi64(a);
4271 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4272 }
4273
4274 #[simd_test(enable = "sse2")]
4275 fn test_mm_packs_epi16() {
4276 let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4277 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4278 let r = _mm_packs_epi16(a, b);
4279 #[rustfmt::skip]
4280 assert_eq_m128i(
4281 r,
4282 _mm_setr_epi8(
4283 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4284 )
4285 );
4286 }
4287
4288 #[simd_test(enable = "sse2")]
4289 fn test_mm_packs_epi32() {
4290 let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4291 let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4292 let r = _mm_packs_epi32(a, b);
4293 assert_eq_m128i(
4294 r,
4295 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4296 );
4297 }
4298
4299 #[simd_test(enable = "sse2")]
4300 fn test_mm_packus_epi16() {
4301 let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4302 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4303 let r = _mm_packus_epi16(a, b);
4304 assert_eq_m128i(
4305 r,
4306 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4307 );
4308 }
4309
4310 #[simd_test(enable = "sse2")]
4311 const fn test_mm_extract_epi16() {
4312 let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
4313 let r1 = _mm_extract_epi16::<0>(a);
4314 let r2 = _mm_extract_epi16::<3>(a);
4315 assert_eq!(r1, 0xFFFF);
4316 assert_eq!(r2, 3);
4317 }
4318
4319 #[simd_test(enable = "sse2")]
4320 const fn test_mm_insert_epi16() {
4321 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4322 let r = _mm_insert_epi16::<0>(a, 9);
4323 let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4324 assert_eq_m128i(r, e);
4325 }
4326
4327 #[simd_test(enable = "sse2")]
4328 const fn test_mm_movemask_epi8() {
4329 #[rustfmt::skip]
4330 let a = _mm_setr_epi8(
4331 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
4332 0b0101, 0b1111_0000u8 as i8, 0, 0,
4333 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
4334 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
4335 );
4336 let r = _mm_movemask_epi8(a);
4337 assert_eq!(r, 0b10100110_00100101);
4338 }
4339
4340 #[simd_test(enable = "sse2")]
4341 const fn test_mm_shuffle_epi32() {
4342 let a = _mm_setr_epi32(5, 10, 15, 20);
4343 let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
4344 let e = _mm_setr_epi32(20, 10, 10, 5);
4345 assert_eq_m128i(r, e);
4346 }
4347
4348 #[simd_test(enable = "sse2")]
4349 const fn test_mm_shufflehi_epi16() {
4350 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4351 let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
4352 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4353 assert_eq_m128i(r, e);
4354 }
4355
4356 #[simd_test(enable = "sse2")]
4357 const fn test_mm_shufflelo_epi16() {
4358 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4359 let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
4360 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4361 assert_eq_m128i(r, e);
4362 }
4363
4364 #[simd_test(enable = "sse2")]
4365 const fn test_mm_unpackhi_epi8() {
4366 #[rustfmt::skip]
4367 let a = _mm_setr_epi8(
4368 0, 1, 2, 3, 4, 5, 6, 7,
4369 8, 9, 10, 11, 12, 13, 14, 15,
4370 );
4371 #[rustfmt::skip]
4372 let b = _mm_setr_epi8(
4373 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4374 );
4375 let r = _mm_unpackhi_epi8(a, b);
4376 #[rustfmt::skip]
4377 let e = _mm_setr_epi8(
4378 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4379 );
4380 assert_eq_m128i(r, e);
4381 }
4382
4383 #[simd_test(enable = "sse2")]
4384 const fn test_mm_unpackhi_epi16() {
4385 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4386 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4387 let r = _mm_unpackhi_epi16(a, b);
4388 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4389 assert_eq_m128i(r, e);
4390 }
4391
4392 #[simd_test(enable = "sse2")]
4393 const fn test_mm_unpackhi_epi32() {
4394 let a = _mm_setr_epi32(0, 1, 2, 3);
4395 let b = _mm_setr_epi32(4, 5, 6, 7);
4396 let r = _mm_unpackhi_epi32(a, b);
4397 let e = _mm_setr_epi32(2, 6, 3, 7);
4398 assert_eq_m128i(r, e);
4399 }
4400
4401 #[simd_test(enable = "sse2")]
4402 const fn test_mm_unpackhi_epi64() {
4403 let a = _mm_setr_epi64x(0, 1);
4404 let b = _mm_setr_epi64x(2, 3);
4405 let r = _mm_unpackhi_epi64(a, b);
4406 let e = _mm_setr_epi64x(1, 3);
4407 assert_eq_m128i(r, e);
4408 }
4409
4410 #[simd_test(enable = "sse2")]
4411 const fn test_mm_unpacklo_epi8() {
4412 #[rustfmt::skip]
4413 let a = _mm_setr_epi8(
4414 0, 1, 2, 3, 4, 5, 6, 7,
4415 8, 9, 10, 11, 12, 13, 14, 15,
4416 );
4417 #[rustfmt::skip]
4418 let b = _mm_setr_epi8(
4419 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4420 );
4421 let r = _mm_unpacklo_epi8(a, b);
4422 #[rustfmt::skip]
4423 let e = _mm_setr_epi8(
4424 0, 16, 1, 17, 2, 18, 3, 19,
4425 4, 20, 5, 21, 6, 22, 7, 23,
4426 );
4427 assert_eq_m128i(r, e);
4428 }
4429
4430 #[simd_test(enable = "sse2")]
4431 const fn test_mm_unpacklo_epi16() {
4432 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4433 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4434 let r = _mm_unpacklo_epi16(a, b);
4435 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4436 assert_eq_m128i(r, e);
4437 }
4438
4439 #[simd_test(enable = "sse2")]
4440 const fn test_mm_unpacklo_epi32() {
4441 let a = _mm_setr_epi32(0, 1, 2, 3);
4442 let b = _mm_setr_epi32(4, 5, 6, 7);
4443 let r = _mm_unpacklo_epi32(a, b);
4444 let e = _mm_setr_epi32(0, 4, 1, 5);
4445 assert_eq_m128i(r, e);
4446 }
4447
4448 #[simd_test(enable = "sse2")]
4449 const fn test_mm_unpacklo_epi64() {
4450 let a = _mm_setr_epi64x(0, 1);
4451 let b = _mm_setr_epi64x(2, 3);
4452 let r = _mm_unpacklo_epi64(a, b);
4453 let e = _mm_setr_epi64x(0, 2);
4454 assert_eq_m128i(r, e);
4455 }
4456
4457 #[simd_test(enable = "sse2")]
4458 const fn test_mm_add_sd() {
4459 let a = _mm_setr_pd(1.0, 2.0);
4460 let b = _mm_setr_pd(5.0, 10.0);
4461 let r = _mm_add_sd(a, b);
4462 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4463 }
4464
4465 #[simd_test(enable = "sse2")]
4466 const fn test_mm_add_pd() {
4467 let a = _mm_setr_pd(1.0, 2.0);
4468 let b = _mm_setr_pd(5.0, 10.0);
4469 let r = _mm_add_pd(a, b);
4470 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4471 }
4472
4473 #[simd_test(enable = "sse2")]
4474 const fn test_mm_div_sd() {
4475 let a = _mm_setr_pd(1.0, 2.0);
4476 let b = _mm_setr_pd(5.0, 10.0);
4477 let r = _mm_div_sd(a, b);
4478 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4479 }
4480
4481 #[simd_test(enable = "sse2")]
4482 const fn test_mm_div_pd() {
4483 let a = _mm_setr_pd(1.0, 2.0);
4484 let b = _mm_setr_pd(5.0, 10.0);
4485 let r = _mm_div_pd(a, b);
4486 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4487 }
4488
4489 #[simd_test(enable = "sse2")]
4490 fn test_mm_max_sd() {
4491 let a = _mm_setr_pd(1.0, 2.0);
4492 let b = _mm_setr_pd(5.0, 10.0);
4493 let r = _mm_max_sd(a, b);
4494 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4495 }
4496
4497 #[simd_test(enable = "sse2")]
4498 fn test_mm_max_pd() {
4499 let a = _mm_setr_pd(1.0, 2.0);
4500 let b = _mm_setr_pd(5.0, 10.0);
4501 let r = _mm_max_pd(a, b);
4502 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4503
4504 let a = _mm_setr_pd(-0.0, 0.0);
4506 let b = _mm_setr_pd(0.0, 0.0);
4507 let r1 = _mm_castpd_si128(_mm_max_pd(a, b));
4509 let r2 = _mm_castpd_si128(_mm_max_pd(b, a));
4510 let a = _mm_castpd_si128(a);
4511 let b = _mm_castpd_si128(b);
4512 assert_eq_m128i(r1, b);
4513 assert_eq_m128i(r2, a);
4514 assert_ne!(a.as_u8x16(), b.as_u8x16()); }
4516
4517 #[simd_test(enable = "sse2")]
4518 fn test_mm_min_sd() {
4519 let a = _mm_setr_pd(1.0, 2.0);
4520 let b = _mm_setr_pd(5.0, 10.0);
4521 let r = _mm_min_sd(a, b);
4522 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4523 }
4524
4525 #[simd_test(enable = "sse2")]
4526 fn test_mm_min_pd() {
4527 let a = _mm_setr_pd(1.0, 2.0);
4528 let b = _mm_setr_pd(5.0, 10.0);
4529 let r = _mm_min_pd(a, b);
4530 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4531
4532 let a = _mm_setr_pd(-0.0, 0.0);
4534 let b = _mm_setr_pd(0.0, 0.0);
4535 let r1 = _mm_castpd_si128(_mm_min_pd(a, b));
4537 let r2 = _mm_castpd_si128(_mm_min_pd(b, a));
4538 let a = _mm_castpd_si128(a);
4539 let b = _mm_castpd_si128(b);
4540 assert_eq_m128i(r1, b);
4541 assert_eq_m128i(r2, a);
4542 assert_ne!(a.as_u8x16(), b.as_u8x16()); }
4544
4545 #[simd_test(enable = "sse2")]
4546 const fn test_mm_mul_sd() {
4547 let a = _mm_setr_pd(1.0, 2.0);
4548 let b = _mm_setr_pd(5.0, 10.0);
4549 let r = _mm_mul_sd(a, b);
4550 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4551 }
4552
4553 #[simd_test(enable = "sse2")]
4554 const fn test_mm_mul_pd() {
4555 let a = _mm_setr_pd(1.0, 2.0);
4556 let b = _mm_setr_pd(5.0, 10.0);
4557 let r = _mm_mul_pd(a, b);
4558 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4559 }
4560
4561 #[simd_test(enable = "sse2")]
4562 fn test_mm_sqrt_sd() {
4563 let a = _mm_setr_pd(1.0, 2.0);
4564 let b = _mm_setr_pd(5.0, 10.0);
4565 let r = _mm_sqrt_sd(a, b);
4566 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4567 }
4568
4569 #[simd_test(enable = "sse2")]
4570 fn test_mm_sqrt_pd() {
4571 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4572 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4573 }
4574
4575 #[simd_test(enable = "sse2")]
4576 const fn test_mm_sub_sd() {
4577 let a = _mm_setr_pd(1.0, 2.0);
4578 let b = _mm_setr_pd(5.0, 10.0);
4579 let r = _mm_sub_sd(a, b);
4580 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4581 }
4582
4583 #[simd_test(enable = "sse2")]
4584 const fn test_mm_sub_pd() {
4585 let a = _mm_setr_pd(1.0, 2.0);
4586 let b = _mm_setr_pd(5.0, 10.0);
4587 let r = _mm_sub_pd(a, b);
4588 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4589 }
4590
4591 #[simd_test(enable = "sse2")]
4592 const fn test_mm_and_pd() {
4593 let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
4594 let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
4595 let r = _mm_and_pd(a, b);
4596 let e = f64x2::from_bits(u64x2::splat(1)).as_m128d();
4597 assert_eq_m128d(r, e);
4598 }
4599
4600 #[simd_test(enable = "sse2")]
4601 const fn test_mm_andnot_pd() {
4602 let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
4603 let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
4604 let r = _mm_andnot_pd(a, b);
4605 let e = f64x2::from_bits(u64x2::splat(2)).as_m128d();
4606 assert_eq_m128d(r, e);
4607 }
4608
4609 #[simd_test(enable = "sse2")]
4610 const fn test_mm_or_pd() {
4611 let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
4612 let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
4613 let r = _mm_or_pd(a, b);
4614 let e = f64x2::from_bits(u64x2::splat(7)).as_m128d();
4615 assert_eq_m128d(r, e);
4616 }
4617
4618 #[simd_test(enable = "sse2")]
4619 const fn test_mm_xor_pd() {
4620 let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
4621 let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
4622 let r = _mm_xor_pd(a, b);
4623 let e = f64x2::from_bits(u64x2::splat(6)).as_m128d();
4624 assert_eq_m128d(r, e);
4625 }
4626
4627 #[simd_test(enable = "sse2")]
4628 fn test_mm_cmpeq_sd() {
4629 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4630 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4631 let r = _mm_castpd_si128(_mm_cmpeq_sd(a, b));
4632 assert_eq_m128i(r, e);
4633 }
4634
4635 #[simd_test(enable = "sse2")]
4636 fn test_mm_cmplt_sd() {
4637 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4638 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4639 let r = _mm_castpd_si128(_mm_cmplt_sd(a, b));
4640 assert_eq_m128i(r, e);
4641 }
4642
4643 #[simd_test(enable = "sse2")]
4644 fn test_mm_cmple_sd() {
4645 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4646 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4647 let r = _mm_castpd_si128(_mm_cmple_sd(a, b));
4648 assert_eq_m128i(r, e);
4649 }
4650
4651 #[simd_test(enable = "sse2")]
4652 fn test_mm_cmpgt_sd() {
4653 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4654 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4655 let r = _mm_castpd_si128(_mm_cmpgt_sd(a, b));
4656 assert_eq_m128i(r, e);
4657 }
4658
4659 #[simd_test(enable = "sse2")]
4660 fn test_mm_cmpge_sd() {
4661 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4662 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4663 let r = _mm_castpd_si128(_mm_cmpge_sd(a, b));
4664 assert_eq_m128i(r, e);
4665 }
4666
4667 #[simd_test(enable = "sse2")]
4668 fn test_mm_cmpord_sd() {
4669 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4670 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4671 let r = _mm_castpd_si128(_mm_cmpord_sd(a, b));
4672 assert_eq_m128i(r, e);
4673 }
4674
4675 #[simd_test(enable = "sse2")]
4676 fn test_mm_cmpunord_sd() {
4677 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4678 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4679 let r = _mm_castpd_si128(_mm_cmpunord_sd(a, b));
4680 assert_eq_m128i(r, e);
4681 }
4682
4683 #[simd_test(enable = "sse2")]
4684 fn test_mm_cmpneq_sd() {
4685 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4686 let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
4687 let r = _mm_castpd_si128(_mm_cmpneq_sd(a, b));
4688 assert_eq_m128i(r, e);
4689 }
4690
4691 #[simd_test(enable = "sse2")]
4692 fn test_mm_cmpnlt_sd() {
4693 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4694 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4695 let r = _mm_castpd_si128(_mm_cmpnlt_sd(a, b));
4696 assert_eq_m128i(r, e);
4697 }
4698
4699 #[simd_test(enable = "sse2")]
4700 fn test_mm_cmpnle_sd() {
4701 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4702 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4703 let r = _mm_castpd_si128(_mm_cmpnle_sd(a, b));
4704 assert_eq_m128i(r, e);
4705 }
4706
4707 #[simd_test(enable = "sse2")]
4708 fn test_mm_cmpngt_sd() {
4709 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4710 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4711 let r = _mm_castpd_si128(_mm_cmpngt_sd(a, b));
4712 assert_eq_m128i(r, e);
4713 }
4714
4715 #[simd_test(enable = "sse2")]
4716 fn test_mm_cmpnge_sd() {
4717 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4718 let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
4719 let r = _mm_castpd_si128(_mm_cmpnge_sd(a, b));
4720 assert_eq_m128i(r, e);
4721 }
4722
4723 #[simd_test(enable = "sse2")]
4724 fn test_mm_cmpeq_pd() {
4725 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4726 let e = _mm_setr_epi64x(!0, 0);
4727 let r = _mm_castpd_si128(_mm_cmpeq_pd(a, b));
4728 assert_eq_m128i(r, e);
4729 }
4730
4731 #[simd_test(enable = "sse2")]
4732 fn test_mm_cmplt_pd() {
4733 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4734 let e = _mm_setr_epi64x(0, !0);
4735 let r = _mm_castpd_si128(_mm_cmplt_pd(a, b));
4736 assert_eq_m128i(r, e);
4737 }
4738
4739 #[simd_test(enable = "sse2")]
4740 fn test_mm_cmple_pd() {
4741 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4742 let e = _mm_setr_epi64x(!0, !0);
4743 let r = _mm_castpd_si128(_mm_cmple_pd(a, b));
4744 assert_eq_m128i(r, e);
4745 }
4746
4747 #[simd_test(enable = "sse2")]
4748 fn test_mm_cmpgt_pd() {
4749 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4750 let e = _mm_setr_epi64x(0, 0);
4751 let r = _mm_castpd_si128(_mm_cmpgt_pd(a, b));
4752 assert_eq_m128i(r, e);
4753 }
4754
4755 #[simd_test(enable = "sse2")]
4756 fn test_mm_cmpge_pd() {
4757 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4758 let e = _mm_setr_epi64x(!0, 0);
4759 let r = _mm_castpd_si128(_mm_cmpge_pd(a, b));
4760 assert_eq_m128i(r, e);
4761 }
4762
4763 #[simd_test(enable = "sse2")]
4764 fn test_mm_cmpord_pd() {
4765 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4766 let e = _mm_setr_epi64x(0, !0);
4767 let r = _mm_castpd_si128(_mm_cmpord_pd(a, b));
4768 assert_eq_m128i(r, e);
4769 }
4770
4771 #[simd_test(enable = "sse2")]
4772 fn test_mm_cmpunord_pd() {
4773 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4774 let e = _mm_setr_epi64x(!0, 0);
4775 let r = _mm_castpd_si128(_mm_cmpunord_pd(a, b));
4776 assert_eq_m128i(r, e);
4777 }
4778
4779 #[simd_test(enable = "sse2")]
4780 fn test_mm_cmpneq_pd() {
4781 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4782 let e = _mm_setr_epi64x(!0, !0);
4783 let r = _mm_castpd_si128(_mm_cmpneq_pd(a, b));
4784 assert_eq_m128i(r, e);
4785 }
4786
4787 #[simd_test(enable = "sse2")]
4788 fn test_mm_cmpnlt_pd() {
4789 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4790 let e = _mm_setr_epi64x(0, 0);
4791 let r = _mm_castpd_si128(_mm_cmpnlt_pd(a, b));
4792 assert_eq_m128i(r, e);
4793 }
4794
4795 #[simd_test(enable = "sse2")]
4796 fn test_mm_cmpnle_pd() {
4797 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4798 let e = _mm_setr_epi64x(0, 0);
4799 let r = _mm_castpd_si128(_mm_cmpnle_pd(a, b));
4800 assert_eq_m128i(r, e);
4801 }
4802
4803 #[simd_test(enable = "sse2")]
4804 fn test_mm_cmpngt_pd() {
4805 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4806 let e = _mm_setr_epi64x(0, !0);
4807 let r = _mm_castpd_si128(_mm_cmpngt_pd(a, b));
4808 assert_eq_m128i(r, e);
4809 }
4810
4811 #[simd_test(enable = "sse2")]
4812 fn test_mm_cmpnge_pd() {
4813 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4814 let e = _mm_setr_epi64x(0, !0);
4815 let r = _mm_castpd_si128(_mm_cmpnge_pd(a, b));
4816 assert_eq_m128i(r, e);
4817 }
4818
4819 #[simd_test(enable = "sse2")]
4820 fn test_mm_comieq_sd() {
4821 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4822 assert!(_mm_comieq_sd(a, b) != 0);
4823
4824 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
4825 assert!(_mm_comieq_sd(a, b) == 0);
4826 }
4827
4828 #[simd_test(enable = "sse2")]
4829 fn test_mm_comilt_sd() {
4830 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4831 assert!(_mm_comilt_sd(a, b) == 0);
4832 }
4833
4834 #[simd_test(enable = "sse2")]
4835 fn test_mm_comile_sd() {
4836 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4837 assert!(_mm_comile_sd(a, b) != 0);
4838 }
4839
4840 #[simd_test(enable = "sse2")]
4841 fn test_mm_comigt_sd() {
4842 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4843 assert!(_mm_comigt_sd(a, b) == 0);
4844 }
4845
4846 #[simd_test(enable = "sse2")]
4847 fn test_mm_comige_sd() {
4848 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4849 assert!(_mm_comige_sd(a, b) != 0);
4850 }
4851
4852 #[simd_test(enable = "sse2")]
4853 fn test_mm_comineq_sd() {
4854 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4855 assert!(_mm_comineq_sd(a, b) == 0);
4856 }
4857
4858 #[simd_test(enable = "sse2")]
4859 fn test_mm_ucomieq_sd() {
4860 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4861 assert!(_mm_ucomieq_sd(a, b) != 0);
4862
4863 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
4864 assert!(_mm_ucomieq_sd(a, b) == 0);
4865 }
4866
4867 #[simd_test(enable = "sse2")]
4868 fn test_mm_ucomilt_sd() {
4869 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4870 assert!(_mm_ucomilt_sd(a, b) == 0);
4871 }
4872
4873 #[simd_test(enable = "sse2")]
4874 fn test_mm_ucomile_sd() {
4875 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4876 assert!(_mm_ucomile_sd(a, b) != 0);
4877 }
4878
4879 #[simd_test(enable = "sse2")]
4880 fn test_mm_ucomigt_sd() {
4881 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4882 assert!(_mm_ucomigt_sd(a, b) == 0);
4883 }
4884
4885 #[simd_test(enable = "sse2")]
4886 fn test_mm_ucomige_sd() {
4887 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4888 assert!(_mm_ucomige_sd(a, b) != 0);
4889 }
4890
4891 #[simd_test(enable = "sse2")]
4892 fn test_mm_ucomineq_sd() {
4893 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4894 assert!(_mm_ucomineq_sd(a, b) == 0);
4895 }
4896
4897 #[simd_test(enable = "sse2")]
4898 const fn test_mm_movemask_pd() {
4899 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4900 assert_eq!(r, 0b01);
4901
4902 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4903 assert_eq!(r, 0b11);
4904 }
4905
    // Scratch buffer of four `f64`s with 16-byte alignment, used by the
    // load/store tests in this module as the target of raw-pointer accesses.
    #[repr(align(16))]
    struct Memory {
        // Backing storage; tests take `as_ptr()` / `as_mut_ptr()` of this array.
        data: [f64; 4],
    }
4910
4911 #[simd_test(enable = "sse2")]
4912 const unsafe fn test_mm_load_pd() {
4913 let mem = Memory {
4914 data: [1.0f64, 2.0, 3.0, 4.0],
4915 };
4916 let vals = &mem.data;
4917 let d = vals.as_ptr();
4918
4919 let r = _mm_load_pd(d);
4920 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4921 }
4922
4923 #[simd_test(enable = "sse2")]
4924 const unsafe fn test_mm_load_sd() {
4925 let a = 1.;
4926 let expected = _mm_setr_pd(a, 0.);
4927 let r = _mm_load_sd(&a);
4928 assert_eq_m128d(r, expected);
4929 }
4930
4931 #[simd_test(enable = "sse2")]
4932 const unsafe fn test_mm_loadh_pd() {
4933 let a = _mm_setr_pd(1., 2.);
4934 let b = 3.;
4935 let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
4936 let r = _mm_loadh_pd(a, &b);
4937 assert_eq_m128d(r, expected);
4938 }
4939
4940 #[simd_test(enable = "sse2")]
4941 const unsafe fn test_mm_loadl_pd() {
4942 let a = _mm_setr_pd(1., 2.);
4943 let b = 3.;
4944 let expected = _mm_setr_pd(3., get_m128d(a, 1));
4945 let r = _mm_loadl_pd(a, &b);
4946 assert_eq_m128d(r, expected);
4947 }
4948
4949 #[simd_test(enable = "sse2")]
4950 #[cfg_attr(miri, ignore)]
4953 unsafe fn test_mm_stream_pd() {
4954 #[repr(align(128))]
4955 struct Memory {
4956 pub data: [f64; 2],
4957 }
4958 let a = _mm_set1_pd(7.0);
4959 let mut mem = Memory { data: [-1.0; 2] };
4960
4961 _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4962 _mm_sfence();
4963 for i in 0..2 {
4964 assert_eq!(mem.data[i], get_m128d(a, i));
4965 }
4966 }
4967
4968 #[simd_test(enable = "sse2")]
4969 const unsafe fn test_mm_store_sd() {
4970 let mut dest = 0.;
4971 let a = _mm_setr_pd(1., 2.);
4972 _mm_store_sd(&mut dest, a);
4973 assert_eq!(dest, _mm_cvtsd_f64(a));
4974 }
4975
4976 #[simd_test(enable = "sse2")]
4977 const unsafe fn test_mm_store_pd() {
4978 let mut mem = Memory { data: [0.0f64; 4] };
4979 let vals = &mut mem.data;
4980 let a = _mm_setr_pd(1.0, 2.0);
4981 let d = vals.as_mut_ptr();
4982
4983 _mm_store_pd(d, *black_box(&a));
4984 assert_eq!(vals[0], 1.0);
4985 assert_eq!(vals[1], 2.0);
4986 }
4987
4988 #[simd_test(enable = "sse2")]
4989 const unsafe fn test_mm_storeu_pd() {
4990 let mut mem = Memory { data: [0.0f64; 4] };
4992 let vals = &mut mem.data;
4993 let a = _mm_setr_pd(1.0, 2.0);
4994
4995 let p = vals.as_mut_ptr().offset(1);
4997 _mm_storeu_pd(p, *black_box(&a));
4998
4999 assert_eq!(*vals, [0.0, 1.0, 2.0, 0.0]);
5000 }
5001
5002 #[simd_test(enable = "sse2")]
5003 const unsafe fn test_mm_storeu_si16() {
5004 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
5005 let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
5006 _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
5007 let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
5008 assert_eq_m128i(r, e);
5009 }
5010
5011 #[simd_test(enable = "sse2")]
5012 const unsafe fn test_mm_storeu_si32() {
5013 let a = _mm_setr_epi32(1, 2, 3, 4);
5014 let mut r = _mm_setr_epi32(5, 6, 7, 8);
5015 _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
5016 let e = _mm_setr_epi32(1, 6, 7, 8);
5017 assert_eq_m128i(r, e);
5018 }
5019
5020 #[simd_test(enable = "sse2")]
5021 const unsafe fn test_mm_storeu_si64() {
5022 let a = _mm_setr_epi64x(1, 2);
5023 let mut r = _mm_setr_epi64x(3, 4);
5024 _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
5025 let e = _mm_setr_epi64x(1, 4);
5026 assert_eq_m128i(r, e);
5027 }
5028
5029 #[simd_test(enable = "sse2")]
5030 const unsafe fn test_mm_store1_pd() {
5031 let mut mem = Memory { data: [0.0f64; 4] };
5032 let vals = &mut mem.data;
5033 let a = _mm_setr_pd(1.0, 2.0);
5034 let d = vals.as_mut_ptr();
5035
5036 _mm_store1_pd(d, *black_box(&a));
5037 assert_eq!(vals[0], 1.0);
5038 assert_eq!(vals[1], 1.0);
5039 }
5040
5041 #[simd_test(enable = "sse2")]
5042 const unsafe fn test_mm_store_pd1() {
5043 let mut mem = Memory { data: [0.0f64; 4] };
5044 let vals = &mut mem.data;
5045 let a = _mm_setr_pd(1.0, 2.0);
5046 let d = vals.as_mut_ptr();
5047
5048 _mm_store_pd1(d, *black_box(&a));
5049 assert_eq!(vals[0], 1.0);
5050 assert_eq!(vals[1], 1.0);
5051 }
5052
5053 #[simd_test(enable = "sse2")]
5054 const unsafe fn test_mm_storer_pd() {
5055 let mut mem = Memory { data: [0.0f64; 4] };
5056 let vals = &mut mem.data;
5057 let a = _mm_setr_pd(1.0, 2.0);
5058 let d = vals.as_mut_ptr();
5059
5060 _mm_storer_pd(d, *black_box(&a));
5061 assert_eq!(vals[0], 2.0);
5062 assert_eq!(vals[1], 1.0);
5063 }
5064
5065 #[simd_test(enable = "sse2")]
5066 const unsafe fn test_mm_storeh_pd() {
5067 let mut dest = 0.;
5068 let a = _mm_setr_pd(1., 2.);
5069 _mm_storeh_pd(&mut dest, a);
5070 assert_eq!(dest, get_m128d(a, 1));
5071 }
5072
5073 #[simd_test(enable = "sse2")]
5074 const unsafe fn test_mm_storel_pd() {
5075 let mut dest = 0.;
5076 let a = _mm_setr_pd(1., 2.);
5077 _mm_storel_pd(&mut dest, a);
5078 assert_eq!(dest, _mm_cvtsd_f64(a));
5079 }
5080
5081 #[simd_test(enable = "sse2")]
5082 const unsafe fn test_mm_loadr_pd() {
5083 let mut mem = Memory {
5084 data: [1.0f64, 2.0, 3.0, 4.0],
5085 };
5086 let vals = &mut mem.data;
5087 let d = vals.as_ptr();
5088
5089 let r = _mm_loadr_pd(d);
5090 assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
5091 }
5092
5093 #[simd_test(enable = "sse2")]
5094 const unsafe fn test_mm_loadu_pd() {
5095 let mut mem = Memory {
5097 data: [1.0f64, 2.0, 3.0, 4.0],
5098 };
5099 let vals = &mut mem.data;
5100
5101 let d = vals.as_ptr().offset(1);
5103
5104 let r = _mm_loadu_pd(d);
5105 let e = _mm_setr_pd(2.0, 3.0);
5106 assert_eq_m128d(r, e);
5107 }
5108
5109 #[simd_test(enable = "sse2")]
5110 const unsafe fn test_mm_loadu_si16() {
5111 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
5112 let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
5113 assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
5114 }
5115
5116 #[simd_test(enable = "sse2")]
5117 const unsafe fn test_mm_loadu_si32() {
5118 let a = _mm_setr_epi32(1, 2, 3, 4);
5119 let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
5120 assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
5121 }
5122
5123 #[simd_test(enable = "sse2")]
5124 const unsafe fn test_mm_loadu_si64() {
5125 let a = _mm_setr_epi64x(5, 6);
5126 let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
5127 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
5128 }
5129
5130 #[simd_test(enable = "sse2")]
5131 const fn test_mm_cvtpd_ps() {
5132 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
5133 assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
5134
5135 let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
5136 assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
5137
5138 let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
5139 assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));
5140
5141 let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
5142 assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
5143 }
5144
5145 #[simd_test(enable = "sse2")]
5146 const fn test_mm_cvtps_pd() {
5147 let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
5148 assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
5149
5150 let r = _mm_cvtps_pd(_mm_setr_ps(
5151 f32::MAX,
5152 f32::INFINITY,
5153 f32::NEG_INFINITY,
5154 f32::MIN,
5155 ));
5156 assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
5157 }
5158
5159 #[simd_test(enable = "sse2")]
5160 fn test_mm_cvtpd_epi32() {
5161 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
5162 assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
5163
5164 let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
5165 assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));
5166
5167 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
5168 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5169
5170 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
5171 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5172
5173 let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
5174 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5175 }
5176
5177 #[simd_test(enable = "sse2")]
5178 fn test_mm_cvtsd_si32() {
5179 let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
5180 assert_eq!(r, -2);
5181
5182 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
5183 assert_eq!(r, i32::MIN);
5184
5185 let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
5186 assert_eq!(r, i32::MIN);
5187 }
5188
5189 #[simd_test(enable = "sse2")]
5190 fn test_mm_cvtsd_ss() {
5191 let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
5192 let b = _mm_setr_pd(2.0, -5.0);
5193
5194 let r = _mm_cvtsd_ss(a, b);
5195
5196 assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
5197
5198 let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
5199 let b = _mm_setr_pd(f64::INFINITY, -5.0);
5200
5201 let r = _mm_cvtsd_ss(a, b);
5202
5203 assert_eq_m128(
5204 r,
5205 _mm_setr_ps(
5206 f32::INFINITY,
5207 f32::NEG_INFINITY,
5208 f32::MAX,
5209 f32::NEG_INFINITY,
5210 ),
5211 );
5212 }
5213
5214 #[simd_test(enable = "sse2")]
5215 const fn test_mm_cvtsd_f64() {
5216 let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
5217 assert_eq!(r, -1.1);
5218 }
5219
5220 #[simd_test(enable = "sse2")]
5221 const fn test_mm_cvtss_sd() {
5222 let a = _mm_setr_pd(-1.1, 2.2);
5223 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
5224
5225 let r = _mm_cvtss_sd(a, b);
5226 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));
5227
5228 let a = _mm_setr_pd(-1.1, f64::INFINITY);
5229 let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);
5230
5231 let r = _mm_cvtss_sd(a, b);
5232 assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
5233 }
5234
5235 #[simd_test(enable = "sse2")]
5236 fn test_mm_cvttpd_epi32() {
5237 let a = _mm_setr_pd(-1.1, 2.2);
5238 let r = _mm_cvttpd_epi32(a);
5239 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
5240
5241 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5242 let r = _mm_cvttpd_epi32(a);
5243 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
5244 }
5245
5246 #[simd_test(enable = "sse2")]
5247 fn test_mm_cvttsd_si32() {
5248 let a = _mm_setr_pd(-1.1, 2.2);
5249 let r = _mm_cvttsd_si32(a);
5250 assert_eq!(r, -1);
5251
5252 let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
5253 let r = _mm_cvttsd_si32(a);
5254 assert_eq!(r, i32::MIN);
5255 }
5256
5257 #[simd_test(enable = "sse2")]
5258 fn test_mm_cvttps_epi32() {
5259 let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
5260 let r = _mm_cvttps_epi32(a);
5261 assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
5262
5263 let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
5264 let r = _mm_cvttps_epi32(a);
5265 assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
5266 }
5267
5268 #[simd_test(enable = "sse2")]
5269 const fn test_mm_set_sd() {
5270 let r = _mm_set_sd(-1.0_f64);
5271 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
5272 }
5273
5274 #[simd_test(enable = "sse2")]
5275 const fn test_mm_set1_pd() {
5276 let r = _mm_set1_pd(-1.0_f64);
5277 assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
5278 }
5279
5280 #[simd_test(enable = "sse2")]
5281 const fn test_mm_set_pd1() {
5282 let r = _mm_set_pd1(-2.0_f64);
5283 assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
5284 }
5285
5286 #[simd_test(enable = "sse2")]
5287 const fn test_mm_set_pd() {
5288 let r = _mm_set_pd(1.0_f64, 5.0_f64);
5289 assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
5290 }
5291
5292 #[simd_test(enable = "sse2")]
5293 const fn test_mm_setr_pd() {
5294 let r = _mm_setr_pd(1.0_f64, -5.0_f64);
5295 assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
5296 }
5297
5298 #[simd_test(enable = "sse2")]
5299 const fn test_mm_setzero_pd() {
5300 let r = _mm_setzero_pd();
5301 assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
5302 }
5303
5304 #[simd_test(enable = "sse2")]
5305 const unsafe fn test_mm_load1_pd() {
5306 let d = -5.0;
5307 let r = _mm_load1_pd(&d);
5308 assert_eq_m128d(r, _mm_setr_pd(d, d));
5309 }
5310
5311 #[simd_test(enable = "sse2")]
5312 const unsafe fn test_mm_load_pd1() {
5313 let d = -5.0;
5314 let r = _mm_load_pd1(&d);
5315 assert_eq_m128d(r, _mm_setr_pd(d, d));
5316 }
5317
5318 #[simd_test(enable = "sse2")]
5319 const fn test_mm_unpackhi_pd() {
5320 let a = _mm_setr_pd(1.0, 2.0);
5321 let b = _mm_setr_pd(3.0, 4.0);
5322 let r = _mm_unpackhi_pd(a, b);
5323 assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
5324 }
5325
5326 #[simd_test(enable = "sse2")]
5327 const fn test_mm_unpacklo_pd() {
5328 let a = _mm_setr_pd(1.0, 2.0);
5329 let b = _mm_setr_pd(3.0, 4.0);
5330 let r = _mm_unpacklo_pd(a, b);
5331 assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
5332 }
5333
5334 #[simd_test(enable = "sse2")]
5335 const fn test_mm_shuffle_pd() {
5336 let a = _mm_setr_pd(1., 2.);
5337 let b = _mm_setr_pd(3., 4.);
5338 let expected = _mm_setr_pd(1., 3.);
5339 let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
5340 assert_eq_m128d(r, expected);
5341 }
5342
5343 #[simd_test(enable = "sse2")]
5344 const fn test_mm_move_sd() {
5345 let a = _mm_setr_pd(1., 2.);
5346 let b = _mm_setr_pd(3., 4.);
5347 let expected = _mm_setr_pd(3., 2.);
5348 let r = _mm_move_sd(a, b);
5349 assert_eq_m128d(r, expected);
5350 }
5351
5352 #[simd_test(enable = "sse2")]
5353 const fn test_mm_castpd_ps() {
5354 let a = _mm_set1_pd(0.);
5355 let expected = _mm_set1_ps(0.);
5356 let r = _mm_castpd_ps(a);
5357 assert_eq_m128(r, expected);
5358 }
5359
5360 #[simd_test(enable = "sse2")]
5361 const fn test_mm_castpd_si128() {
5362 let a = _mm_set1_pd(0.);
5363 let expected = _mm_set1_epi64x(0);
5364 let r = _mm_castpd_si128(a);
5365 assert_eq_m128i(r, expected);
5366 }
5367
5368 #[simd_test(enable = "sse2")]
5369 const fn test_mm_castps_pd() {
5370 let a = _mm_set1_ps(0.);
5371 let expected = _mm_set1_pd(0.);
5372 let r = _mm_castps_pd(a);
5373 assert_eq_m128d(r, expected);
5374 }
5375
5376 #[simd_test(enable = "sse2")]
5377 const fn test_mm_castps_si128() {
5378 let a = _mm_set1_ps(0.);
5379 let expected = _mm_set1_epi32(0);
5380 let r = _mm_castps_si128(a);
5381 assert_eq_m128i(r, expected);
5382 }
5383
5384 #[simd_test(enable = "sse2")]
5385 const fn test_mm_castsi128_pd() {
5386 let a = _mm_set1_epi64x(0);
5387 let expected = _mm_set1_pd(0.);
5388 let r = _mm_castsi128_pd(a);
5389 assert_eq_m128d(r, expected);
5390 }
5391
5392 #[simd_test(enable = "sse2")]
5393 const fn test_mm_castsi128_ps() {
5394 let a = _mm_set1_epi32(0);
5395 let expected = _mm_set1_ps(0.);
5396 let r = _mm_castsi128_ps(a);
5397 assert_eq_m128(r, expected);
5398 }
5399}