1use crate::core_arch::{simd::*, x86::*};
4use crate::intrinsics::simd::*;
5
6#[cfg(test)]
7use stdarch_test::assert_instr;
8
9#[stable(feature = "simd_x86", since = "1.27.0")]
12pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
13#[stable(feature = "simd_x86", since = "1.27.0")]
15pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01;
16#[stable(feature = "simd_x86", since = "1.27.0")]
18pub const _MM_FROUND_TO_POS_INF: i32 = 0x02;
19#[stable(feature = "simd_x86", since = "1.27.0")]
21pub const _MM_FROUND_TO_ZERO: i32 = 0x03;
22#[stable(feature = "simd_x86", since = "1.27.0")]
24pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04;
25#[stable(feature = "simd_x86", since = "1.27.0")]
27pub const _MM_FROUND_RAISE_EXC: i32 = 0x00;
28#[stable(feature = "simd_x86", since = "1.27.0")]
30pub const _MM_FROUND_NO_EXC: i32 = 0x08;
31#[stable(feature = "simd_x86", since = "1.27.0")]
33pub const _MM_FROUND_NINT: i32 = 0x00;
34#[stable(feature = "simd_x86", since = "1.27.0")]
36pub const _MM_FROUND_FLOOR: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF;
37#[stable(feature = "simd_x86", since = "1.27.0")]
39pub const _MM_FROUND_CEIL: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF;
40#[stable(feature = "simd_x86", since = "1.27.0")]
42pub const _MM_FROUND_TRUNC: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO;
43#[stable(feature = "simd_x86", since = "1.27.0")]
46pub const _MM_FROUND_RINT: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION;
47#[stable(feature = "simd_x86", since = "1.27.0")]
49pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION;
50
51#[inline]
59#[target_feature(enable = "sse4.1")]
60#[cfg_attr(test, assert_instr(pblendvb))]
61#[stable(feature = "simd_x86", since = "1.27.0")]
62#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
63pub const fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
64 unsafe {
65 let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::ZERO);
66 transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16()))
67 }
68}
69
70#[inline]
78#[target_feature(enable = "sse4.1")]
79#[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xB1))]
80#[rustc_legacy_const_generics(2)]
81#[stable(feature = "simd_x86", since = "1.27.0")]
82#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
83pub const fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
84 static_assert_uimm_bits!(IMM8, 8);
85 unsafe {
86 transmute::<i16x8, _>(simd_shuffle!(
87 a.as_i16x8(),
88 b.as_i16x8(),
89 [
90 [0, 8][IMM8 as usize & 1],
91 [1, 9][(IMM8 >> 1) as usize & 1],
92 [2, 10][(IMM8 >> 2) as usize & 1],
93 [3, 11][(IMM8 >> 3) as usize & 1],
94 [4, 12][(IMM8 >> 4) as usize & 1],
95 [5, 13][(IMM8 >> 5) as usize & 1],
96 [6, 14][(IMM8 >> 6) as usize & 1],
97 [7, 15][(IMM8 >> 7) as usize & 1],
98 ]
99 ))
100 }
101}
102
103#[inline]
108#[target_feature(enable = "sse4.1")]
109#[cfg_attr(test, assert_instr(blendvpd))]
110#[stable(feature = "simd_x86", since = "1.27.0")]
111#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
112pub const fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
113 unsafe {
114 let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::ZERO);
115 transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2()))
116 }
117}
118
119#[inline]
124#[target_feature(enable = "sse4.1")]
125#[cfg_attr(test, assert_instr(blendvps))]
126#[stable(feature = "simd_x86", since = "1.27.0")]
127#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
128pub const fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
129 unsafe {
130 let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::ZERO);
131 transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4()))
132 }
133}
134
135#[inline]
140#[target_feature(enable = "sse4.1")]
141#[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))]
145#[rustc_legacy_const_generics(2)]
146#[stable(feature = "simd_x86", since = "1.27.0")]
147#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
148pub const fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
149 static_assert_uimm_bits!(IMM2, 2);
150 unsafe {
151 transmute::<f64x2, _>(simd_shuffle!(
152 a.as_f64x2(),
153 b.as_f64x2(),
154 [[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]]
155 ))
156 }
157}
158
159#[inline]
164#[target_feature(enable = "sse4.1")]
165#[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))]
166#[rustc_legacy_const_generics(2)]
167#[stable(feature = "simd_x86", since = "1.27.0")]
168#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
169pub const fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
170 static_assert_uimm_bits!(IMM4, 4);
171 unsafe {
172 transmute::<f32x4, _>(simd_shuffle!(
173 a.as_f32x4(),
174 b.as_f32x4(),
175 [
176 [0, 4][IMM4 as usize & 1],
177 [1, 5][(IMM4 >> 1) as usize & 1],
178 [2, 6][(IMM4 >> 2) as usize & 1],
179 [3, 7][(IMM4 >> 3) as usize & 1],
180 ]
181 ))
182 }
183}
184
185#[inline]
212#[target_feature(enable = "sse4.1")]
213#[cfg_attr(test, assert_instr(extractps, IMM8 = 0))]
214#[rustc_legacy_const_generics(1)]
215#[stable(feature = "simd_x86", since = "1.27.0")]
216#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
217pub const fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
218 static_assert_uimm_bits!(IMM8, 2);
219 unsafe { simd_extract!(a, IMM8 as u32, f32).to_bits() as i32 }
220}
221
222#[inline]
229#[target_feature(enable = "sse4.1")]
230#[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))]
231#[rustc_legacy_const_generics(1)]
232#[stable(feature = "simd_x86", since = "1.27.0")]
233#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
234pub const fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
235 static_assert_uimm_bits!(IMM8, 4);
236 unsafe { simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32 }
237}
238
239#[inline]
243#[target_feature(enable = "sse4.1")]
244#[cfg_attr(test, assert_instr(extractps, IMM8 = 1))]
245#[rustc_legacy_const_generics(1)]
246#[stable(feature = "simd_x86", since = "1.27.0")]
247#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
248pub const fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
249 static_assert_uimm_bits!(IMM8, 2);
250 unsafe { simd_extract!(a.as_i32x4(), IMM8 as u32, i32) }
251}
252
253#[inline]
278#[target_feature(enable = "sse4.1")]
279#[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))]
280#[rustc_legacy_const_generics(2)]
281#[stable(feature = "simd_x86", since = "1.27.0")]
282pub fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
283 static_assert_uimm_bits!(IMM8, 8);
284 unsafe { insertps(a, b, IMM8 as u8) }
285}
286
287#[inline]
292#[target_feature(enable = "sse4.1")]
293#[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))]
294#[rustc_legacy_const_generics(2)]
295#[stable(feature = "simd_x86", since = "1.27.0")]
296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
297pub const fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
298 static_assert_uimm_bits!(IMM8, 4);
299 unsafe { transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8)) }
300}
301
302#[inline]
307#[target_feature(enable = "sse4.1")]
308#[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))]
309#[rustc_legacy_const_generics(2)]
310#[stable(feature = "simd_x86", since = "1.27.0")]
311#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
312pub const fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
313 static_assert_uimm_bits!(IMM8, 2);
314 unsafe { transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i)) }
315}
316
317#[inline]
322#[target_feature(enable = "sse4.1")]
323#[cfg_attr(test, assert_instr(pmaxsb))]
324#[stable(feature = "simd_x86", since = "1.27.0")]
325#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
326pub const fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
327 unsafe { simd_imax(a.as_i8x16(), b.as_i8x16()).as_m128i() }
328}
329
330#[inline]
335#[target_feature(enable = "sse4.1")]
336#[cfg_attr(test, assert_instr(pmaxuw))]
337#[stable(feature = "simd_x86", since = "1.27.0")]
338#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
339pub const fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
340 unsafe { simd_imax(a.as_u16x8(), b.as_u16x8()).as_m128i() }
341}
342
343#[inline]
348#[target_feature(enable = "sse4.1")]
349#[cfg_attr(test, assert_instr(pmaxsd))]
350#[stable(feature = "simd_x86", since = "1.27.0")]
351#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
352pub const fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
353 unsafe { simd_imax(a.as_i32x4(), b.as_i32x4()).as_m128i() }
354}
355
356#[inline]
361#[target_feature(enable = "sse4.1")]
362#[cfg_attr(test, assert_instr(pmaxud))]
363#[stable(feature = "simd_x86", since = "1.27.0")]
364#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
365pub const fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
366 unsafe { simd_imax(a.as_u32x4(), b.as_u32x4()).as_m128i() }
367}
368
369#[inline]
374#[target_feature(enable = "sse4.1")]
375#[cfg_attr(test, assert_instr(pminsb))]
376#[stable(feature = "simd_x86", since = "1.27.0")]
377#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
378pub const fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
379 unsafe { simd_imin(a.as_i8x16(), b.as_i8x16()).as_m128i() }
380}
381
382#[inline]
387#[target_feature(enable = "sse4.1")]
388#[cfg_attr(test, assert_instr(pminuw))]
389#[stable(feature = "simd_x86", since = "1.27.0")]
390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
391pub const fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
392 unsafe { simd_imin(a.as_u16x8(), b.as_u16x8()).as_m128i() }
393}
394
395#[inline]
400#[target_feature(enable = "sse4.1")]
401#[cfg_attr(test, assert_instr(pminsd))]
402#[stable(feature = "simd_x86", since = "1.27.0")]
403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
404pub const fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
405 unsafe { simd_imin(a.as_i32x4(), b.as_i32x4()).as_m128i() }
406}
407
408#[inline]
413#[target_feature(enable = "sse4.1")]
414#[cfg_attr(test, assert_instr(pminud))]
415#[stable(feature = "simd_x86", since = "1.27.0")]
416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
417pub const fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
418 unsafe { simd_imin(a.as_u32x4(), b.as_u32x4()).as_m128i() }
419}
420
421#[inline]
426#[target_feature(enable = "sse4.1")]
427#[cfg_attr(test, assert_instr(packusdw))]
428#[stable(feature = "simd_x86", since = "1.27.0")]
429pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
430 unsafe { transmute(packusdw(a.as_i32x4(), b.as_i32x4())) }
431}
432
433#[inline]
437#[target_feature(enable = "sse4.1")]
438#[cfg_attr(test, assert_instr(pcmpeqq))]
439#[stable(feature = "simd_x86", since = "1.27.0")]
440#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
441pub const fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
442 unsafe { transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) }
443}
444
445#[inline]
449#[target_feature(enable = "sse4.1")]
450#[cfg_attr(test, assert_instr(pmovsxbw))]
451#[stable(feature = "simd_x86", since = "1.27.0")]
452#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
453pub const fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
454 unsafe {
455 let a = a.as_i8x16();
456 let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
457 transmute(simd_cast::<_, i16x8>(a))
458 }
459}
460
461#[inline]
465#[target_feature(enable = "sse4.1")]
466#[cfg_attr(test, assert_instr(pmovsxbd))]
467#[stable(feature = "simd_x86", since = "1.27.0")]
468#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
469pub const fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
470 unsafe {
471 let a = a.as_i8x16();
472 let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
473 transmute(simd_cast::<_, i32x4>(a))
474 }
475}
476
477#[inline]
482#[target_feature(enable = "sse4.1")]
483#[cfg_attr(test, assert_instr(pmovsxbq))]
484#[stable(feature = "simd_x86", since = "1.27.0")]
485#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
486pub const fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
487 unsafe {
488 let a = a.as_i8x16();
489 let a: i8x2 = simd_shuffle!(a, a, [0, 1]);
490 transmute(simd_cast::<_, i64x2>(a))
491 }
492}
493
494#[inline]
498#[target_feature(enable = "sse4.1")]
499#[cfg_attr(test, assert_instr(pmovsxwd))]
500#[stable(feature = "simd_x86", since = "1.27.0")]
501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
502pub const fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
503 unsafe {
504 let a = a.as_i16x8();
505 let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
506 transmute(simd_cast::<_, i32x4>(a))
507 }
508}
509
510#[inline]
514#[target_feature(enable = "sse4.1")]
515#[cfg_attr(test, assert_instr(pmovsxwq))]
516#[stable(feature = "simd_x86", since = "1.27.0")]
517#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
518pub const fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
519 unsafe {
520 let a = a.as_i16x8();
521 let a: i16x2 = simd_shuffle!(a, a, [0, 1]);
522 transmute(simd_cast::<_, i64x2>(a))
523 }
524}
525
526#[inline]
530#[target_feature(enable = "sse4.1")]
531#[cfg_attr(test, assert_instr(pmovsxdq))]
532#[stable(feature = "simd_x86", since = "1.27.0")]
533#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
534pub const fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
535 unsafe {
536 let a = a.as_i32x4();
537 let a: i32x2 = simd_shuffle!(a, a, [0, 1]);
538 transmute(simd_cast::<_, i64x2>(a))
539 }
540}
541
542#[inline]
546#[target_feature(enable = "sse4.1")]
547#[cfg_attr(test, assert_instr(pmovzxbw))]
548#[stable(feature = "simd_x86", since = "1.27.0")]
549#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
550pub const fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
551 unsafe {
552 let a = a.as_u8x16();
553 let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
554 transmute(simd_cast::<_, i16x8>(a))
555 }
556}
557
558#[inline]
562#[target_feature(enable = "sse4.1")]
563#[cfg_attr(test, assert_instr(pmovzxbd))]
564#[stable(feature = "simd_x86", since = "1.27.0")]
565#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
566pub const fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
567 unsafe {
568 let a = a.as_u8x16();
569 let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
570 transmute(simd_cast::<_, i32x4>(a))
571 }
572}
573
574#[inline]
578#[target_feature(enable = "sse4.1")]
579#[cfg_attr(test, assert_instr(pmovzxbq))]
580#[stable(feature = "simd_x86", since = "1.27.0")]
581#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
582pub const fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
583 unsafe {
584 let a = a.as_u8x16();
585 let a: u8x2 = simd_shuffle!(a, a, [0, 1]);
586 transmute(simd_cast::<_, i64x2>(a))
587 }
588}
589
590#[inline]
595#[target_feature(enable = "sse4.1")]
596#[cfg_attr(test, assert_instr(pmovzxwd))]
597#[stable(feature = "simd_x86", since = "1.27.0")]
598#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
599pub const fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
600 unsafe {
601 let a = a.as_u16x8();
602 let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
603 transmute(simd_cast::<_, i32x4>(a))
604 }
605}
606
607#[inline]
612#[target_feature(enable = "sse4.1")]
613#[cfg_attr(test, assert_instr(pmovzxwq))]
614#[stable(feature = "simd_x86", since = "1.27.0")]
615#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
616pub const fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
617 unsafe {
618 let a = a.as_u16x8();
619 let a: u16x2 = simd_shuffle!(a, a, [0, 1]);
620 transmute(simd_cast::<_, i64x2>(a))
621 }
622}
623
624#[inline]
629#[target_feature(enable = "sse4.1")]
630#[cfg_attr(test, assert_instr(pmovzxdq))]
631#[stable(feature = "simd_x86", since = "1.27.0")]
632#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
633pub const fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
634 unsafe {
635 let a = a.as_u32x4();
636 let a: u32x2 = simd_shuffle!(a, a, [0, 1]);
637 transmute(simd_cast::<_, i64x2>(a))
638 }
639}
640
641#[inline]
651#[target_feature(enable = "sse4.1")]
652#[cfg_attr(test, assert_instr(dppd, IMM8 = 0))]
653#[rustc_legacy_const_generics(2)]
654#[stable(feature = "simd_x86", since = "1.27.0")]
655pub fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
656 unsafe {
657 static_assert_uimm_bits!(IMM8, 8);
658 dppd(a, b, IMM8 as u8)
659 }
660}
661
662#[inline]
672#[target_feature(enable = "sse4.1")]
673#[cfg_attr(test, assert_instr(dpps, IMM8 = 0))]
674#[rustc_legacy_const_generics(2)]
675#[stable(feature = "simd_x86", since = "1.27.0")]
676pub fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
677 static_assert_uimm_bits!(IMM8, 8);
678 unsafe { dpps(a, b, IMM8 as u8) }
679}
680
681#[inline]
687#[target_feature(enable = "sse4.1")]
688#[cfg_attr(test, assert_instr(roundpd))]
689#[stable(feature = "simd_x86", since = "1.27.0")]
690#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
691pub const fn _mm_floor_pd(a: __m128d) -> __m128d {
692 unsafe { simd_floor(a) }
693}
694
695#[inline]
701#[target_feature(enable = "sse4.1")]
702#[cfg_attr(test, assert_instr(roundps))]
703#[stable(feature = "simd_x86", since = "1.27.0")]
704#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
705pub const fn _mm_floor_ps(a: __m128) -> __m128 {
706 unsafe { simd_floor(a) }
707}
708
709#[inline]
717#[target_feature(enable = "sse4.1")]
718#[cfg_attr(test, assert_instr(roundsd))]
719#[stable(feature = "simd_x86", since = "1.27.0")]
720pub fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
721 unsafe { roundsd(a, b, _MM_FROUND_FLOOR) }
722}
723
724#[inline]
732#[target_feature(enable = "sse4.1")]
733#[cfg_attr(test, assert_instr(roundss))]
734#[stable(feature = "simd_x86", since = "1.27.0")]
735pub fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
736 unsafe { roundss(a, b, _MM_FROUND_FLOOR) }
737}
738
739#[inline]
745#[target_feature(enable = "sse4.1")]
746#[cfg_attr(test, assert_instr(roundpd))]
747#[stable(feature = "simd_x86", since = "1.27.0")]
748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
749pub const fn _mm_ceil_pd(a: __m128d) -> __m128d {
750 unsafe { simd_ceil(a) }
751}
752
753#[inline]
759#[target_feature(enable = "sse4.1")]
760#[cfg_attr(test, assert_instr(roundps))]
761#[stable(feature = "simd_x86", since = "1.27.0")]
762#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
763pub const fn _mm_ceil_ps(a: __m128) -> __m128 {
764 unsafe { simd_ceil(a) }
765}
766
767#[inline]
775#[target_feature(enable = "sse4.1")]
776#[cfg_attr(test, assert_instr(roundsd))]
777#[stable(feature = "simd_x86", since = "1.27.0")]
778pub fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
779 unsafe { roundsd(a, b, _MM_FROUND_CEIL) }
780}
781
782#[inline]
790#[target_feature(enable = "sse4.1")]
791#[cfg_attr(test, assert_instr(roundss))]
792#[stable(feature = "simd_x86", since = "1.27.0")]
793pub fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
794 unsafe { roundss(a, b, _MM_FROUND_CEIL) }
795}
796
797#[inline]
810#[target_feature(enable = "sse4.1")]
811#[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))]
812#[rustc_legacy_const_generics(1)]
813#[stable(feature = "simd_x86", since = "1.27.0")]
814pub fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d {
815 static_assert_uimm_bits!(ROUNDING, 4);
816 unsafe { roundpd(a, ROUNDING) }
817}
818
819#[inline]
832#[target_feature(enable = "sse4.1")]
833#[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))]
834#[rustc_legacy_const_generics(1)]
835#[stable(feature = "simd_x86", since = "1.27.0")]
836pub fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 {
837 static_assert_uimm_bits!(ROUNDING, 4);
838 unsafe { roundps(a, ROUNDING) }
839}
840
841#[inline]
856#[target_feature(enable = "sse4.1")]
857#[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))]
858#[rustc_legacy_const_generics(2)]
859#[stable(feature = "simd_x86", since = "1.27.0")]
860pub fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
861 static_assert_uimm_bits!(ROUNDING, 4);
862 unsafe { roundsd(a, b, ROUNDING) }
863}
864
865#[inline]
880#[target_feature(enable = "sse4.1")]
881#[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))]
882#[rustc_legacy_const_generics(2)]
883#[stable(feature = "simd_x86", since = "1.27.0")]
884pub fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
885 static_assert_uimm_bits!(ROUNDING, 4);
886 unsafe { roundss(a, b, ROUNDING) }
887}
888
889#[inline]
911#[target_feature(enable = "sse4.1")]
912#[cfg_attr(test, assert_instr(phminposuw))]
913#[stable(feature = "simd_x86", since = "1.27.0")]
914pub fn _mm_minpos_epu16(a: __m128i) -> __m128i {
915 unsafe { transmute(phminposuw(a.as_u16x8())) }
916}
917
918#[inline]
923#[target_feature(enable = "sse4.1")]
924#[cfg_attr(test, assert_instr(pmuldq))]
925#[stable(feature = "simd_x86", since = "1.27.0")]
926#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
927pub const fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
928 unsafe {
929 let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2()));
930 let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2()));
931 transmute(simd_mul(a, b))
932 }
933}
934
935#[inline]
944#[target_feature(enable = "sse4.1")]
945#[cfg_attr(test, assert_instr(pmulld))]
946#[stable(feature = "simd_x86", since = "1.27.0")]
947#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
948pub const fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
949 unsafe { transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) }
950}
951
952#[inline]
986#[target_feature(enable = "sse4.1")]
987#[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))]
988#[rustc_legacy_const_generics(2)]
989#[stable(feature = "simd_x86", since = "1.27.0")]
990pub fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
991 static_assert_uimm_bits!(IMM8, 3);
992 unsafe { transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8)) }
993}
994
995#[inline]
1011#[target_feature(enable = "sse4.1")]
1012#[cfg_attr(test, assert_instr(ptest))]
1013#[stable(feature = "simd_x86", since = "1.27.0")]
1014#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1015pub const fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
1016 unsafe {
1017 let r = simd_reduce_or(simd_and(a.as_i64x2(), mask.as_i64x2()));
1018 (0i64 == r) as i32
1019 }
1020}
1021
1022#[inline]
1038#[target_feature(enable = "sse4.1")]
1039#[cfg_attr(test, assert_instr(ptest))]
1040#[stable(feature = "simd_x86", since = "1.27.0")]
1041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1042pub const fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
1043 unsafe {
1044 let r = simd_reduce_or(simd_and(
1045 simd_xor(a.as_i64x2(), i64x2::splat(!0)),
1046 mask.as_i64x2(),
1047 ));
1048 (0i64 == r) as i32
1049 }
1050}
1051
1052#[inline]
1068#[target_feature(enable = "sse4.1")]
1069#[cfg_attr(test, assert_instr(ptest))]
1070#[stable(feature = "simd_x86", since = "1.27.0")]
1071pub fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
1072 unsafe { ptestnzc(a.as_i64x2(), mask.as_i64x2()) }
1073}
1074
1075#[inline]
1091#[target_feature(enable = "sse4.1")]
1092#[cfg_attr(test, assert_instr(ptest))]
1093#[stable(feature = "simd_x86", since = "1.27.0")]
1094#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1095pub const fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
1096 _mm_testz_si128(a, mask)
1097}
1098
1099#[inline]
1113#[target_feature(enable = "sse4.1")]
1114#[cfg_attr(test, assert_instr(pcmpeqd))]
1115#[cfg_attr(test, assert_instr(ptest))]
1116#[stable(feature = "simd_x86", since = "1.27.0")]
1117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1118pub const fn _mm_test_all_ones(a: __m128i) -> i32 {
1119 _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
1120}
1121
1122#[inline]
1138#[target_feature(enable = "sse4.1")]
1139#[cfg_attr(test, assert_instr(ptest))]
1140#[stable(feature = "simd_x86", since = "1.27.0")]
1141pub fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
1142 _mm_testnzc_si128(a, mask)
1143}
1144
1145#[inline]
1151#[target_feature(enable = "sse4.1")]
1152#[cfg_attr(test, assert_instr(movntdqa))]
1153#[stable(feature = "simd_x86_updates", since = "1.82.0")]
1154pub unsafe fn _mm_stream_load_si128(mem_addr: *const __m128i) -> __m128i {
1155 let dst: __m128i;
1156 crate::arch::asm!(
1157 vpl!("movntdqa {a}"),
1158 a = out(xmm_reg) dst,
1159 p = in(reg) mem_addr,
1160 options(pure, readonly, nostack, preserves_flags),
1161 );
1162 dst
1163}
1164
1165#[allow(improper_ctypes)]
1166unsafe extern "C" {
1167 #[link_name = "llvm.x86.sse41.insertps"]
1168 fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
1169 #[link_name = "llvm.x86.sse41.packusdw"]
1170 fn packusdw(a: i32x4, b: i32x4) -> u16x8;
1171 #[link_name = "llvm.x86.sse41.dppd"]
1172 fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d;
1173 #[link_name = "llvm.x86.sse41.dpps"]
1174 fn dpps(a: __m128, b: __m128, imm8: u8) -> __m128;
1175 #[link_name = "llvm.x86.sse41.round.pd"]
1176 fn roundpd(a: __m128d, rounding: i32) -> __m128d;
1177 #[link_name = "llvm.x86.sse41.round.ps"]
1178 fn roundps(a: __m128, rounding: i32) -> __m128;
1179 #[link_name = "llvm.x86.sse41.round.sd"]
1180 fn roundsd(a: __m128d, b: __m128d, rounding: i32) -> __m128d;
1181 #[link_name = "llvm.x86.sse41.round.ss"]
1182 fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128;
1183 #[link_name = "llvm.x86.sse41.phminposuw"]
1184 fn phminposuw(a: u16x8) -> u16x8;
1185 #[link_name = "llvm.x86.sse41.mpsadbw"]
1186 fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8;
1187 #[link_name = "llvm.x86.sse41.ptestnzc"]
1188 fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
1189}
1190
1191#[cfg(test)]
1192mod tests {
1193 use crate::core_arch::assert_eq_const as assert_eq;
1194 use crate::core_arch::x86::*;
1195 use std::mem;
1196 use stdarch_test::simd_test;
1197
1198 #[simd_test(enable = "sse4.1")]
1199 const unsafe fn test_mm_blendv_epi8() {
1200 #[rustfmt::skip]
1201 let a = _mm_setr_epi8(
1202 0, 1, 2, 3, 4, 5, 6, 7,
1203 8, 9, 10, 11, 12, 13, 14, 15,
1204 );
1205 #[rustfmt::skip]
1206 let b = _mm_setr_epi8(
1207 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
1208 );
1209 #[rustfmt::skip]
1210 let mask = _mm_setr_epi8(
1211 0, -1, 0, -1, 0, -1, 0, -1,
1212 0, -1, 0, -1, 0, -1, 0, -1,
1213 );
1214 #[rustfmt::skip]
1215 let e = _mm_setr_epi8(
1216 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31,
1217 );
1218 assert_eq_m128i(_mm_blendv_epi8(a, b, mask), e);
1219 }
1220
1221 #[simd_test(enable = "sse4.1")]
1222 const unsafe fn test_mm_blendv_pd() {
1223 let a = _mm_set1_pd(0.0);
1224 let b = _mm_set1_pd(1.0);
1225 let mask = transmute(_mm_setr_epi64x(0, -1));
1226 let r = _mm_blendv_pd(a, b, mask);
1227 let e = _mm_setr_pd(0.0, 1.0);
1228 assert_eq_m128d(r, e);
1229 }
1230
1231 #[simd_test(enable = "sse4.1")]
1232 const unsafe fn test_mm_blendv_ps() {
1233 let a = _mm_set1_ps(0.0);
1234 let b = _mm_set1_ps(1.0);
1235 let mask = transmute(_mm_setr_epi32(0, -1, 0, -1));
1236 let r = _mm_blendv_ps(a, b, mask);
1237 let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
1238 assert_eq_m128(r, e);
1239 }
1240
1241 #[simd_test(enable = "sse4.1")]
1242 const unsafe fn test_mm_blend_pd() {
1243 let a = _mm_set1_pd(0.0);
1244 let b = _mm_set1_pd(1.0);
1245 let r = _mm_blend_pd::<0b10>(a, b);
1246 let e = _mm_setr_pd(0.0, 1.0);
1247 assert_eq_m128d(r, e);
1248 }
1249
1250 #[simd_test(enable = "sse4.1")]
1251 const unsafe fn test_mm_blend_ps() {
1252 let a = _mm_set1_ps(0.0);
1253 let b = _mm_set1_ps(1.0);
1254 let r = _mm_blend_ps::<0b1010>(a, b);
1255 let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
1256 assert_eq_m128(r, e);
1257 }
1258
1259 #[simd_test(enable = "sse4.1")]
1260 const unsafe fn test_mm_blend_epi16() {
1261 let a = _mm_set1_epi16(0);
1262 let b = _mm_set1_epi16(1);
1263 let r = _mm_blend_epi16::<0b1010_1100>(a, b);
1264 let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1);
1265 assert_eq_m128i(r, e);
1266 }
1267
1268 #[simd_test(enable = "sse4.1")]
1269 const unsafe fn test_mm_extract_ps() {
1270 let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
1271 let r: f32 = f32::from_bits(_mm_extract_ps::<1>(a) as u32);
1272 assert_eq!(r, 1.0);
1273 let r: f32 = f32::from_bits(_mm_extract_ps::<3>(a) as u32);
1274 assert_eq!(r, 3.0);
1275 }
1276
1277 #[simd_test(enable = "sse4.1")]
1278 const unsafe fn test_mm_extract_epi8() {
1279 #[rustfmt::skip]
1280 let a = _mm_setr_epi8(
1281 -1, 1, 2, 3, 4, 5, 6, 7,
1282 8, 9, 10, 11, 12, 13, 14, 15
1283 );
1284 let r1 = _mm_extract_epi8::<0>(a);
1285 let r2 = _mm_extract_epi8::<3>(a);
1286 assert_eq!(r1, 0xFF);
1287 assert_eq!(r2, 3);
1288 }
1289
1290 #[simd_test(enable = "sse4.1")]
1291 const unsafe fn test_mm_extract_epi32() {
1292 let a = _mm_setr_epi32(0, 1, 2, 3);
1293 let r = _mm_extract_epi32::<1>(a);
1294 assert_eq!(r, 1);
1295 let r = _mm_extract_epi32::<3>(a);
1296 assert_eq!(r, 3);
1297 }
1298
1299 #[simd_test(enable = "sse4.1")]
1300 unsafe fn test_mm_insert_ps() {
1301 let a = _mm_set1_ps(1.0);
1302 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1303 let r = _mm_insert_ps::<0b11_00_1100>(a, b);
1304 let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0);
1305 assert_eq_m128(r, e);
1306
1307 let a = _mm_set1_ps(1.0);
1309 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1310 let r = _mm_insert_ps::<0b11_00_0001>(a, b);
1311 let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0);
1312 assert_eq_m128(r, e);
1313 }
1314
1315 #[simd_test(enable = "sse4.1")]
1316 const unsafe fn test_mm_insert_epi8() {
1317 let a = _mm_set1_epi8(0);
1318 let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1319 let r = _mm_insert_epi8::<1>(a, 32);
1320 assert_eq_m128i(r, e);
1321 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0);
1322 let r = _mm_insert_epi8::<14>(a, 32);
1323 assert_eq_m128i(r, e);
1324 }
1325
1326 #[simd_test(enable = "sse4.1")]
1327 const unsafe fn test_mm_insert_epi32() {
1328 let a = _mm_set1_epi32(0);
1329 let e = _mm_setr_epi32(0, 32, 0, 0);
1330 let r = _mm_insert_epi32::<1>(a, 32);
1331 assert_eq_m128i(r, e);
1332 let e = _mm_setr_epi32(0, 0, 0, 32);
1333 let r = _mm_insert_epi32::<3>(a, 32);
1334 assert_eq_m128i(r, e);
1335 }
1336
1337 #[simd_test(enable = "sse4.1")]
1338 const unsafe fn test_mm_max_epi8() {
1339 #[rustfmt::skip]
1340 let a = _mm_setr_epi8(
1341 1, 4, 5, 8, 9, 12, 13, 16,
1342 17, 20, 21, 24, 25, 28, 29, 32,
1343 );
1344 #[rustfmt::skip]
1345 let b = _mm_setr_epi8(
1346 2, 3, 6, 7, 10, 11, 14, 15,
1347 18, 19, 22, 23, 26, 27, 30, 31,
1348 );
1349 let r = _mm_max_epi8(a, b);
1350 #[rustfmt::skip]
1351 let e = _mm_setr_epi8(
1352 2, 4, 6, 8, 10, 12, 14, 16,
1353 18, 20, 22, 24, 26, 28, 30, 32,
1354 );
1355 assert_eq_m128i(r, e);
1356 }
1357
1358 #[simd_test(enable = "sse4.1")]
1359 const unsafe fn test_mm_max_epu16() {
1360 let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
1361 let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
1362 let r = _mm_max_epu16(a, b);
1363 let e = _mm_setr_epi16(2, 4, 6, 8, 10, 12, 14, 16);
1364 assert_eq_m128i(r, e);
1365 }
1366
1367 #[simd_test(enable = "sse4.1")]
1368 const unsafe fn test_mm_max_epi32() {
1369 let a = _mm_setr_epi32(1, 4, 5, 8);
1370 let b = _mm_setr_epi32(2, 3, 6, 7);
1371 let r = _mm_max_epi32(a, b);
1372 let e = _mm_setr_epi32(2, 4, 6, 8);
1373 assert_eq_m128i(r, e);
1374 }
1375
1376 #[simd_test(enable = "sse4.1")]
1377 const unsafe fn test_mm_max_epu32() {
1378 let a = _mm_setr_epi32(1, 4, 5, 8);
1379 let b = _mm_setr_epi32(2, 3, 6, 7);
1380 let r = _mm_max_epu32(a, b);
1381 let e = _mm_setr_epi32(2, 4, 6, 8);
1382 assert_eq_m128i(r, e);
1383 }
1384
1385 #[simd_test(enable = "sse4.1")]
1386 const unsafe fn test_mm_min_epi8() {
1387 #[rustfmt::skip]
1388 let a = _mm_setr_epi8(
1389 1, 4, 5, 8, 9, 12, 13, 16,
1390 17, 20, 21, 24, 25, 28, 29, 32,
1391 );
1392 #[rustfmt::skip]
1393 let b = _mm_setr_epi8(
1394 2, 3, 6, 7, 10, 11, 14, 15,
1395 18, 19, 22, 23, 26, 27, 30, 31,
1396 );
1397 let r = _mm_min_epi8(a, b);
1398 #[rustfmt::skip]
1399 let e = _mm_setr_epi8(
1400 1, 3, 5, 7, 9, 11, 13, 15,
1401 17, 19, 21, 23, 25, 27, 29, 31,
1402 );
1403 assert_eq_m128i(r, e);
1404
1405 #[rustfmt::skip]
1406 let a = _mm_setr_epi8(
1407 1, -4, -5, 8, -9, -12, 13, -16,
1408 17, 20, 21, 24, 25, 28, 29, 32,
1409 );
1410 #[rustfmt::skip]
1411 let b = _mm_setr_epi8(
1412 2, -3, -6, 7, -10, -11, 14, -15,
1413 18, 19, 22, 23, 26, 27, 30, 31,
1414 );
1415 let r = _mm_min_epi8(a, b);
1416 #[rustfmt::skip]
1417 let e = _mm_setr_epi8(
1418 1, -4, -6, 7, -10, -12, 13, -16,
1419 17, 19, 21, 23, 25, 27, 29, 31,
1420 );
1421 assert_eq_m128i(r, e);
1422 }
1423
1424 #[simd_test(enable = "sse4.1")]
1425 const unsafe fn test_mm_min_epu16() {
1426 let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
1427 let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
1428 let r = _mm_min_epu16(a, b);
1429 let e = _mm_setr_epi16(1, 3, 5, 7, 9, 11, 13, 15);
1430 assert_eq_m128i(r, e);
1431 }
1432
1433 #[simd_test(enable = "sse4.1")]
1434 const unsafe fn test_mm_min_epi32() {
1435 let a = _mm_setr_epi32(1, 4, 5, 8);
1436 let b = _mm_setr_epi32(2, 3, 6, 7);
1437 let r = _mm_min_epi32(a, b);
1438 let e = _mm_setr_epi32(1, 3, 5, 7);
1439 assert_eq_m128i(r, e);
1440
1441 let a = _mm_setr_epi32(-1, 4, 5, -7);
1442 let b = _mm_setr_epi32(-2, 3, -6, 8);
1443 let r = _mm_min_epi32(a, b);
1444 let e = _mm_setr_epi32(-2, 3, -6, -7);
1445 assert_eq_m128i(r, e);
1446 }
1447
1448 #[simd_test(enable = "sse4.1")]
1449 const unsafe fn test_mm_min_epu32() {
1450 let a = _mm_setr_epi32(1, 4, 5, 8);
1451 let b = _mm_setr_epi32(2, 3, 6, 7);
1452 let r = _mm_min_epu32(a, b);
1453 let e = _mm_setr_epi32(1, 3, 5, 7);
1454 assert_eq_m128i(r, e);
1455 }
1456
1457 #[simd_test(enable = "sse4.1")]
1458 unsafe fn test_mm_packus_epi32() {
1459 let a = _mm_setr_epi32(1, 2, 3, 4);
1460 let b = _mm_setr_epi32(-1, -2, -3, -4);
1461 let r = _mm_packus_epi32(a, b);
1462 let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
1463 assert_eq_m128i(r, e);
1464 }
1465
1466 #[simd_test(enable = "sse4.1")]
1467 const unsafe fn test_mm_cmpeq_epi64() {
1468 let a = _mm_setr_epi64x(0, 1);
1469 let b = _mm_setr_epi64x(0, 0);
1470 let r = _mm_cmpeq_epi64(a, b);
1471 let e = _mm_setr_epi64x(-1, 0);
1472 assert_eq_m128i(r, e);
1473 }
1474
1475 #[simd_test(enable = "sse4.1")]
1476 const unsafe fn test_mm_cvtepi8_epi16() {
1477 let a = _mm_set1_epi8(10);
1478 let r = _mm_cvtepi8_epi16(a);
1479 let e = _mm_set1_epi16(10);
1480 assert_eq_m128i(r, e);
1481 let a = _mm_set1_epi8(-10);
1482 let r = _mm_cvtepi8_epi16(a);
1483 let e = _mm_set1_epi16(-10);
1484 assert_eq_m128i(r, e);
1485 }
1486
1487 #[simd_test(enable = "sse4.1")]
1488 const unsafe fn test_mm_cvtepi8_epi32() {
1489 let a = _mm_set1_epi8(10);
1490 let r = _mm_cvtepi8_epi32(a);
1491 let e = _mm_set1_epi32(10);
1492 assert_eq_m128i(r, e);
1493 let a = _mm_set1_epi8(-10);
1494 let r = _mm_cvtepi8_epi32(a);
1495 let e = _mm_set1_epi32(-10);
1496 assert_eq_m128i(r, e);
1497 }
1498
1499 #[simd_test(enable = "sse4.1")]
1500 const unsafe fn test_mm_cvtepi8_epi64() {
1501 let a = _mm_set1_epi8(10);
1502 let r = _mm_cvtepi8_epi64(a);
1503 let e = _mm_set1_epi64x(10);
1504 assert_eq_m128i(r, e);
1505 let a = _mm_set1_epi8(-10);
1506 let r = _mm_cvtepi8_epi64(a);
1507 let e = _mm_set1_epi64x(-10);
1508 assert_eq_m128i(r, e);
1509 }
1510
1511 #[simd_test(enable = "sse4.1")]
1512 const unsafe fn test_mm_cvtepi16_epi32() {
1513 let a = _mm_set1_epi16(10);
1514 let r = _mm_cvtepi16_epi32(a);
1515 let e = _mm_set1_epi32(10);
1516 assert_eq_m128i(r, e);
1517 let a = _mm_set1_epi16(-10);
1518 let r = _mm_cvtepi16_epi32(a);
1519 let e = _mm_set1_epi32(-10);
1520 assert_eq_m128i(r, e);
1521 }
1522
1523 #[simd_test(enable = "sse4.1")]
1524 const unsafe fn test_mm_cvtepi16_epi64() {
1525 let a = _mm_set1_epi16(10);
1526 let r = _mm_cvtepi16_epi64(a);
1527 let e = _mm_set1_epi64x(10);
1528 assert_eq_m128i(r, e);
1529 let a = _mm_set1_epi16(-10);
1530 let r = _mm_cvtepi16_epi64(a);
1531 let e = _mm_set1_epi64x(-10);
1532 assert_eq_m128i(r, e);
1533 }
1534
1535 #[simd_test(enable = "sse4.1")]
1536 const unsafe fn test_mm_cvtepi32_epi64() {
1537 let a = _mm_set1_epi32(10);
1538 let r = _mm_cvtepi32_epi64(a);
1539 let e = _mm_set1_epi64x(10);
1540 assert_eq_m128i(r, e);
1541 let a = _mm_set1_epi32(-10);
1542 let r = _mm_cvtepi32_epi64(a);
1543 let e = _mm_set1_epi64x(-10);
1544 assert_eq_m128i(r, e);
1545 }
1546
1547 #[simd_test(enable = "sse4.1")]
1548 const unsafe fn test_mm_cvtepu8_epi16() {
1549 let a = _mm_set1_epi8(10);
1550 let r = _mm_cvtepu8_epi16(a);
1551 let e = _mm_set1_epi16(10);
1552 assert_eq_m128i(r, e);
1553 }
1554
1555 #[simd_test(enable = "sse4.1")]
1556 const unsafe fn test_mm_cvtepu8_epi32() {
1557 let a = _mm_set1_epi8(10);
1558 let r = _mm_cvtepu8_epi32(a);
1559 let e = _mm_set1_epi32(10);
1560 assert_eq_m128i(r, e);
1561 }
1562
1563 #[simd_test(enable = "sse4.1")]
1564 const unsafe fn test_mm_cvtepu8_epi64() {
1565 let a = _mm_set1_epi8(10);
1566 let r = _mm_cvtepu8_epi64(a);
1567 let e = _mm_set1_epi64x(10);
1568 assert_eq_m128i(r, e);
1569 }
1570
1571 #[simd_test(enable = "sse4.1")]
1572 const unsafe fn test_mm_cvtepu16_epi32() {
1573 let a = _mm_set1_epi16(10);
1574 let r = _mm_cvtepu16_epi32(a);
1575 let e = _mm_set1_epi32(10);
1576 assert_eq_m128i(r, e);
1577 }
1578
1579 #[simd_test(enable = "sse4.1")]
1580 const unsafe fn test_mm_cvtepu16_epi64() {
1581 let a = _mm_set1_epi16(10);
1582 let r = _mm_cvtepu16_epi64(a);
1583 let e = _mm_set1_epi64x(10);
1584 assert_eq_m128i(r, e);
1585 }
1586
1587 #[simd_test(enable = "sse4.1")]
1588 const unsafe fn test_mm_cvtepu32_epi64() {
1589 let a = _mm_set1_epi32(10);
1590 let r = _mm_cvtepu32_epi64(a);
1591 let e = _mm_set1_epi64x(10);
1592 assert_eq_m128i(r, e);
1593 }
1594
1595 #[simd_test(enable = "sse4.1")]
1596 unsafe fn test_mm_dp_pd() {
1597 let a = _mm_setr_pd(2.0, 3.0);
1598 let b = _mm_setr_pd(1.0, 4.0);
1599 let e = _mm_setr_pd(14.0, 0.0);
1600 assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e);
1601 }
1602
1603 #[simd_test(enable = "sse4.1")]
1604 unsafe fn test_mm_dp_ps() {
1605 let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0);
1606 let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0);
1607 let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0);
1608 assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e);
1609 }
1610
1611 #[simd_test(enable = "sse4.1")]
1612 const unsafe fn test_mm_floor_pd() {
1613 let a = _mm_setr_pd(2.5, 4.5);
1614 let r = _mm_floor_pd(a);
1615 let e = _mm_setr_pd(2.0, 4.0);
1616 assert_eq_m128d(r, e);
1617 }
1618
1619 #[simd_test(enable = "sse4.1")]
1620 const unsafe fn test_mm_floor_ps() {
1621 let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
1622 let r = _mm_floor_ps(a);
1623 let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
1624 assert_eq_m128(r, e);
1625 }
1626
1627 #[simd_test(enable = "sse4.1")]
1628 unsafe fn test_mm_floor_sd() {
1629 let a = _mm_setr_pd(2.5, 4.5);
1630 let b = _mm_setr_pd(-1.5, -3.5);
1631 let r = _mm_floor_sd(a, b);
1632 let e = _mm_setr_pd(-2.0, 4.5);
1633 assert_eq_m128d(r, e);
1634 }
1635
1636 #[simd_test(enable = "sse4.1")]
1637 unsafe fn test_mm_floor_ss() {
1638 let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
1639 let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5);
1640 let r = _mm_floor_ss(a, b);
1641 let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5);
1642 assert_eq_m128(r, e);
1643 }
1644
1645 #[simd_test(enable = "sse4.1")]
1646 const unsafe fn test_mm_ceil_pd() {
1647 let a = _mm_setr_pd(1.5, 3.5);
1648 let r = _mm_ceil_pd(a);
1649 let e = _mm_setr_pd(2.0, 4.0);
1650 assert_eq_m128d(r, e);
1651 }
1652
1653 #[simd_test(enable = "sse4.1")]
1654 const unsafe fn test_mm_ceil_ps() {
1655 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1656 let r = _mm_ceil_ps(a);
1657 let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
1658 assert_eq_m128(r, e);
1659 }
1660
1661 #[simd_test(enable = "sse4.1")]
1662 unsafe fn test_mm_ceil_sd() {
1663 let a = _mm_setr_pd(1.5, 3.5);
1664 let b = _mm_setr_pd(-2.5, -4.5);
1665 let r = _mm_ceil_sd(a, b);
1666 let e = _mm_setr_pd(-2.0, 3.5);
1667 assert_eq_m128d(r, e);
1668 }
1669
1670 #[simd_test(enable = "sse4.1")]
1671 unsafe fn test_mm_ceil_ss() {
1672 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1673 let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5);
1674 let r = _mm_ceil_ss(a, b);
1675 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1676 assert_eq_m128(r, e);
1677 }
1678
1679 #[simd_test(enable = "sse4.1")]
1680 unsafe fn test_mm_round_pd() {
1681 let a = _mm_setr_pd(1.25, 3.75);
1682 let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a);
1683 let e = _mm_setr_pd(1.0, 4.0);
1684 assert_eq_m128d(r, e);
1685 }
1686
1687 #[simd_test(enable = "sse4.1")]
1688 unsafe fn test_mm_round_ps() {
1689 let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25);
1690 let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a);
1691 let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0);
1692 assert_eq_m128(r, e);
1693 }
1694
1695 #[simd_test(enable = "sse4.1")]
1696 unsafe fn test_mm_round_sd() {
1697 let a = _mm_setr_pd(1.5, 3.5);
1698 let b = _mm_setr_pd(-2.5, -4.5);
1699 let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b);
1700 let e = _mm_setr_pd(-2.0, 3.5);
1701 assert_eq_m128d(r, e);
1702
1703 let a = _mm_setr_pd(1.5, 3.5);
1704 let b = _mm_setr_pd(-2.5, -4.5);
1705 let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b);
1706 let e = _mm_setr_pd(-3.0, 3.5);
1707 assert_eq_m128d(r, e);
1708
1709 let a = _mm_setr_pd(1.5, 3.5);
1710 let b = _mm_setr_pd(-2.5, -4.5);
1711 let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b);
1712 let e = _mm_setr_pd(-2.0, 3.5);
1713 assert_eq_m128d(r, e);
1714
1715 let a = _mm_setr_pd(1.5, 3.5);
1716 let b = _mm_setr_pd(-2.5, -4.5);
1717 let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b);
1718 let e = _mm_setr_pd(-2.0, 3.5);
1719 assert_eq_m128d(r, e);
1720 }
1721
1722 #[simd_test(enable = "sse4.1")]
1723 unsafe fn test_mm_round_ss() {
1724 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1725 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1726 let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b);
1727 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1728 assert_eq_m128(r, e);
1729
1730 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1731 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1732 let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b);
1733 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1734 assert_eq_m128(r, e);
1735
1736 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1737 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1738 let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b);
1739 let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
1740 assert_eq_m128(r, e);
1741
1742 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1743 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1744 let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b);
1745 let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
1746 assert_eq_m128(r, e);
1747 }
1748
1749 #[simd_test(enable = "sse4.1")]
1750 unsafe fn test_mm_minpos_epu16_1() {
1751 let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66);
1752 let r = _mm_minpos_epu16(a);
1753 let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
1754 assert_eq_m128i(r, e);
1755 }
1756
1757 #[simd_test(enable = "sse4.1")]
1758 unsafe fn test_mm_minpos_epu16_2() {
1759 let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66);
1760 let r = _mm_minpos_epu16(a);
1761 let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0);
1762 assert_eq_m128i(r, e);
1763 }
1764
1765 #[simd_test(enable = "sse4.1")]
1766 unsafe fn test_mm_minpos_epu16_3() {
1767 let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13);
1769 let r = _mm_minpos_epu16(a);
1770 let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
1771 assert_eq_m128i(r, e);
1772 }
1773
1774 #[simd_test(enable = "sse4.1")]
1775 const unsafe fn test_mm_mul_epi32() {
1776 {
1777 let a = _mm_setr_epi32(1, 1, 1, 1);
1778 let b = _mm_setr_epi32(1, 2, 3, 4);
1779 let r = _mm_mul_epi32(a, b);
1780 let e = _mm_setr_epi64x(1, 3);
1781 assert_eq_m128i(r, e);
1782 }
1783 {
1784 let a = _mm_setr_epi32(15, 2 , 1234567, 4 );
1785 let b = _mm_setr_epi32(
1786 -20, -256, 666666, 666666, );
1789 let r = _mm_mul_epi32(a, b);
1790 let e = _mm_setr_epi64x(-300, 823043843622);
1791 assert_eq_m128i(r, e);
1792 }
1793 }
1794
1795 #[simd_test(enable = "sse4.1")]
1796 const unsafe fn test_mm_mullo_epi32() {
1797 {
1798 let a = _mm_setr_epi32(1, 1, 1, 1);
1799 let b = _mm_setr_epi32(1, 2, 3, 4);
1800 let r = _mm_mullo_epi32(a, b);
1801 let e = _mm_setr_epi32(1, 2, 3, 4);
1802 assert_eq_m128i(r, e);
1803 }
1804 {
1805 let a = _mm_setr_epi32(15, -2, 1234567, 99999);
1806 let b = _mm_setr_epi32(-20, -256, 666666, -99999);
1807 let r = _mm_mullo_epi32(a, b);
1808 let e = _mm_setr_epi32(-300, 512, -1589877210, -1409865409);
1812 assert_eq_m128i(r, e);
1813 }
1814 }
1815
1816 #[simd_test(enable = "sse4.1")]
1817 unsafe fn test_mm_minpos_epu16() {
1818 let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3);
1819 let r = _mm_minpos_epu16(a);
1820 let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0);
1821 assert_eq_m128i(r, e);
1822 }
1823
1824 #[simd_test(enable = "sse4.1")]
1825 unsafe fn test_mm_mpsadbw_epu8() {
1826 #[rustfmt::skip]
1827 let a = _mm_setr_epi8(
1828 0, 1, 2, 3, 4, 5, 6, 7,
1829 8, 9, 10, 11, 12, 13, 14, 15,
1830 );
1831
1832 let r = _mm_mpsadbw_epu8::<0b000>(a, a);
1833 let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
1834 assert_eq_m128i(r, e);
1835
1836 let r = _mm_mpsadbw_epu8::<0b001>(a, a);
1837 let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12);
1838 assert_eq_m128i(r, e);
1839
1840 let r = _mm_mpsadbw_epu8::<0b100>(a, a);
1841 let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44);
1842 assert_eq_m128i(r, e);
1843
1844 let r = _mm_mpsadbw_epu8::<0b101>(a, a);
1845 let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
1846 assert_eq_m128i(r, e);
1847
1848 let r = _mm_mpsadbw_epu8::<0b111>(a, a);
1849 let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4);
1850 assert_eq_m128i(r, e);
1851 }
1852
1853 #[simd_test(enable = "sse4.1")]
1854 const unsafe fn test_mm_testz_si128() {
1855 let a = _mm_set1_epi8(1);
1856 let mask = _mm_set1_epi8(0);
1857 let r = _mm_testz_si128(a, mask);
1858 assert_eq!(r, 1);
1859 let a = _mm_set1_epi8(0b101);
1860 let mask = _mm_set1_epi8(0b110);
1861 let r = _mm_testz_si128(a, mask);
1862 assert_eq!(r, 0);
1863 let a = _mm_set1_epi8(0b011);
1864 let mask = _mm_set1_epi8(0b100);
1865 let r = _mm_testz_si128(a, mask);
1866 assert_eq!(r, 1);
1867 }
1868
1869 #[simd_test(enable = "sse4.1")]
1870 const unsafe fn test_mm_testc_si128() {
1871 let a = _mm_set1_epi8(-1);
1872 let mask = _mm_set1_epi8(0);
1873 let r = _mm_testc_si128(a, mask);
1874 assert_eq!(r, 1);
1875 let a = _mm_set1_epi8(0b101);
1876 let mask = _mm_set1_epi8(0b110);
1877 let r = _mm_testc_si128(a, mask);
1878 assert_eq!(r, 0);
1879 let a = _mm_set1_epi8(0b101);
1880 let mask = _mm_set1_epi8(0b100);
1881 let r = _mm_testc_si128(a, mask);
1882 assert_eq!(r, 1);
1883 }
1884
1885 #[simd_test(enable = "sse4.1")]
1886 unsafe fn test_mm_testnzc_si128() {
1887 let a = _mm_set1_epi8(0);
1888 let mask = _mm_set1_epi8(1);
1889 let r = _mm_testnzc_si128(a, mask);
1890 assert_eq!(r, 0);
1891 let a = _mm_set1_epi8(-1);
1892 let mask = _mm_set1_epi8(0);
1893 let r = _mm_testnzc_si128(a, mask);
1894 assert_eq!(r, 0);
1895 let a = _mm_set1_epi8(0b101);
1896 let mask = _mm_set1_epi8(0b110);
1897 let r = _mm_testnzc_si128(a, mask);
1898 assert_eq!(r, 1);
1899 let a = _mm_set1_epi8(0b101);
1900 let mask = _mm_set1_epi8(0b101);
1901 let r = _mm_testnzc_si128(a, mask);
1902 assert_eq!(r, 0);
1903 }
1904
1905 #[simd_test(enable = "sse4.1")]
1906 const unsafe fn test_mm_test_all_zeros() {
1907 let a = _mm_set1_epi8(1);
1908 let mask = _mm_set1_epi8(0);
1909 let r = _mm_test_all_zeros(a, mask);
1910 assert_eq!(r, 1);
1911 let a = _mm_set1_epi8(0b101);
1912 let mask = _mm_set1_epi8(0b110);
1913 let r = _mm_test_all_zeros(a, mask);
1914 assert_eq!(r, 0);
1915 let a = _mm_set1_epi8(0b011);
1916 let mask = _mm_set1_epi8(0b100);
1917 let r = _mm_test_all_zeros(a, mask);
1918 assert_eq!(r, 1);
1919 }
1920
1921 #[simd_test(enable = "sse4.1")]
1922 const unsafe fn test_mm_test_all_ones() {
1923 let a = _mm_set1_epi8(-1);
1924 let r = _mm_test_all_ones(a);
1925 assert_eq!(r, 1);
1926 let a = _mm_set1_epi8(0b101);
1927 let r = _mm_test_all_ones(a);
1928 assert_eq!(r, 0);
1929 }
1930
1931 #[simd_test(enable = "sse4.1")]
1932 unsafe fn test_mm_test_mix_ones_zeros() {
1933 let a = _mm_set1_epi8(0);
1934 let mask = _mm_set1_epi8(1);
1935 let r = _mm_test_mix_ones_zeros(a, mask);
1936 assert_eq!(r, 0);
1937 let a = _mm_set1_epi8(-1);
1938 let mask = _mm_set1_epi8(0);
1939 let r = _mm_test_mix_ones_zeros(a, mask);
1940 assert_eq!(r, 0);
1941 let a = _mm_set1_epi8(0b101);
1942 let mask = _mm_set1_epi8(0b110);
1943 let r = _mm_test_mix_ones_zeros(a, mask);
1944 assert_eq!(r, 1);
1945 let a = _mm_set1_epi8(0b101);
1946 let mask = _mm_set1_epi8(0b101);
1947 let r = _mm_test_mix_ones_zeros(a, mask);
1948 assert_eq!(r, 0);
1949 }
1950
1951 #[simd_test(enable = "sse4.1")]
1952 unsafe fn test_mm_stream_load_si128() {
1953 let a = _mm_set_epi64x(5, 6);
1954 let r = _mm_stream_load_si128(core::ptr::addr_of!(a) as *const _);
1955 assert_eq_m128i(a, r);
1956 }
1957}