1use crate::core_arch::{simd::*, x86::*};
22use crate::intrinsics::simd::*;
23
24#[cfg(test)]
25use stdarch_test::assert_instr;
26
/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// Generates `vpabsd`. Note: like the hardware instruction, the absolute
/// value of `i32::MIN` wraps and is returned as `i32::MIN` (`simd_neg` is a
/// wrapping negation).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i32x8();
        // Select the negated lane wherever the lane is negative.
        let r = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
42
/// Computes the absolute values of packed 16-bit integers in `a`.
///
/// Generates `vpabsw`. The absolute value of `i16::MIN` wraps to `i16::MIN`,
/// matching the hardware behavior.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i16x16();
        // Select the negated lane wherever the lane is negative.
        let r = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
58
/// Computes the absolute values of packed 8-bit integers in `a`.
///
/// Generates `vpabsb`. The absolute value of `i8::MIN` wraps to `i8::MIN`,
/// matching the hardware behavior.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i8x32();
        // Select the negated lane wherever the lane is negative.
        let r = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
        transmute(r)
    }
}
74
/// Adds packed 64-bit integers in `a` and `b` (wrapping on overflow).
///
/// Generates `vpaddq`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) }
}
86
/// Adds packed 32-bit integers in `a` and `b` (wrapping on overflow).
///
/// Generates `vpaddd`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) }
}
98
/// Adds packed 16-bit integers in `a` and `b` (wrapping on overflow).
///
/// Generates `vpaddw`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) }
}
110
/// Adds packed 8-bit integers in `a` and `b` (wrapping on overflow).
///
/// Generates `vpaddb`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) }
}
122
/// Adds packed 8-bit signed integers in `a` and `b` using saturation
/// (results clamp to the `i8` range instead of wrapping).
///
/// Generates `vpaddsb`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) }
}
134
/// Adds packed 16-bit signed integers in `a` and `b` using saturation
/// (results clamp to the `i16` range instead of wrapping).
///
/// Generates `vpaddsw`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) }
}
146
/// Adds packed 8-bit unsigned integers in `a` and `b` using saturation
/// (results clamp to `u8::MAX` instead of wrapping).
///
/// Generates `vpaddusb`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) }
}
158
/// Adds packed 16-bit unsigned integers in `a` and `b` using saturation
/// (results clamp to `u16::MAX` instead of wrapping).
///
/// Generates `vpaddusw`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) }
}
170
/// Concatenates pairs of 16-byte lanes of `a` and `b` and shifts the result
/// right by `IMM8` bytes, operating on each 128-bit lane independently
/// (this is NOT a single 32-byte shift).
///
/// Generates `vpalignr`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);

    // Shifting by 32 or more bytes drains both operands entirely.
    if IMM8 >= 32 {
        return _mm256_setzero_si256();
    }
    // For shifts of 17..=31 bytes only bytes of `a` can remain, so the
    // problem reduces to shifting (zero, a) by IMM8 - 16 via the same mask.
    let (a, b) = if IMM8 > 16 {
        (_mm256_setzero_si256(), a)
    } else {
        (a, b)
    };
    unsafe {
        // A shift of exactly 16 bytes returns `a` unchanged (after the
        // operand swap above this also covers the IMM8 == 32-16 boundary).
        if IMM8 == 16 {
            return transmute(a);
        }
    }
    // Computes the shuffle index for byte `i` of the result: bytes come from
    // `b` (indices 0..16 within each lane) until the shift is exhausted,
    // then from `a` (indices 16..32, hence the extra +16).
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 16 + shift
        }
    }

    unsafe {
        let r: i8x32 = simd_shuffle!(
            b.as_i8x32(),
            a.as_i8x32(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
            ],
        );
        transmute(r)
    }
}
253
/// Computes the bitwise AND of 256 bits of integer data in `a` and `b`.
///
/// The `assert_instr(vandps)` reflects that the compiler may select the
/// floating-point form of the AND instruction for this pattern.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) }
}
266
/// Computes the bitwise NOT of 256 bits of integer data in `a` and then AND
/// with `b`, i.e. `(!a) & b`.
///
/// NOT is expressed as XOR with all-ones so the compiler can select the
/// single `andn`-style instruction (`vandnps` per the assertion below).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let all_ones = _mm256_set1_epi8(-1);
        transmute(simd_and(
            simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
            b.as_i64x4(),
        ))
    }
}
285
/// Averages packed unsigned 16-bit integers in `a` and `b`, rounding up:
/// each lane is `(a + b + 1) >> 1`.
///
/// The inputs are widened to 32 bits first so the `a + b + 1` sum cannot
/// overflow. Generates `vpavgw`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
        transmute(simd_cast::<_, u16x16>(r))
    }
}
302
/// Averages packed unsigned 8-bit integers in `a` and `b`, rounding up:
/// each lane is `(a + b + 1) >> 1`.
///
/// The inputs are widened to 16 bits first so the `a + b + 1` sum cannot
/// overflow. Generates `vpavgb`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = simd_cast::<_, u16x32>(a.as_u8x32());
        let b = simd_cast::<_, u16x32>(b.as_u8x32());
        let r = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
        transmute(simd_cast::<_, u8x32>(r))
    }
}
319
/// Blends packed 32-bit integers from `a` and `b` using `IMM4` as a mask:
/// result lane `i` comes from `b` when bit `i` of `IMM4` is set, else from `a`.
///
/// Generates `vblendps` (the compiler selects the float form of the blend).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM4, 4);
    unsafe {
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        // Each lookup table maps the relevant 2 bits of IMM4 to a shuffle
        // index: 0..4 select from `a`, 4..8 select from `b`.
        let r: i32x4 = simd_shuffle!(
            a,
            b,
            [
                [0, 4, 0, 4][IMM4 as usize & 0b11],
                [1, 1, 5, 5][IMM4 as usize & 0b11],
                [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
                [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
            ],
        );
        transmute(r)
    }
}
347
/// Blends packed 32-bit integers from `a` and `b` using `IMM8` as a mask:
/// result lane `i` comes from `b` when bit `i` of `IMM8` is set, else from `a`.
///
/// Generates `vblendps` (the compiler selects the float form of the blend).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        // Each lookup table maps the relevant 2 bits of IMM8 to a shuffle
        // index: 0..8 select from `a`, 8..16 select from `b`.
        let r: i32x8 = simd_shuffle!(
            a,
            b,
            [
                [0, 8, 0, 8][IMM8 as usize & 0b11],
                [1, 1, 9, 9][IMM8 as usize & 0b11],
                [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
                [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
                [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}
379
/// Blends packed 16-bit integers from `a` and `b` using `IMM8` as a mask.
/// Bit `i` of `IMM8` controls lane `i` within EACH 128-bit half (the same
/// 8-bit mask is applied to both halves, as `vpblendw` does): a set bit
/// selects from `b`, a clear bit from `a`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        let b = b.as_i16x16();

        // Each lookup table maps 2 bits of IMM8 to a shuffle index:
        // 0..16 select from `a`, 16..32 select from `b`. Lanes 8..16 reuse
        // the same IMM8 bits as lanes 0..8 (mask repeats per 128-bit half).
        let r: i16x16 = simd_shuffle!(
            a,
            b,
            [
                [0, 16, 0, 16][IMM8 as usize & 0b11],
                [1, 1, 17, 17][IMM8 as usize & 0b11],
                [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
                [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
                [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
                [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
                [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
                [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
                [8, 24, 8, 24][IMM8 as usize & 0b11],
                [9, 9, 25, 25][IMM8 as usize & 0b11],
                [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
                [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
                [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
                [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
                [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
                [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
            ],
        );
        transmute(r)
    }
}
420
/// Blends packed 8-bit integers from `a` and `b` using a variable `mask`:
/// result byte `i` comes from `b` when the high bit of `mask` byte `i` is
/// set, else from `a`.
///
/// Generates `vpblendvb`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    unsafe {
        // `byte < 0` is exactly "high bit set" for signed bytes.
        let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
        transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
    }
}
435
/// Broadcasts the low packed 8-bit integer of `a` to all 16 lanes of the
/// 128-bit result.
///
/// Generates `vpbroadcastb`. The zero vector is only a dummy second shuffle
/// operand; every index selects lane 0 of `a`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
        transmute::<i8x16, _>(ret)
    }
}
451
/// Broadcasts the low packed 8-bit integer of `a` to all 32 lanes of the
/// 256-bit result.
///
/// Generates `vpbroadcastb`. The zero vector is only a dummy second shuffle
/// operand; every index selects lane 0 of `a`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
        transmute::<i8x32, _>(ret)
    }
}
467
/// Broadcasts the low packed 32-bit integer of `a` to all 4 lanes of the
/// 128-bit result.
///
/// The assertion expects `vbroadcastss`: the compiler may use the
/// float-domain broadcast for this integer pattern.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
        transmute::<i32x4, _>(ret)
    }
}
485
/// Broadcasts the low packed 32-bit integer of `a` to all 8 lanes of the
/// 256-bit result.
///
/// The assertion expects `vbroadcastss`: the compiler may use the
/// float-domain broadcast for this integer pattern.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
        transmute::<i32x8, _>(ret)
    }
}
503
/// Broadcasts the low packed 64-bit integer of `a` to both lanes of the
/// 128-bit result.
///
/// The assertion expects `vmovddup` (duplicate-low-double), which the
/// compiler selects for this 2-lane broadcast.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
        transmute::<i64x2, _>(ret)
    }
}
521
/// Broadcasts the low packed 64-bit integer of `a` to all 4 lanes of the
/// 256-bit result.
///
/// The assertion expects `vbroadcastsd` (float-domain broadcast selected
/// for this integer pattern).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
        transmute::<i64x4, _>(ret)
    }
}
537
/// Broadcasts the low double-precision (64-bit) float of `a` to both lanes
/// of the 128-bit result.
///
/// Generates `vmovddup`; `_mm_setzero_pd()` is only a dummy shuffle operand.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
}
550
/// Broadcasts the low double-precision (64-bit) float of `a` to all 4 lanes
/// of the 256-bit result.
///
/// Generates `vbroadcastsd`; `_mm_setzero_pd()` is only a dummy shuffle operand.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
}
563
/// Copies the 128-bit integer data of `a` into both 128-bit halves of the
/// 256-bit result (indices `[0, 1, 0, 1]` repeat the two 64-bit lanes).
///
/// Same operation as [`_mm256_broadcastsi128_si256`]; this alias was
/// stabilized later.
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}
578
/// Copies the 128-bit integer data of `a` into both 128-bit halves of the
/// 256-bit result (indices `[0, 1, 0, 1]` repeat the two 64-bit lanes).
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
        transmute::<i64x4, _>(ret)
    }
}
595
/// Broadcasts the low single-precision (32-bit) float of `a` to all 4 lanes
/// of the 128-bit result.
///
/// Generates `vbroadcastss`; `_mm_setzero_ps()` is only a dummy shuffle operand.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
}
608
/// Broadcasts the low single-precision (32-bit) float of `a` to all 8 lanes
/// of the 256-bit result.
///
/// Generates `vbroadcastss`; `_mm_setzero_ps()` is only a dummy shuffle operand.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
}
621
/// Broadcasts the low packed 16-bit integer of `a` to all 8 lanes of the
/// 128-bit result.
///
/// Generates `vpbroadcastw`; the zero vector is only a dummy shuffle operand.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
        transmute::<i16x8, _>(ret)
    }
}
637
/// Broadcasts the low packed 16-bit integer of `a` to all 16 lanes of the
/// 256-bit result.
///
/// Generates `vpbroadcastw`; the zero vector is only a dummy shuffle operand.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    unsafe {
        let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
        transmute::<i16x16, _>(ret)
    }
}
653
/// Compares packed 64-bit integers in `a` and `b` for equality; each result
/// lane is all-ones on equality, all-zeros otherwise.
///
/// Generates `vpcmpeqq`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
}
665
/// Compares packed 32-bit integers in `a` and `b` for equality; each result
/// lane is all-ones on equality, all-zeros otherwise.
///
/// Generates `vpcmpeqd`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}
677
/// Compares packed 16-bit integers in `a` and `b` for equality; each result
/// lane is all-ones on equality, all-zeros otherwise.
///
/// Generates `vpcmpeqw`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}
689
/// Compares packed 8-bit integers in `a` and `b` for equality; each result
/// lane is all-ones on equality, all-zeros otherwise.
///
/// Generates `vpcmpeqb`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}
701
/// Compares packed 64-bit integers in `a` and `b` for signed greater-than;
/// each result lane is all-ones when `a > b`, all-zeros otherwise.
///
/// Generates `vpcmpgtq`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
}
713
/// Compares packed 32-bit integers in `a` and `b` for signed greater-than;
/// each result lane is all-ones when `a > b`, all-zeros otherwise.
///
/// Generates `vpcmpgtd`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}
725
/// Compares packed 16-bit integers in `a` and `b` for signed greater-than;
/// each result lane is all-ones when `a > b`, all-zeros otherwise.
///
/// Generates `vpcmpgtw`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}
737
/// Compares packed 8-bit integers in `a` and `b` for signed greater-than;
/// each result lane is all-ones when `a > b`, all-zeros otherwise.
///
/// Generates `vpcmpgtb`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}
749
/// Sign-extends packed 16-bit integers in `a` to packed 32-bit integers.
///
/// Generates `vpmovsxwd`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_i16x8())) }
}
761
/// Sign-extends the low four packed 16-bit integers in `a` to packed 64-bit
/// integers.
///
/// Only lanes 0..4 of `a` participate, so they are extracted via a shuffle
/// before widening. Generates `vpmovsxwq`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i16x8();
        let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}
777
/// Sign-extends packed 32-bit integers in `a` to packed 64-bit integers.
///
/// Generates `vpmovsxdq`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_i32x4())) }
}
789
/// Sign-extends packed 8-bit integers in `a` to packed 16-bit integers.
///
/// Generates `vpmovsxbw`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_i8x16())) }
}
801
/// Sign-extends the low eight packed 8-bit integers in `a` to packed 32-bit
/// integers.
///
/// Only lanes 0..8 of `a` participate, so they are extracted via a shuffle
/// before widening. Generates `vpmovsxbd`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}
817
/// Sign-extends the low four packed 8-bit integers in `a` to packed 64-bit
/// integers.
///
/// Only lanes 0..4 of `a` participate, so they are extracted via a shuffle
/// before widening. Generates `vpmovsxbq`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i8x16();
        let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}
833
/// Zero-extends packed unsigned 16-bit integers in `a` to packed 32-bit
/// integers.
///
/// Generates `vpmovzxwd`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    unsafe { transmute::<i32x8, _>(simd_cast(a.as_u16x8())) }
}
846
/// Zero-extends the low four packed unsigned 16-bit integers in `a` to
/// packed 64-bit integers.
///
/// Only lanes 0..4 of `a` participate, so they are extracted via a shuffle
/// before widening. Generates `vpmovzxwq`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u16x8();
        let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v64))
    }
}
863
/// Zero-extends packed unsigned 32-bit integers in `a` to packed 64-bit
/// integers.
///
/// Generates `vpmovzxdq`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    unsafe { transmute::<i64x4, _>(simd_cast(a.as_u32x4())) }
}
875
/// Zero-extends packed unsigned 8-bit integers in `a` to packed 16-bit
/// integers.
///
/// Generates `vpmovzxbw`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    unsafe { transmute::<i16x16, _>(simd_cast(a.as_u8x16())) }
}
887
/// Zero-extends the low eight packed unsigned 8-bit integers in `a` to
/// packed 32-bit integers.
///
/// Only lanes 0..8 of `a` participate, so they are extracted via a shuffle
/// before widening. Generates `vpmovzxbd`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i32x8, _>(simd_cast(v64))
    }
}
904
/// Zero-extends the low four packed unsigned 8-bit integers in `a` to
/// packed 64-bit integers.
///
/// Only lanes 0..4 of `a` participate, so they are extracted via a shuffle
/// before widening. Generates `vpmovzxbq`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_u8x16();
        let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
        transmute::<i64x4, _>(simd_cast(v32))
    }
}
921
/// Extracts a 128-bit integer half from `a`, selected by `IMM1` (0 = low
/// half, 1 = high half).
///
/// The zero vector is only a dummy second shuffle operand; indices `[0, 1]`
/// or `[2, 3]` pick the two 64-bit lanes of the requested half of `a`.
/// The assertion expects `vextractf128` (float-domain form may be selected).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        let b = i64x4::ZERO;
        let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}
940
/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
/// (wrapping), interleaving results per 128-bit lane as `vphaddw` does.
///
/// Implemented as two shuffles gathering the even-indexed and odd-indexed
/// members of each adjacent pair, followed by a lane-wise add.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        // First members of each pair, arranged in vphaddw output order.
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        // Second members of each pair, in the matching order.
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_add(even, odd).as_m256i()
    }
}
966
/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`
/// (wrapping), interleaving results per 128-bit lane as `vphaddd` does.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    unsafe {
        // First and second members of each adjacent pair, gathered in
        // vphaddd output order, then summed lane-wise.
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_add(even, odd).as_m256i()
    }
}
984
/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b` using
/// saturation (sums clamp to the `i16` range).
///
/// Delegates to the `phaddsw` LLVM intrinsic (declared elsewhere in this
/// module); generates `vphaddsw`. Not `const` because it goes through the
/// opaque intrinsic rather than portable simd operations.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) }
}
996
/// Horizontally subtracts adjacent pairs of 16-bit integers in `a` and `b`
/// (wrapping, `pair[0] - pair[1]`), interleaving results per 128-bit lane
/// as `vphsubw` does.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    unsafe {
        // First members of each pair (minuends), in vphsubw output order.
        let even: i16x16 = simd_shuffle!(
            a,
            b,
            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
        );
        // Second members of each pair (subtrahends), in the matching order.
        let odd: i16x16 = simd_shuffle!(
            a,
            b,
            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
        );
        simd_sub(even, odd).as_m256i()
    }
}
1022
/// Horizontally subtracts adjacent pairs of 32-bit integers in `a` and `b`
/// (wrapping, `pair[0] - pair[1]`), interleaving results per 128-bit lane
/// as `vphsubd` does.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    unsafe {
        // Minuends and subtrahends of each adjacent pair, gathered in
        // vphsubd output order, then subtracted lane-wise.
        let even: i32x8 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: i32x8 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_sub(even, odd).as_m256i()
    }
}
1040
/// Horizontally subtracts adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation (differences clamp to the `i16` range).
///
/// Delegates to the `phsubsw` LLVM intrinsic (declared elsewhere in this
/// module); generates `vphsubsw`. Not `const` because it goes through the
/// opaque intrinsic rather than portable simd operations.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) }
}
1052
/// Gathers four 32-bit integers from memory: lane `i` is loaded from
/// `slice` offset by `offsets[i] * SCALE` bytes. Uses an all-ones mask so
/// every lane is gathered.
///
/// Generates `vpgatherdd`. Delegates to the `pgatherdd` LLVM intrinsic
/// (declared elsewhere in this module).
///
/// # Safety
///
/// Each computed address must be valid to read; `SCALE` must be 1, 2, 4, or
/// 8 (enforced by `static_assert_imm8_scale!`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    let neg_one = _mm_set1_epi32(-1).as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}
1075
/// Gathers 32-bit integers from memory under a mask: lane `i` is loaded
/// from `slice + offsets[i] * SCALE` bytes when the mask lane's high bit is
/// set, otherwise taken from `src`.
///
/// Generates `vpgatherdd`. Delegates to the `pgatherdd` LLVM intrinsic
/// (declared elsewhere in this module).
///
/// # Safety
///
/// Each address actually gathered (mask bit set) must be valid to read;
/// `SCALE` must be 1, 2, 4, or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1101
/// Gathers eight 32-bit integers from memory: lane `i` is loaded from
/// `slice` offset by `offsets[i] * SCALE` bytes. Uses an all-ones mask so
/// every lane is gathered.
///
/// Generates `vpgatherdd`. Delegates to the `vpgatherdd` LLVM intrinsic
/// (declared elsewhere in this module).
///
/// # Safety
///
/// Each computed address must be valid to read; `SCALE` must be 1, 2, 4, or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x8::ZERO;
    let neg_one = _mm256_set1_epi32(-1).as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}
1124
/// Gathers 32-bit integers from memory under a mask: lane `i` is loaded
/// from `slice + offsets[i] * SCALE` bytes when the mask lane's high bit is
/// set, otherwise taken from `src`.
///
/// Generates `vpgatherdd`. Delegates to the `vpgatherdd` LLVM intrinsic
/// (declared elsewhere in this module).
///
/// # Safety
///
/// Each address actually gathered (mask bit set) must be valid to read;
/// `SCALE` must be 1, 2, 4, or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let mask = mask.as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    let r = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1150
/// Gathers four single-precision floats from memory: lane `i` is loaded
/// from `slice` offset by `offsets[i] * SCALE` bytes. Uses an all-ones
/// (`-1.0` bit pattern) mask so every lane is gathered.
///
/// Generates `vgatherdps`. Delegates to the `pgatherdps` LLVM intrinsic
/// (declared elsewhere in this module).
///
/// # Safety
///
/// Each computed address must be valid to read; `SCALE` must be 1, 2, 4, or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}
1169
/// Gathers single-precision floats from memory under a mask: lane `i` is
/// loaded from `slice + offsets[i] * SCALE` bytes when the mask lane's high
/// bit is set, otherwise taken from `src`.
///
/// Generates `vgatherdps`. Delegates to the `pgatherdps` LLVM intrinsic
/// (declared elsewhere in this module).
///
/// # Safety
///
/// Each address actually gathered (mask bit set) must be valid to read;
/// `SCALE` must be 1, 2, 4, or 8.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdps(src, slice, offsets, mask, SCALE as i8)
}
1192
/// Gathers 8 `f32`s from `slice` at `slice + offsets[i] * SCALE`
/// (`vgatherdps`) with 32-bit indices; an all-ones mask enables every lane.
///
/// # Safety
/// All eight computed addresses must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_ps();
    // -1.0 has all bits set, so every lane of the hardware mask is enabled.
    let neg_one = _mm256_set1_ps(-1.0);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}
1211
/// Masked gather of 8 `f32`s (`vgatherdps`): lane `i` is loaded from
/// `slice + offsets[i] * SCALE` only when the highest bit of `mask` lane `i`
/// is set; otherwise the lane is copied from `src`.
///
/// # Safety
/// The address of every enabled lane must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
    src: __m256,
    slice: *const f32,
    offsets: __m256i,
    mask: __m256,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    vpgatherdps(src, slice, offsets, mask, SCALE as i8)
}
1234
/// Gathers 2 `i64`s from `slice` at `slice + offsets[i] * SCALE`
/// (`vpgatherdq`) with 32-bit indices (only the low two lanes of `offsets`
/// are used); an all-ones mask enables both lanes.
///
/// # Safety
/// Both computed addresses must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    // All bits set => every lane of the hardware mask is enabled.
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}
1257
/// Masked gather of 2 `i64`s (`vpgatherdq`) with 32-bit indices: lane `i` is
/// loaded from `slice + offsets[i] * SCALE` only when the highest bit of
/// `mask` lane `i` is set; otherwise the lane is copied from `src`.
///
/// # Safety
/// The address of every enabled lane must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = pgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1283
/// Gathers 4 `i64`s from `slice` at `slice + offsets[i] * SCALE`
/// (`vpgatherdq`) with 32-bit indices; an all-ones mask enables every lane.
///
/// # Safety
/// All four computed addresses must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    // All bits set => every lane of the hardware mask is enabled.
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}
1306
/// Masked gather of 4 `i64`s (`vpgatherdq`) with 32-bit indices: lane `i` is
/// loaded from `slice + offsets[i] * SCALE` only when the highest bit of
/// `mask` lane `i` is set; otherwise the lane is copied from `src`.
///
/// # Safety
/// The address of every enabled lane must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    let r = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1332
/// Gathers 2 `f64`s from `slice` at `slice + offsets[i] * SCALE`
/// (`vgatherdpd`) with 32-bit indices (only the low two lanes of `offsets`
/// are used); an all-ones mask enables both lanes.
///
/// # Safety
/// Both computed addresses must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    // -1.0 has all bits set, so every lane of the hardware mask is enabled.
    let neg_one = _mm_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}
1351
/// Masked gather of 2 `f64`s (`vgatherdpd`) with 32-bit indices: lane `i` is
/// loaded from `slice + offsets[i] * SCALE` only when the highest bit of
/// `mask` lane `i` is set; otherwise the lane is copied from `src`.
///
/// # Safety
/// The address of every enabled lane must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    pgatherdpd(src, slice, offsets, mask, SCALE as i8)
}
1374
/// Gathers 4 `f64`s from `slice` at `slice + offsets[i] * SCALE`
/// (`vgatherdpd`) with 32-bit indices; an all-ones mask enables every lane.
///
/// # Safety
/// All four computed addresses must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m128i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    // -1.0 has all bits set, so every lane of the hardware mask is enabled.
    let neg_one = _mm256_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}
1396
/// Masked gather of 4 `f64`s (`vgatherdpd`) with 32-bit indices: lane `i` is
/// loaded from `slice + offsets[i] * SCALE` only when the highest bit of
/// `mask` lane `i` is set; otherwise the lane is copied from `src`.
///
/// # Safety
/// The address of every enabled lane must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
}
1419
/// Gathers 2 `i32`s from `slice` at `slice + offsets[i] * SCALE`
/// (`vpgatherqd`) using 64-bit indices; the upper two `i32` lanes of the
/// result are zeroed. An all-ones mask enables both active lanes.
///
/// # Safety
/// Both computed addresses must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    // All bits set => every lane of the hardware mask is enabled.
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}
1442
/// Masked gather of 2 `i32`s (`vpgatherqd`) with 64-bit indices: lane `i` is
/// loaded from `slice + offsets[i] * SCALE` only when the highest bit of
/// `mask` lane `i` is set; otherwise the lane is copied from `src`.
///
/// # Safety
/// The address of every enabled lane must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1468
/// Gathers 4 `i32`s from `slice` at `slice + offsets[i] * SCALE`
/// (`vpgatherqd`) using the four 64-bit indices in `offsets`; an all-ones
/// mask enables every lane. The 128-bit result holds one `i32` per index.
///
/// # Safety
/// All four computed addresses must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x4::ZERO;
    // All bits set => every lane of the hardware mask is enabled.
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}
1491
/// Masked gather of 4 `i32`s (`vpgatherqd`) with 64-bit indices: lane `i` is
/// loaded from `slice + offsets[i] * SCALE` only when the highest bit of
/// `mask` lane `i` is set; otherwise the lane is copied from `src`.
///
/// # Safety
/// The address of every enabled lane must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1517
/// Gathers 2 `f32`s from `slice` at `slice + offsets[i] * SCALE`
/// (`vgatherqps`) using 64-bit indices; the upper two `f32` lanes of the
/// result are zeroed. An all-ones mask enables both active lanes.
///
/// # Safety
/// Both computed addresses must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    // -1.0 has all bits set, so every lane of the hardware mask is enabled.
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}
1536
/// Masked gather of 2 `f32`s (`vgatherqps`) with 64-bit indices: lane `i` is
/// loaded from `slice + offsets[i] * SCALE` only when the highest bit of
/// `mask` lane `i` is set; otherwise the lane is copied from `src`.
///
/// # Safety
/// The address of every enabled lane must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    pgatherqps(src, slice, offsets, mask, SCALE as i8)
}
1559
/// Gathers 4 `f32`s from `slice` at `slice + offsets[i] * SCALE`
/// (`vgatherqps`) using the four 64-bit indices in `offsets`; an all-ones
/// mask enables every lane. The 128-bit result holds one `f32` per index.
///
/// # Safety
/// All four computed addresses must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_ps();
    // -1.0 has all bits set, so every lane of the hardware mask is enabled.
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
}
1578
/// Masked gather of 4 `f32`s (`vgatherqps`) with 64-bit indices: lane `i` is
/// loaded from `slice + offsets[i] * SCALE` only when the highest bit of
/// `mask` lane `i` is set; otherwise the lane is copied from `src`.
///
/// # Safety
/// The address of every enabled lane must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m256i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    vpgatherqps(src, slice, offsets, mask, SCALE as i8)
}
1601
/// Gathers 2 `i64`s from `slice` at `slice + offsets[i] * SCALE`
/// (`vpgatherqq`) with 64-bit indices; an all-ones mask enables both lanes.
///
/// # Safety
/// Both computed addresses must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x2::ZERO;
    // All bits set => every lane of the hardware mask is enabled.
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    let r = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}
1624
/// Masked gather of 2 `i64`s (`vpgatherqq`) with 64-bit indices: lane `i` is
/// loaded from `slice + offsets[i] * SCALE` only when the highest bit of
/// `mask` lane `i` is set; otherwise the lane is copied from `src`.
///
/// # Safety
/// The address of every enabled lane must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    let r = pgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1650
/// Gathers 4 `i64`s from `slice` at `slice + offsets[i] * SCALE`
/// (`vpgatherqq`) with 64-bit indices; an all-ones mask enables every lane.
///
/// # Safety
/// All four computed addresses must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x4::ZERO;
    // All bits set => every lane of the hardware mask is enabled.
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}
1673
/// Masked gather of 4 `i64`s (`vpgatherqq`) with 64-bit indices: lane `i` is
/// loaded from `slice + offsets[i] * SCALE` only when the highest bit of
/// `mask` lane `i` is set; otherwise the lane is copied from `src`.
///
/// # Safety
/// The address of every enabled lane must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    let r = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}
1699
/// Gathers 2 `f64`s from `slice` at `slice + offsets[i] * SCALE`
/// (`vgatherqpd`) with 64-bit indices; an all-ones mask enables both lanes.
///
/// # Safety
/// Both computed addresses must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm_setzero_pd();
    // -1.0 has all bits set, so every lane of the hardware mask is enabled.
    let neg_one = _mm_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}
1718
/// Masked gather of 2 `f64`s (`vgatherqpd`) with 64-bit indices: lane `i` is
/// loaded from `slice + offsets[i] * SCALE` only when the highest bit of
/// `mask` lane `i` is set; otherwise the lane is copied from `src`.
///
/// # Safety
/// The address of every enabled lane must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    pgatherqpd(src, slice, offsets, mask, SCALE as i8)
}
1741
/// Gathers 4 `f64`s from `slice` at `slice + offsets[i] * SCALE`
/// (`vgatherqpd`) with 64-bit indices; an all-ones mask enables every lane.
///
/// # Safety
/// All four computed addresses must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m256i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero = _mm256_setzero_pd();
    // -1.0 has all bits set, so every lane of the hardware mask is enabled.
    let neg_one = _mm256_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
}
1763
/// Masked gather of 4 `f64`s (`vgatherqpd`) with 64-bit indices: lane `i` is
/// loaded from `slice + offsets[i] * SCALE` only when the highest bit of
/// `mask` lane `i` is set; otherwise the lane is copied from `src`.
///
/// # Safety
/// The address of every enabled lane must be valid to read.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m256i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
}
1786
/// Replaces the 128-bit half of `a` selected by `IMM1` (0 = low, 1 = high)
/// with `b`, returning the combined 256-bit vector (`vinsertf128`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let a = a.as_i64x4();
        // Widen `b` to 256 bits so it can be shuffled against `a`; its data
        // occupies i64 lanes 4..6 of the shuffle input pair.
        let b = _mm256_castsi128_si256(b).as_i64x4();
        // IMM1 == 0: b replaces lanes 0-1; IMM1 == 1: b replaces lanes 2-3.
        let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
        transmute(dst)
    }
}
1806
/// Multiplies corresponding signed 16-bit lanes of `a` and `b` into 32-bit
/// products, then adds each adjacent pair of products, yielding 8 `i32`s
/// (`vpmaddwd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Sign-extend both operands to i32 so the products cannot overflow.
        let r: i32x16 = simd_mul(simd_cast(a.as_i16x16()), simd_cast(b.as_i16x16()));
        // Sum each adjacent (even, odd) product pair.
        let even: i32x8 = simd_shuffle!(r, r, [0, 2, 4, 6, 8, 10, 12, 14]);
        let odd: i32x8 = simd_shuffle!(r, r, [1, 3, 5, 7, 9, 11, 13, 15]);
        simd_add(even, odd).as_m256i()
    }
}
1825
/// Multiplies unsigned 8-bit lanes of `a` by the corresponding signed 8-bit
/// lanes of `b` and horizontally adds adjacent pairs with signed saturation,
/// producing 16 `i16`s (`vpmaddubsw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_i8x32())) }
}
1839
/// Conditionally loads 4 `i32`s from `mem_addr` (`vpmaskmovd`): lane `i` is
/// read from memory when the sign bit of `mask` lane `i` is set, otherwise it
/// is zero. The load is unaligned.
///
/// # Safety
/// `mem_addr` must be valid to read for every enabled lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    // Arithmetic shift replicates each lane's sign bit across the lane,
    // producing the all-ones / all-zeros mask the masked load expects.
    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x4::ZERO).as_m128i()
}
1854
/// Conditionally loads 8 `i32`s from `mem_addr` (`vpmaskmovd`): lane `i` is
/// read from memory when the sign bit of `mask` lane `i` is set, otherwise it
/// is zero. The load is unaligned.
///
/// # Safety
/// `mem_addr` must be valid to read for every enabled lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    // Arithmetic shift replicates each lane's sign bit across the lane.
    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x8::ZERO).as_m256i()
}
1869
/// Conditionally loads 2 `i64`s from `mem_addr` (`vpmaskmovq`): lane `i` is
/// read from memory when the sign bit of `mask` lane `i` is set, otherwise it
/// is zero. The load is unaligned.
///
/// # Safety
/// `mem_addr` must be valid to read for every enabled lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    // Arithmetic shift replicates each lane's sign bit across the lane.
    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x2::ZERO).as_m128i()
}
1884
/// Conditionally loads 4 `i64`s from `mem_addr` (`vpmaskmovq`): lane `i` is
/// read from memory when the sign bit of `mask` lane `i` is set, otherwise it
/// is zero. The load is unaligned.
///
/// # Safety
/// `mem_addr` must be valid to read for every enabled lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    // Arithmetic shift replicates each lane's sign bit across the lane.
    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x4::ZERO).as_m256i()
}
1899
/// Conditionally stores 4 `i32`s from `a` to `mem_addr` (`vpmaskmovd`):
/// lane `i` is written only when the sign bit of `mask` lane `i` is set.
/// The store is unaligned.
///
/// # Safety
/// `mem_addr` must be valid to write for every enabled lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
    // Arithmetic shift replicates each lane's sign bit across the lane.
    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4())
}
1914
/// Conditionally stores 8 `i32`s from `a` to `mem_addr` (`vpmaskmovd`):
/// lane `i` is written only when the sign bit of `mask` lane `i` is set.
/// The store is unaligned.
///
/// # Safety
/// `mem_addr` must be valid to write for every enabled lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
    // Arithmetic shift replicates each lane's sign bit across the lane.
    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8())
}
1929
/// Conditionally stores 2 `i64`s from `a` to `mem_addr` (`vpmaskmovq`):
/// lane `i` is written only when the sign bit of `mask` lane `i` is set.
/// The store is unaligned.
///
/// # Safety
/// `mem_addr` must be valid to write for every enabled lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
    // Arithmetic shift replicates each lane's sign bit across the lane.
    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2())
}
1944
/// Conditionally stores 4 `i64`s from `a` to `mem_addr` (`vpmaskmovq`):
/// lane `i` is written only when the sign bit of `mask` lane `i` is set.
/// The store is unaligned.
///
/// # Safety
/// `mem_addr` must be valid to write for every enabled lane.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
    // Arithmetic shift replicates each lane's sign bit across the lane.
    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4())
}
1959
/// Lane-wise maximum of signed 16-bit integers (`vpmaxsw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_i16x16(), b.as_i16x16()).as_m256i() }
}
1972
/// Lane-wise maximum of signed 32-bit integers (`vpmaxsd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_i32x8(), b.as_i32x8()).as_m256i() }
}
1985
/// Lane-wise maximum of signed 8-bit integers (`vpmaxsb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_i8x32(), b.as_i8x32()).as_m256i() }
}
1998
/// Lane-wise maximum of unsigned 16-bit integers (`vpmaxuw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_u16x16(), b.as_u16x16()).as_m256i() }
}
2011
/// Lane-wise maximum of unsigned 32-bit integers (`vpmaxud`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_u32x8(), b.as_u32x8()).as_m256i() }
}
2024
/// Lane-wise maximum of unsigned 8-bit integers (`vpmaxub`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_u8x32(), b.as_u8x32()).as_m256i() }
}
2037
/// Lane-wise minimum of signed 16-bit integers (`vpminsw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_i16x16(), b.as_i16x16()).as_m256i() }
}
2050
/// Lane-wise minimum of signed 32-bit integers (`vpminsd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_i32x8(), b.as_i32x8()).as_m256i() }
}
2063
/// Lane-wise minimum of signed 8-bit integers (`vpminsb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_i8x32(), b.as_i8x32()).as_m256i() }
}
2076
/// Lane-wise minimum of unsigned 16-bit integers (`vpminuw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_u16x16(), b.as_u16x16()).as_m256i() }
}
2089
/// Lane-wise minimum of unsigned 32-bit integers (`vpminud`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_u32x8(), b.as_u32x8()).as_m256i() }
}
2102
/// Lane-wise minimum of unsigned 8-bit integers (`vpminub`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_u8x32(), b.as_u8x32()).as_m256i() }
}
2115
/// Collects the sign bit of each of the 32 bytes of `a` into the low 32 bits
/// of the result (`vpmovmskb`); bit `i` is 1 iff byte `i` is negative.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_movemask_epi8(a: __m256i) -> i32 {
    unsafe {
        let z = i8x32::ZERO;
        // `simd_lt` yields an all-ones lane where the byte is negative,
        // i.e. exactly where its sign bit is set.
        let m: i8x32 = simd_lt(a.as_i8x32(), z);
        simd_bitmask::<_, u32>(m) as i32
    }
}
2132
/// Computes sums of absolute differences of unsigned 8-bit groups selected by
/// the `IMM8` control byte (`vmpsadbw`), producing 16 `u16` results.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) }
}
2151
/// Multiplies the signed low 32 bits of each 64-bit lane of `a` and `b`,
/// producing four full 64-bit products (`vpmuldq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Truncate each i64 lane to i32 (keeping the low half), then
        // sign-extend back to i64 before the widening multiply.
        let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
        let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
        transmute(simd_mul(a, b))
    }
}
2170
/// Multiplies the unsigned low 32 bits of each 64-bit lane of `a` and `b`,
/// producing four full 64-bit products (`vpmuludq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let a = a.as_u64x4();
        let b = b.as_u64x4();
        // Clear the high 32 bits of every lane so only the low halves
        // participate in the multiply.
        let mask = u64x4::splat(u32::MAX as u64);
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}
2190
/// Multiplies corresponding signed 16-bit lanes and keeps the high 16 bits of
/// each 32-bit product (`vpmulhw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Widen to i32, multiply exactly, then shift the high half down.
        let a = simd_cast::<_, i32x16>(a.as_i16x16());
        let b = simd_cast::<_, i32x16>(b.as_i16x16());
        let r = simd_shr(simd_mul(a, b), i32x16::splat(16));
        transmute(simd_cast::<i32x16, i16x16>(r))
    }
}
2209
/// Multiplies corresponding unsigned 16-bit lanes and keeps the high 16 bits
/// of each 32-bit product (`vpmulhuw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Widen to u32, multiply exactly, then shift the high half down.
        let a = simd_cast::<_, u32x16>(a.as_u16x16());
        let b = simd_cast::<_, u32x16>(b.as_u16x16());
        let r = simd_shr(simd_mul(a, b), u32x16::splat(16));
        transmute(simd_cast::<u32x16, u16x16>(r))
    }
}
2228
/// Multiplies corresponding 16-bit lanes and keeps the low 16 bits of each
/// product (wrapping multiply, `vpmullw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) }
}
2242
/// Multiplies corresponding 32-bit lanes and keeps the low 32 bits of each
/// product (wrapping multiply, `vpmulld`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) }
}
2256
/// Multiplies signed 16-bit lanes, then rounds and scales each 32-bit product
/// back to 16 bits (`vpmulhrsw`); semantics are those of the instruction.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) }
}
2270
/// Bitwise OR of the 256-bit vectors `a` and `b`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
}
2283
/// Packs the 16-bit lanes of `a` and `b` into 8-bit lanes with signed
/// saturation (`vpacksswb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpacksswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) }
}
2295
/// Packs the 32-bit lanes of `a` and `b` into 16-bit lanes with signed
/// saturation (`vpackssdw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) }
}
2307
/// Packs the signed 16-bit lanes of `a` and `b` into 8-bit lanes with
/// unsigned saturation (`vpackuswb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) }
}
2319
/// Packs the signed 32-bit lanes of `a` and `b` into 16-bit lanes with
/// unsigned saturation (`vpackusdw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) }
}
2331
/// Permutes the eight 32-bit lanes of `a` across the full 256-bit vector
/// using the lane indices in `b` (`vpermd`/`vpermps`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(permd(a.as_u32x8(), b.as_u32x8())) }
}
2345
/// Permutes the four 64-bit lanes of `a` according to `IMM8` (`vpermpd`):
/// result lane `i` is `a[(IMM8 >> (2 * i)) & 0b11]`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        // Second shuffle operand is never selected (indices are all < 4);
        // zero merely provides a well-defined value.
        let zero = i64x4::ZERO;
        // Each result lane is picked by a 2-bit field of IMM8.
        let r: i64x4 = simd_shuffle!(
            a.as_i64x4(),
            zero,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        );
        transmute(r)
    }
}
2372
/// Shuffles 128-bit halves selected from `a` and `b` according to `IMM8`.
/// Behaviorally identical to `_mm256_permute2f128_si256`, to which it
/// delegates.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_permute2f128_si256::<IMM8>(a, b)
}
2386
/// Permutes the four `f64` elements of `a` using the 2-bit lane selectors
/// packed in `IMM8` (`vpermpd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        // The second operand is never selected (all indices are masked to
        // 0..=3), so an undefined vector is acceptable as the placeholder.
        simd_shuffle!(
            a,
            _mm256_undefined_pd(),
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
            ],
        )
    }
}
2412
/// Permutes packed single-precision floats in `a` across the whole 256-bit
/// vector using the per-element indices in `idx` (`vpermps`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
    unsafe { permps(a, idx.as_i32x8()) }
}
2424
/// Computes the sum of absolute differences of packed unsigned 8-bit
/// integers in `a` and `b`; each consecutive group of 8 bytes produces one
/// 64-bit sum (`vpsadbw`, returning a `u64x4`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) }
}
2438
/// Shuffles the bytes of `a` according to the control bytes in `b`
/// (`vpshufb`). Per the ISA, each 128-bit lane is shuffled independently and
/// a control byte with its high bit set zeroes the corresponding result byte.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) }
}
2476
/// Shuffles 32-bit integers within each 128-bit lane of `a` using four
/// 2-bit indices from `MASK`; the same pattern is applied to both lanes
/// (the `+ 4` indices address the upper lane).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(MASK, 8);
    unsafe {
        let r: i32x8 = simd_shuffle!(
            a.as_i32x8(),
            a.as_i32x8(),
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        );
        transmute(r)
    }
}
2533
/// Shuffles the upper four 16-bit integers of each 128-bit lane of `a`
/// using four 2-bit indices from `IMM8`; the lower four words of each lane
/// pass through unchanged (`vpshufhw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        // Indices 0..=3 and 8..=11 keep the low words of each lane in place;
        // the `4 +`/`12 +` entries permute the high words per IMM8.
        let r: i16x16 = simd_shuffle!(
            a,
            a,
            [
                0,
                1,
                2,
                3,
                4 + (IMM8 as u32 & 0b11),
                4 + ((IMM8 as u32 >> 2) & 0b11),
                4 + ((IMM8 as u32 >> 4) & 0b11),
                4 + ((IMM8 as u32 >> 6) & 0b11),
                8,
                9,
                10,
                11,
                12 + (IMM8 as u32 & 0b11),
                12 + ((IMM8 as u32 >> 2) & 0b11),
                12 + ((IMM8 as u32 >> 4) & 0b11),
                12 + ((IMM8 as u32 >> 6) & 0b11),
            ],
        );
        transmute(r)
    }
}
2574
/// Shuffles the lower four 16-bit integers of each 128-bit lane of `a`
/// using four 2-bit indices from `IMM8`; the upper four words of each lane
/// pass through unchanged (`vpshuflw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        let a = a.as_i16x16();
        // The `0 +`/`8 +` entries permute the low words of each lane per
        // IMM8; indices 4..=7 and 12..=15 keep the high words in place.
        let r: i16x16 = simd_shuffle!(
            a,
            a,
            [
                0 + (IMM8 as u32 & 0b11),
                0 + ((IMM8 as u32 >> 2) & 0b11),
                0 + ((IMM8 as u32 >> 4) & 0b11),
                0 + ((IMM8 as u32 >> 6) & 0b11),
                4,
                5,
                6,
                7,
                8 + (IMM8 as u32 & 0b11),
                8 + ((IMM8 as u32 >> 2) & 0b11),
                8 + ((IMM8 as u32 >> 4) & 0b11),
                8 + ((IMM8 as u32 >> 6) & 0b11),
                12,
                13,
                14,
                15,
            ],
        );
        transmute(r)
    }
}
2615
/// Negates packed 16-bit integers in `a` where the corresponding element in
/// `b` is negative, and zeroes them where it is zero (`vpsignw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignw(a.as_i16x16(), b.as_i16x16())) }
}
2628
/// Negates packed 32-bit integers in `a` where the corresponding element in
/// `b` is negative, and zeroes them where it is zero (`vpsignd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignd(a.as_i32x8(), b.as_i32x8())) }
}
2641
/// Negates packed 8-bit integers in `a` where the corresponding element in
/// `b` is negative, and zeroes them where it is zero (`vpsignb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(psignb(a.as_i8x32(), b.as_i8x32())) }
}
2654
/// Shifts packed 16-bit integers in `a` left by the amount held in `count`
/// while shifting in zeros (`vpsllw`; per the ISA the shift amount is taken
/// from the low 64 bits of `count`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) }
}
2666
/// Shifts packed 32-bit integers in `a` left by the amount held in `count`
/// while shifting in zeros (`vpslld`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) }
}
2678
/// Shifts packed 64-bit integers in `a` left by the amount held in `count`
/// while shifting in zeros (`vpsllq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) }
}
2690
/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in
/// zeros; an immediate of 16 or more yields all-zero elements, matching the
/// hardware behavior.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            // Shift counts >= the element width cannot be expressed through
            // `simd_shl`, so the instruction's all-zeros result is
            // special-cased here.
            _mm256_setzero_si256()
        } else {
            transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
        }
    }
}
2711
2712#[inline]
2717#[target_feature(enable = "avx2")]
2718#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
2719#[rustc_legacy_const_generics(1)]
2720#[stable(feature = "simd_x86", since = "1.27.0")]
2721#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2722pub const fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2723 unsafe {
2724 static_assert_uimm_bits!(IMM8, 8);
2725 if IMM8 >= 32 {
2726 _mm256_setzero_si256()
2727 } else {
2728 transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
2729 }
2730 }
2731}
2732
2733#[inline]
2738#[target_feature(enable = "avx2")]
2739#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
2740#[rustc_legacy_const_generics(1)]
2741#[stable(feature = "simd_x86", since = "1.27.0")]
2742#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2743pub const fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2744 unsafe {
2745 static_assert_uimm_bits!(IMM8, 8);
2746 if IMM8 >= 64 {
2747 _mm256_setzero_si256()
2748 } else {
2749 transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
2750 }
2751 }
2752}
2753
/// Shifts each 128-bit lane of `a` left by `IMM8` bytes while shifting in
/// zeros. Alias that delegates to `_mm256_bslli_epi128`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_bslli_epi128::<IMM8>(a)
}
2767
/// Shifts each 128-bit lane of `a` left by `IMM8` bytes while shifting in
/// zeros (`vpslldq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    // Computes the shuffle index for output byte `i` over the operand pair
    // [zero, a]: index 0 selects a zero byte (any shifted-in position, or
    // everything when the shift covers a whole lane), while `32 + n` selects
    // byte `n` of `a` (the second shuffle operand).
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 || i % 16 < shift {
            0
        } else {
            32 + (i - shift)
        }
    }
    unsafe {
        let a = a.as_i8x32();
        let r: i8x32 = simd_shuffle!(
            i8x32::ZERO,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
            ],
        );
        transmute(r)
    }
}
2830
/// Shifts each packed 32-bit integer in `a` left by the corresponding
/// element of `count`; counts of 32 or more produce zero (`vpsllvd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        // Oversized counts are replaced with 0 so `simd_shl` stays in range,
        // then the final select forces those lanes to the hardware's zero
        // result.
        let count = simd_select(no_overflow, count, u32x4::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
    }
}
2849
/// Shifts each packed 32-bit integer in `a` left by the corresponding
/// element of `count`; counts of 32 or more produce zero (`vpsllvd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        // Clamp oversized counts to 0 for `simd_shl`, then zero those lanes.
        let count = simd_select(no_overflow, count, u32x8::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
    }
}
2868
/// Shifts each packed 64-bit integer in `a` left by the corresponding
/// element of `count`; counts of 64 or more produce zero (`vpsllvq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        // Clamp oversized counts to 0 for `simd_shl`, then zero those lanes.
        let count = simd_select(no_overflow, count, u64x2::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
    }
}
2887
/// Shifts each packed 64-bit integer in `a` left by the corresponding
/// element of `count`; counts of 64 or more produce zero (`vpsllvq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        // Clamp oversized counts to 0 for `simd_shl`, then zero those lanes.
        let count = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
    }
}
2906
/// Shifts packed 16-bit integers in `a` right by the amount held in `count`
/// while shifting in sign bits (`vpsraw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
}
2918
/// Shifts packed 32-bit integers in `a` right by the amount held in `count`
/// while shifting in sign bits (`vpsrad`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
}
2930
/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// sign bits; the count is clamped to 15, so larger immediates fill every
/// element with its sign bit.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
}
2945
/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// sign bits; the count is clamped to 31, so larger immediates fill every
/// element with its sign bit.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
}
2960
/// Shifts each packed 32-bit integer in `a` right by the corresponding
/// element of `count` while shifting in sign bits; counts of 32 or more are
/// treated as 31 (`vpsravd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        // A count of 31 reproduces the hardware's sign-fill result for any
        // oversized shift while keeping `simd_shr` in range.
        let count = simd_select(no_overflow, transmute(count), i32x4::splat(31));
        simd_shr(a.as_i32x4(), count).as_m128i()
    }
}
2978
/// Shifts each packed 32-bit integer in `a` right by the corresponding
/// element of `count` while shifting in sign bits; counts of 32 or more are
/// treated as 31 (`vpsravd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        // A count of 31 reproduces the hardware's sign-fill result for any
        // oversized shift while keeping `simd_shr` in range.
        let count = simd_select(no_overflow, transmute(count), i32x8::splat(31));
        simd_shr(a.as_i32x8(), count).as_m256i()
    }
}
2996
/// Shifts each 128-bit lane of `a` right by `IMM8` bytes while shifting in
/// zeros. Alias that delegates to `_mm256_bsrli_epi128`.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_bsrli_epi128::<IMM8>(a)
}
3010
/// Shifts each 128-bit lane of `a` right by `IMM8` bytes while shifting in
/// zeros (`vpsrldq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    // Computes the shuffle index for output byte `i` over the operand pair
    // [zero, a]: index 0 selects a zero byte (the high positions vacated by
    // the shift, or everything when the shift covers a whole lane), while
    // `32 + n` selects byte `n` of `a` (the second shuffle operand).
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 || (15 - (i % 16)) < shift {
            0
        } else {
            32 + (i + shift)
        }
    }
    unsafe {
        let a = a.as_i8x32();
        let r: i8x32 = simd_shuffle!(
            i8x32::ZERO,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
            ],
        );
        transmute(r)
    }
}
3073
/// Shifts packed 16-bit integers in `a` right by the amount held in `count`
/// while shifting in zeros (`vpsrlw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) }
}
3085
/// Shifts packed 32-bit integers in `a` right by the amount held in `count`
/// while shifting in zeros (`vpsrld`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) }
}
3097
/// Shifts packed 64-bit integers in `a` right by the amount held in `count`
/// while shifting in zeros (`vpsrlq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) }
}
3109
/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros; an immediate of 16 or more yields all-zero elements, matching the
/// hardware behavior.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 16 {
            // Shift counts >= the element width cannot be expressed through
            // `simd_shr`; the instruction produces zero.
            _mm256_setzero_si256()
        } else {
            transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
        }
    }
}
3130
/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros; an immediate of 32 or more yields all-zero elements, matching the
/// hardware behavior.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 32 {
            // Shift counts >= the element width cannot be expressed through
            // `simd_shr`; the instruction produces zero.
            _mm256_setzero_si256()
        } else {
            transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
        }
    }
}
3151
/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros; an immediate of 64 or more yields all-zero elements, matching the
/// hardware behavior.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    unsafe {
        if IMM8 >= 64 {
            // Shift counts >= the element width cannot be expressed through
            // `simd_shr`; the instruction produces zero.
            _mm256_setzero_si256()
        } else {
            transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
        }
    }
}
3172
/// Shifts each packed 32-bit integer in `a` right by the corresponding
/// element of `count` while shifting in zeros; counts of 32 or more produce
/// zero (`vpsrlvd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u32x4();
        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
        // Clamp oversized counts to 0 for `simd_shr`, then zero those lanes.
        let count = simd_select(no_overflow, count, u32x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
    }
}
3190
/// Shifts each packed 32-bit integer in `a` right by the corresponding
/// element of `count` while shifting in zeros; counts of 32 or more produce
/// zero (`vpsrlvd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u32x8();
        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
        // Clamp oversized counts to 0 for `simd_shr`, then zero those lanes.
        let count = simd_select(no_overflow, count, u32x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
    }
}
3208
/// Shifts each packed 64-bit integer in `a` right by the corresponding
/// element of `count` while shifting in zeros; counts of 64 or more produce
/// zero (`vpsrlvq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        // Clamp oversized counts to 0 for `simd_shr`, then zero those lanes.
        let count = simd_select(no_overflow, count, u64x2::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
    }
}
3226
/// Shifts each packed 64-bit integer in `a` right by the corresponding
/// element of `count` while shifting in zeros; counts of 64 or more produce
/// zero (`vpsrlvq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        // Clamp oversized counts to 0 for `simd_shr`, then zero those lanes.
        let count = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
    }
}
3244
/// Loads 256 bits of integer data from memory using a non-temporal hint
/// (`vmovntdqa`) to minimize cache pollution.
///
/// # Safety
///
/// `mem_addr` must be valid for a 32-byte read; per the `vmovntdqa`
/// instruction it must also be 32-byte aligned.
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovntdqa))]
#[stable(feature = "simd_x86_updates", since = "1.82.0")]
pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
    let dst: __m256i;
    // NOTE(review): implemented with inline asm rather than an LLVM
    // intrinsic, presumably so the optimizer cannot drop or merge the
    // non-temporal load — confirm against sibling `_mm*_stream_*` impls.
    crate::arch::asm!(
        vpl!("vmovntdqa {a}"),
        a = out(ymm_reg) dst,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}
3264
/// Subtracts packed 16-bit integers in `b` from those in `a` with wrapping
/// (modular) arithmetic (`vpsubw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
}
3276
/// Subtracts packed 32-bit integers in `b` from those in `a` with wrapping
/// (modular) arithmetic (`vpsubd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
}
3288
/// Subtracts packed 64-bit integers in `b` from those in `a` with wrapping
/// (modular) arithmetic (`vpsubq`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
}
3300
/// Subtracts packed 8-bit integers in `b` from those in `a` with wrapping
/// (modular) arithmetic (`vpsubb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
}
3312
/// Subtracts packed signed 16-bit integers in `b` from those in `a` using
/// signed saturation (`vpsubsw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
}
3325
/// Subtracts packed signed 8-bit integers in `b` from those in `a` using
/// signed saturation (`vpsubsb`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
}
3338
/// Subtracts packed unsigned 16-bit integers in `b` from those in `a` using
/// unsigned saturation (`vpsubusw`; results floor at zero).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
}
3351
/// Subtracts packed unsigned 8-bit integers in `b` from those in `a` using
/// unsigned saturation (`vpsubusb`; results floor at zero).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
}
3364
/// Interleaves 8-bit integers from the high half of each 128-bit lane of
/// `a` and `b` (`vpunpckhbw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=31 address `a`, 32..=63 address `b`; each row below
        // alternates a/b bytes from the upper half of one 128-bit lane.
        #[rustfmt::skip]
        let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
            8, 40, 9, 41, 10, 42, 11, 43,
            12, 44, 13, 45, 14, 46, 15, 47,
            24, 56, 25, 57, 26, 58, 27, 59,
            28, 60, 29, 61, 30, 62, 31, 63,
        ]);
        transmute(r)
    }
}
3421
/// Interleaves 8-bit integers from the low half of each 128-bit lane of
/// `a` and `b` (`vpunpcklbw`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=31 address `a`, 32..=63 address `b`; each row below
        // alternates a/b bytes from the lower half of one 128-bit lane.
        #[rustfmt::skip]
        let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
            0, 32, 1, 33, 2, 34, 3, 35,
            4, 36, 5, 37, 6, 38, 7, 39,
            16, 48, 17, 49, 18, 50, 19, 51,
            20, 52, 21, 53, 22, 54, 23, 55,
        ]);
        transmute(r)
    }
}
3477
/// Interleaves 16-bit integers from the high half of each 128-bit lane of
/// `a` and `b` (`vpunpckhwd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=15 address `a`, 16..=31 address `b`.
        let r: i16x16 = simd_shuffle!(
            a.as_i16x16(),
            b.as_i16x16(),
            [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
        );
        transmute(r)
    }
}
3527
/// Interleaves 16-bit integers from the low half of each 128-bit lane of
/// `a` and `b` (`vpunpcklwd`).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=15 address `a`, 16..=31 address `b`.
        let r: i16x16 = simd_shuffle!(
            a.as_i16x16(),
            b.as_i16x16(),
            [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
        );
        transmute(r)
    }
}
3578
/// Interleaves 32-bit integers from the high half of each 128-bit lane of
/// `a` and `b` (tested against `vunpckhps`, the float-domain encoding).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=7 address `a`, 8..=15 address `b`.
        let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
        transmute(r)
    }
}
3618
/// Interleaves 32-bit integers from the low half of each 128-bit lane of
/// `a` and `b` (tested against `vunpcklps`, the float-domain encoding).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=7 address `a`, 8..=15 address `b`.
        let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
        transmute(r)
    }
}
3658
/// Interleaves the high 64-bit integer of each 128-bit lane of `a` and `b`
/// (tested against `vunpckhpd`, the float-domain encoding).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=3 address `a`, 4..=7 address `b`.
        let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
        transmute(r)
    }
}
3698
/// Interleaves the low 64-bit integer of each 128-bit lane of `a` and `b`
/// (tested against `vunpcklpd`, the float-domain encoding).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Indices 0..=3 address `a`, 4..=7 address `b`.
        let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
        transmute(r)
    }
}
3738
/// Computes the bitwise XOR of the 256-bit vectors `a` and `b` (tested
/// against `vxorps`, the float-domain encoding of the same operation).
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
}
3751
/// Extracts the 8-bit integer at position `INDEX` (0..=31) from `a`,
/// zero-extended to `i32` (extracted as `u8` before the widening cast).
#[inline]
#[target_feature(enable = "avx2")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 5);
    unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
}
3768
/// Extracts the 16-bit integer at position `INDEX` (0..=15) from `a`,
/// zero-extended to `i32` (extracted as `u16` before the widening cast).
#[inline]
#[target_feature(enable = "avx2")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 4);
    unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
}
3785
// FFI declarations for AVX2 operations with no portable-SIMD equivalent.
// Each `link_name` must match the LLVM intrinsic name exactly; signatures
// mirror the LLVM definitions, hence `improper_ctypes` is allowed.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Horizontal add/subtract of adjacent i16 pairs with signed saturation.
    #[link_name = "llvm.x86.avx2.phadd.sw"]
    fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.phsub.sw"]
    fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
    // Multiply u8 by i8, horizontally add adjacent products into saturated i16.
    #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
    fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
    // Multiple sums of absolute differences; imm8 selects the byte sub-blocks.
    #[link_name = "llvm.x86.avx2.mpsadbw"]
    fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
    // Fixed-point rounding multiply-high (VPMULHRSW).
    #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
    fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
    // Narrowing packs with signed (ss) / unsigned (us) saturation.
    #[link_name = "llvm.x86.avx2.packsswb"]
    fn packsswb(a: i16x16, b: i16x16) -> i8x32;
    #[link_name = "llvm.x86.avx2.packssdw"]
    fn packssdw(a: i32x8, b: i32x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.packuswb"]
    fn packuswb(a: i16x16, b: i16x16) -> u8x32;
    #[link_name = "llvm.x86.avx2.packusdw"]
    fn packusdw(a: i32x8, b: i32x8) -> u16x16;
    // Sum of absolute byte differences, accumulated per 64-bit lane.
    #[link_name = "llvm.x86.avx2.psad.bw"]
    fn psadbw(a: u8x32, b: u8x32) -> u64x4;
    // Keep / negate / zero lanes of `a` according to the sign of `b` lanes.
    #[link_name = "llvm.x86.avx2.psign.b"]
    fn psignb(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.psign.w"]
    fn psignw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.psign.d"]
    fn psignd(a: i32x8, b: i32x8) -> i32x8;
    // Shifts where the count is taken from the low 64 bits of `count`
    // (logical left, arithmetic right, logical right).
    #[link_name = "llvm.x86.avx2.psll.w"]
    fn psllw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psll.d"]
    fn pslld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.q"]
    fn psllq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.psra.w"]
    fn psraw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psra.d"]
    fn psrad(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.w"]
    fn psrlw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psrl.d"]
    fn psrld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.q"]
    fn psrlq(a: i64x4, count: i64x2) -> i64x4;
    // Per-byte in-lane shuffle and cross-lane permutes.
    #[link_name = "llvm.x86.avx2.pshuf.b"]
    fn pshufb(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.permd"]
    fn permd(a: u32x8, b: u32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.permps"]
    fn permps(a: __m256, b: i32x8) -> __m256;
    // Masked gathers: the name encodes offset-width.element-width
    // (d = 32-bit, q = 64-bit); lanes whose mask sign bit is clear keep `src`.
    // `scale` must be 1, 2, 4, or 8.
    #[link_name = "llvm.x86.avx2.gather.d.d"]
    fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.d.d.256"]
    fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
    #[link_name = "llvm.x86.avx2.gather.d.q"]
    fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.d.q.256"]
    fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.q.d"]
    fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.d.256"]
    fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.q"]
    fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.q.q.256"]
    fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.d.pd"]
    fn pgatherdpd(
        src: __m128d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
    fn vpgatherdpd(
        src: __m256d,
        slice: *const i8,
        offsets: i32x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.q.pd"]
    fn pgatherqpd(
        src: __m128d,
        slice: *const i8,
        offsets: i64x2,
        mask: __m128d,
        scale: i8,
    ) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
    fn vpgatherqpd(
        src: __m256d,
        slice: *const i8,
        offsets: i64x4,
        mask: __m256d,
        scale: i8,
    ) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.d.ps"]
    fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
    -> __m128;
    #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
    fn vpgatherdps(
        src: __m256,
        slice: *const i8,
        offsets: i32x8,
        mask: __m256,
        scale: i8,
    ) -> __m256;
    #[link_name = "llvm.x86.avx2.gather.q.ps"]
    fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
    -> __m128;
    #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
    fn vpgatherqps(
        src: __m128,
        slice: *const i8,
        offsets: i64x4,
        mask: __m128,
        scale: i8,
    ) -> __m128;
}
3907
3908#[cfg(test)]
3909mod tests {
3910 use crate::core_arch::assert_eq_const as assert_eq;
3911
3912 use stdarch_test::simd_test;
3913
3914 use crate::core_arch::x86::*;
3915
    // Lane-wise absolute value. Note |MIN| is unrepresentable and wraps back
    // to MIN in two's complement — spelled below as MAX.wrapping_add(1).
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_abs_epi32() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi32(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_abs_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi16(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_abs_epi8() {
        // Same pattern in both 128-bit halves of the vector.
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi8(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }
3967
    // Lane-wise wrapping addition at each element width.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_add_epi64() {
        let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
        let b = _mm256_setr_epi64x(-1, 0, 1, 2);
        let r = _mm256_add_epi64(a, b);
        let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_add_epi32() {
        let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_add_epi32(a, b);
        let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_add_epi16() {
        // a + a doubles every lane: lane i holds 2*i.
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_add_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_add_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm256_add_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );
        assert_eq_m256i(r, e);
    }
4033
    // Saturating addition: non-saturating cases check plain sums; the
    // `_saturate_*` cases verify clamping at the type's MIN/MAX.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_adds_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }

    // i8::MAX + 1 saturates to i8::MAX (stays equal to `a`).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi8_saturate_positive() {
        let a = _mm256_set1_epi8(0x7F);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_adds_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    // i8::MIN + (-1) saturates to i8::MIN.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi8_saturate_negative() {
        let a = _mm256_set1_epi8(-0x80);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_adds_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_adds_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi16_saturate_positive() {
        let a = _mm256_set1_epi16(0x7FFF);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_adds_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi16_saturate_negative() {
        let a = _mm256_set1_epi16(-0x8000);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_adds_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_adds_epu8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }

    // u8::MAX (!0) + 1 saturates to u8::MAX.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu8_saturate() {
        let a = _mm256_set1_epi8(!0);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_adds_epu8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_adds_epu16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu16_saturate() {
        let a = _mm256_set1_epi16(!0);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_adds_epu16(a, b);
        assert_eq_m256i(r, a);
    }
4179
    // Bitwise ops: 5 & 3 == 1; andnot computes !a & b, so !5 & 3 == 2.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_and_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let got = _mm256_and_si256(a, b);
        assert_eq_m256i(got, _mm256_set1_epi8(1));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_andnot_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let got = _mm256_andnot_si256(a, b);
        assert_eq_m256i(got, _mm256_set1_epi8(2));
    }

    // Unsigned average rounds up: (3 + 9 + 1) >> 1 == 6.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_avg_epu8() {
        let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
        let r = _mm256_avg_epu8(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(6));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_avg_epu16() {
        let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
        let r = _mm256_avg_epu16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(6));
    }
4209
    // Immediate blends: each mask bit picks a lane from `b` (1) or `a` (0).
    // Each case is also checked with the complementary mask/operand swap.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_blend_epi32() {
        let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
        let e = _mm_setr_epi32(9, 3, 3, 3);
        let r = _mm_blend_epi32::<0x01>(a, b);
        assert_eq_m128i(r, e);

        let r = _mm_blend_epi32::<0x0E>(b, a);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_blend_epi32() {
        let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
        let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
        let r = _mm256_blend_epi32::<0x01>(a, b);
        assert_eq_m256i(r, e);

        let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
        let r = _mm256_blend_epi32::<0x82>(a, b);
        assert_eq_m256i(r, e);

        let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
        let r = _mm256_blend_epi32::<0x7C>(a, b);
        assert_eq_m256i(r, e);
    }

    // 16-bit blend: the 8-bit immediate applies to both 128-bit halves.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_blend_epi16() {
        let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
        let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
        let r = _mm256_blend_epi16::<0x01>(a, b);
        assert_eq_m256i(r, e);

        let r = _mm256_blend_epi16::<0xFE>(b, a);
        assert_eq_m256i(r, e);
    }

    // Variable blend: a byte is taken from `b` where the mask byte's sign bit is set.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_blendv_epi8() {
        let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
        let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
        let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
        let r = _mm256_blendv_epi8(a, b, mask);
        assert_eq_m256i(r, e);
    }
4256
    // Broadcasts: lane 0 of the 128-bit source is replicated to all lanes of
    // the destination; other source lanes are deliberately non-zero noise.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_broadcastb_epi8() {
        let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
        let res = _mm_broadcastb_epi8(a);
        assert_eq_m128i(res, _mm_set1_epi8(0x2a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_broadcastb_epi8() {
        let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
        let res = _mm256_broadcastb_epi8(a);
        assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_broadcastd_epi32() {
        let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
        let res = _mm_broadcastd_epi32(a);
        assert_eq_m128i(res, _mm_set1_epi32(0x2a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_broadcastd_epi32() {
        let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
        let res = _mm256_broadcastd_epi32(a);
        assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_broadcastq_epi64() {
        let a = _mm_setr_epi64x(0x1ffffffff, 0);
        let res = _mm_broadcastq_epi64(a);
        assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_broadcastq_epi64() {
        let a = _mm_setr_epi64x(0x1ffffffff, 0);
        let res = _mm256_broadcastq_epi64(a);
        assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_broadcastsd_pd() {
        let a = _mm_setr_pd(6.88, 3.44);
        let res = _mm_broadcastsd_pd(a);
        assert_eq_m128d(res, _mm_set1_pd(6.88));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_broadcastsd_pd() {
        let a = _mm_setr_pd(6.88, 3.44);
        let res = _mm256_broadcastsd_pd(a);
        assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
    }

    // si128 broadcast duplicates the whole 128-bit source into both halves.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_broadcastsi128_si256() {
        let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
        let res = _mm_broadcastsi128_si256(a);
        let retval = _mm256_setr_epi64x(
            0x0987654321012334,
            0x5678909876543210,
            0x0987654321012334,
            0x5678909876543210,
        );
        assert_eq_m256i(res, retval);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_broadcastsi128_si256() {
        let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
        let res = _mm256_broadcastsi128_si256(a);
        let retval = _mm256_setr_epi64x(
            0x0987654321012334,
            0x5678909876543210,
            0x0987654321012334,
            0x5678909876543210,
        );
        assert_eq_m256i(res, retval);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_broadcastss_ps() {
        let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
        let res = _mm_broadcastss_ps(a);
        assert_eq_m128(res, _mm_set1_ps(6.88));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_broadcastss_ps() {
        let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
        let res = _mm256_broadcastss_ps(a);
        assert_eq_m256(res, _mm256_set1_ps(6.88));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_broadcastw_epi16() {
        let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
        let res = _mm_broadcastw_epi16(a);
        assert_eq_m128i(res, _mm_set1_epi16(0x22b));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_broadcastw_epi16() {
        let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
        let res = _mm256_broadcastw_epi16(a);
        assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
    }
4366
    // Comparisons produce all-ones (!0) in matching lanes, zero elsewhere.
    // For cmpeq, `b` is `a` reversed except lane 2, so only lane 2 matches.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cmpeq_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            31, 30, 2, 28, 27, 26, 25, 24,
            23, 22, 21, 20, 19, 18, 17, 16,
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cmpeq_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi16(
            15, 14, 2, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cmpeq_epi32() {
        let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm256_cmpeq_epi32(a, b);
        let e = _mm256_set1_epi32(0);
        let e = _mm256_insert_epi32::<2>(e, !0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cmpeq_epi64() {
        let a = _mm256_setr_epi64x(0, 1, 2, 3);
        let b = _mm256_setr_epi64x(3, 2, 2, 0);
        let r = _mm256_cmpeq_epi64(a, b);
        assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
    }

    // For cmpgt, only lane 0 of `a` (value 5) exceeds the all-zero `b`.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cmpgt_epi8() {
        let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
        let b = _mm256_set1_epi8(0);
        let r = _mm256_cmpgt_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cmpgt_epi16() {
        let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
        let b = _mm256_set1_epi16(0);
        let r = _mm256_cmpgt_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cmpgt_epi32() {
        let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
        let b = _mm256_set1_epi32(0);
        let r = _mm256_cmpgt_epi32(a, b);
        assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cmpgt_epi64() {
        let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
        let b = _mm256_set1_epi64x(0);
        let r = _mm256_cmpgt_epi64(a, b);
        assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
    }
4452
    // Widening conversions: cvtepi* sign-extend (negative values preserved),
    // cvtepu* zero-extend. Only the low source lanes that fit are converted.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cvtepi8_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cvtepi8_epi32() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cvtepi8_epi64() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cvtepi16_epi32() {
        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cvtepi16_epi64() {
        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cvtepi32_epi64() {
        let a = _mm_setr_epi32(0, 0, -1, 1);
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cvtepu16_epi32() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cvtepu16_epi64() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cvtepu32_epi64() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cvtepu8_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cvtepu8_epi32() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_cvtepu8_epi64() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
    }
4568
    // Extract the upper 128-bit half (index 1) of the vector.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_extracti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_extracti128_si256::<1>(a);
        let e = _mm_setr_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }

    // Horizontal add of adjacent pairs: 2+2=4 from `a`, 4+4=8 from `b`,
    // interleaved per 128-bit half.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_hadd_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadd_epi16(a, b);
        let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_hadd_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_hadd_epi32(a, b);
        let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
        assert_eq_m256i(r, e);
    }

    // Saturating horizontal add: 0x7fff + 1 in the first pair clamps to 0x7FFF.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hadds_epi16() {
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16::<0>(a, 0x7fff);
        let a = _mm256_insert_epi16::<1>(a, 1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadds_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0x7FFF, 4, 4, 4, 8, 8, 8, 8,
            4, 4, 4, 4, 8, 8, 8, 8,
        );
        assert_eq_m256i(r, e);
    }

    // Horizontal subtract of equal adjacent lanes is zero everywhere.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_hsub_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsub_epi16(a, b);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_hsub_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_hsub_epi32(a, b);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }

    // Saturating horizontal subtract: 0x7fff - (-1) clamps to 0x7FFF in lane 0.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hsubs_epi16() {
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16::<0>(a, 0x7fff);
        let a = _mm256_insert_epi16::<1>(a, -1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsubs_epi16(a, b);
        let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
        assert_eq_m256i(r, e);
    }

    // madd: (2*4) + (2*4) per adjacent i16 pair = 16 in each i32 lane.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_madd_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_madd_epi16(a, b);
        let e = _mm256_set1_epi32(16);
        assert_eq_m256i(r, e);
    }

    // Replace the upper 128-bit half (index 1) with `b`.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_inserti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm_setr_epi64x(7, 8);
        let r = _mm256_inserti128_si256::<1>(a, b);
        let e = _mm256_setr_epi64x(1, 2, 7, 8);
        assert_eq_m256i(r, e);
    }

    // maddubs: (2*4) + (2*4) per adjacent u8*i8 pair = 16 in each i16 lane.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maddubs_epi16() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maddubs_epi16(a, b);
        let e = _mm256_set1_epi16(16);
        assert_eq_m256i(r, e);
    }
4665
    // Masked loads: lanes with the mask sign bit set (-1) are read from
    // memory, the rest are zeroed.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_maskload_epi32() {
        let nums = [1, 2, 3, 4];
        let a = &nums as *const i32;
        let mask = _mm_setr_epi32(-1, 0, 0, -1);
        let r = _mm_maskload_epi32(a, mask);
        let e = _mm_setr_epi32(1, 0, 0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_maskload_epi32() {
        let nums = [1, 2, 3, 4, 5, 6, 7, 8];
        let a = &nums as *const i32;
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        let r = _mm256_maskload_epi32(a, mask);
        let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_maskload_epi64() {
        let nums = [1_i64, 2_i64];
        let a = &nums as *const i64;
        let mask = _mm_setr_epi64x(0, -1);
        let r = _mm_maskload_epi64(a, mask);
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_maskload_epi64() {
        let nums = [1_i64, 2_i64, 3_i64, 4_i64];
        let a = &nums as *const i64;
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        let r = _mm256_maskload_epi64(a, mask);
        let e = _mm256_setr_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    // Masked stores: unselected destination elements keep their old values.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_maskstore_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut arr = [-1, -1, -1, -1];
        let mask = _mm_setr_epi32(-1, 0, 0, -1);
        _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        let e = [1, -1, -1, 4];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_maskstore_epi32() {
        let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
        let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        let e = [1, -1, -1, 42, -1, 6, 7, -1];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_maskstore_epi64() {
        let a = _mm_setr_epi64x(1_i64, 2_i64);
        let mut arr = [-1_i64, -1_i64];
        let mask = _mm_setr_epi64x(0, -1);
        _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        let e = [-1, 2];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_maskstore_epi64() {
        let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
        let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        let e = [-1, 2, 3, -1];
        assert_eq!(arr, e);
    }
4745
    // Lane-wise max/min, signed and unsigned: with a = 2 and b = 4, max
    // returns `b` and min returns `a` for every element width.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_max_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_max_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_max_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_max_epi32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_max_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_max_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_max_epu16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_max_epu16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_max_epu32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_max_epu32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_max_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_max_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_min_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_min_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_min_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_min_epi32(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_min_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_min_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_min_epu16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_min_epu16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_min_epu32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_min_epu32(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_min_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_min_epu8(a, b);
        assert_eq_m256i(r, a);
    }
4841
4842 #[simd_test(enable = "avx2")]
4843 const unsafe fn test_mm256_movemask_epi8() {
4844 let a = _mm256_set1_epi8(-1);
4845 let r = _mm256_movemask_epi8(a);
4846 let e = -1;
4847 assert_eq!(r, e);
4848 }
4849
4850 #[simd_test(enable = "avx2")]
4851 unsafe fn test_mm256_mpsadbw_epu8() {
4852 let a = _mm256_set1_epi8(2);
4853 let b = _mm256_set1_epi8(4);
4854 let r = _mm256_mpsadbw_epu8::<0>(a, b);
4855 let e = _mm256_set1_epi16(8);
4856 assert_eq_m256i(r, e);
4857 }
4858
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_mul_epi32() {
        // Widening multiply of the even-indexed 32-bit lanes:
        // 0*1, 0*3, 2*5, 2*7 -> (0, 0, 10, 14) as 64-bit results.
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epi32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_mul_epu32() {
        // Unsigned variant; same even-lane widening multiply as above.
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epu32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_mulhi_epi16() {
        // 6535 * 6535 == 42_706_225; the high 16 bits are 42_706_225 >> 16 == 651.
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epi16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_mulhi_epu16() {
        // Same product as the signed case (operands are positive), so the
        // high halves match: 651.
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epu16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_mullo_epi16() {
        // Low 16 bits of 2 * 4 == 8 in every lane.
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_mullo_epi16(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_mullo_epi32() {
        // Low 32 bits of 2 * 4 == 8 in every lane.
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_mullo_epi32(a, b);
        let e = _mm256_set1_epi32(8);
        assert_eq_m256i(r, e);
    }
4912
4913 #[simd_test(enable = "avx2")]
4914 unsafe fn test_mm256_mulhrs_epi16() {
4915 let a = _mm256_set1_epi16(2);
4916 let b = _mm256_set1_epi16(4);
4917 let r = _mm256_mullo_epi16(a, b);
4918 let e = _mm256_set1_epi16(8);
4919 assert_eq_m256i(r, e);
4920 }
4921
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_or_si256() {
        // x | 0 == x, and -1 | 0 == -1, so the result equals `a`.
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(0);
        let r = _mm256_or_si256(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packs_epi16() {
        // Saturating i16 -> i8 pack interleaves per 128-bit half:
        // 8 words from `a`, then 8 from `b`, repeated for the high half.
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packs_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packs_epi32() {
        // Saturating i32 -> i16 pack, same per-half interleave as above.
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packs_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packus_epi16() {
        // Unsigned-saturating i16 -> u8 pack; small positive inputs pass through.
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packus_epi32() {
        // Unsigned-saturating i32 -> u16 pack; small positive inputs pass through.
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packus_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sad_epu8() {
        // Sum of absolute differences |2 - 4| over each group of 8 bytes:
        // 8 * 2 == 16 per 64-bit lane.
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_sad_epu8(a, b);
        let e = _mm256_set1_epi64x(16);
        assert_eq_m256i(r, e);
    }
4990
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_shufflehi_epi16() {
        // Immediate 0b00_01_01_11 reads, from the low bit pair upward, source
        // indices 3, 1, 1, 0 within the high quarter of each 128-bit half,
        // leaving the low four words of each half untouched.
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 11, 22, 33, 44,
            4, 5, 6, 7, 55, 66, 77, 88,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 2, 3, 44, 22, 22, 11,
            4, 5, 6, 7, 88, 66, 66, 55,
        );
        let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_shufflelo_epi16() {
        // Mirror of shufflehi: same immediate decoding, applied to the low
        // quarter of each 128-bit half.
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            11, 22, 33, 44, 0, 1, 2, 3,
            55, 66, 77, 88, 4, 5, 6, 7,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            44, 22, 22, 11, 0, 1, 2, 3,
            88, 66, 66, 55, 4, 5, 6, 7,
        );
        let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m256i(r, e);
    }

    // Sign tests: `b` is negative in every lane, so each lane of `a` is negated.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_sign_epi16(a, b);
        let e = _mm256_set1_epi16(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_sign_epi32(a, b);
        let e = _mm256_set1_epi32(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_sign_epi8(a, b);
        let e = _mm256_set1_epi8(-2);
        assert_eq_m256i(r, e);
    }

    // `sll` takes the shift count from the low 64 bits of the second operand;
    // shifting by 4 multiplies each lane by 16 (0xFF -> 0xFF0, etc.).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
        let r = _mm256_sll_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
        let r = _mm256_sll_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
        let r = _mm256_sll_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
    }

    // `slli` uses an immediate count; same expected values as the `sll` tests.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_slli_epi16() {
        assert_eq_m256i(
            _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
            _mm256_set1_epi16(0xFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_slli_epi32() {
        assert_eq_m256i(
            _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
            _mm256_set1_epi32(0xFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_slli_epi64() {
        assert_eq_m256i(
            _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
            _mm256_set1_epi64x(0xFFFFFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_slli_si256() {
        // Byte-wise left shift within each 128-bit lane: 3 bytes == 24 bits,
        // so 0xFFFFFFFF becomes 0xFFFFFFFF000000 in both 64-bit lanes per half.
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let r = _mm256_slli_si256::<3>(a);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
    }
5104
    // Per-lane variable left shifts: 2 << 1 == 4 in every lane.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_sllv_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(1);
        let r = _mm_sllv_epi32(a, b);
        let e = _mm_set1_epi32(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_sllv_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_sllv_epi32(a, b);
        let e = _mm256_set1_epi32(4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_sllv_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(1);
        let r = _mm_sllv_epi64(a, b);
        let e = _mm_set1_epi64x(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_sllv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(1);
        let r = _mm256_sllv_epi64(a, b);
        let e = _mm256_set1_epi64x(4);
        assert_eq_m256i(r, e);
    }

    // Arithmetic right shifts: -1 >> 1 stays -1 because the sign bit is
    // replicated. The count comes from the low 64 bits of `b`.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sra_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_sra_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(-1));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sra_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
        let r = _mm256_sra_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(-1));
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_srai_epi16() {
        assert_eq_m256i(
            _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
            _mm256_set1_epi16(-1),
        );
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_srai_epi32() {
        assert_eq_m256i(
            _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
            _mm256_set1_epi32(-1),
        );
    }

    // Per-lane variable arithmetic right shift: 4 >> 1 == 2.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_srav_epi32() {
        let a = _mm_set1_epi32(4);
        let count = _mm_set1_epi32(1);
        let r = _mm_srav_epi32(a, count);
        let e = _mm_set1_epi32(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_srav_epi32() {
        let a = _mm256_set1_epi32(4);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srav_epi32(a, count);
        let e = _mm256_set1_epi32(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_srli_si256() {
        // Byte-wise right shift, independently within each 128-bit lane:
        // bytes shift down by 3 and the top 3 bytes of each lane become zero.
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_srli_si256::<3>(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            4, 5, 6, 7, 8, 9, 10, 11,
            12, 13, 14, 15, 16, 0, 0, 0,
            20, 21, 22, 23, 24, 25, 26, 27,
            28, 29, 30, 31, 32, 0, 0, 0,
        );
        assert_eq_m256i(r, e);
    }

    // Logical right shifts by 4 (count in the low 64 bits of `b`).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
        let r = _mm256_srl_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
        let r = _mm256_srl_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_setr_epi64x(4, 0);
        let r = _mm256_srl_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
    }

    // Immediate-count logical right shifts; same expected values as `srl`.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_srli_epi16() {
        assert_eq_m256i(
            _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
            _mm256_set1_epi16(0xF),
        );
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_srli_epi32() {
        assert_eq_m256i(
            _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
            _mm256_set1_epi32(0xFFF),
        );
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_srli_epi64() {
        assert_eq_m256i(
            _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
            _mm256_set1_epi64x(0xFFFFFFF),
        );
    }

    // Per-lane variable logical right shift: 2 >> 1 == 1.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_srlv_epi32() {
        let a = _mm_set1_epi32(2);
        let count = _mm_set1_epi32(1);
        let r = _mm_srlv_epi32(a, count);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_srlv_epi32() {
        let a = _mm256_set1_epi32(2);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srlv_epi32(a, count);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm_srlv_epi64() {
        let a = _mm_set1_epi64x(2);
        let count = _mm_set1_epi64x(1);
        let r = _mm_srlv_epi64(a, count);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_srlv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let count = _mm256_set1_epi64x(1);
        let r = _mm256_srlv_epi64(a, count);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }
5294
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_stream_load_si256() {
        // Non-temporal load must round-trip the value stored in memory.
        let a = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _);
        assert_eq_m256i(a, r);
    }

    // Subtraction tests: 4 - 2 == 2, so each result must equal `b`.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_sub_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_sub_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_sub_epi32() {
        let a = _mm256_set1_epi32(4);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_sub_epi32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_sub_epi64() {
        let a = _mm256_set1_epi64x(4);
        let b = _mm256_set1_epi64x(2);
        let r = _mm256_sub_epi64(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_sub_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_sub_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    // Saturating subtraction tests: 4 - 2 == 2 does not saturate, so the
    // result equals `b` as well.
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_subs_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_subs_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_subs_epu16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epu16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_subs_epu8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_xor_si256() {
        // 0b101 ^ 0b011 == 0b110, i.e. 5 ^ 3 == 6 in every byte.
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let r = _mm256_xor_si256(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(6));
    }
5373
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_alignr_epi8() {
        // `alignr` concatenates `a:b` within each 128-bit half and shifts the
        // pair right by IMM bytes. Boundary cases checked below:
        //   IMM >= 32 -> all zero; IMM == 16 -> `a`; IMM == 0 -> `b`.
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            -1, -2, -3, -4, -5, -6, -7, -8,
            -9, -10, -11, -12, -13, -14, -15, -16,
            -17, -18, -19, -20, -21, -22, -23, -24,
            -25, -26, -27, -28, -29, -30, -31, -32,
        );
        let r = _mm256_alignr_epi8::<33>(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(0));

        // IMM in (16, 32): bytes come from `a` only, zero-filled at the top.
        let r = _mm256_alignr_epi8::<17>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
            18, 19, 20, 21, 22, 23, 24, 25,
            26, 27, 28, 29, 30, 31, 32, 0,
        );
        assert_eq_m256i(r, expected);

        // IMM in (0, 16): low bytes from `b`, high bytes wrap in from `a`.
        let r = _mm256_alignr_epi8::<4>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -5, -6, -7, -8, -9, -10, -11, -12,
            -13, -14, -15, -16, 1, 2, 3, 4,
            -21, -22, -23, -24, -25, -26, -27, -28,
            -29, -30, -31, -32, 17, 18, 19, 20,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8::<15>(a, b);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -16, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            -32, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8::<0>(a, b);
        assert_eq_m256i(r, b);

        let r = _mm256_alignr_epi8::<16>(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shuffle_epi8() {
        // Byte shuffle operates within each 128-bit half: index bytes with the
        // high bit set (128u8) zero the output byte; the low 4 bits of the
        // others select a byte from the same half of `a`.
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            5, 0, 5, 4, 9, 13, 7, 4,
            13, 6, 6, 11, 5, 2, 9, 1,
            21, 0, 21, 20, 25, 29, 23, 20,
            29, 22, 22, 27, 21, 18, 25, 17,
        );
        let r = _mm256_shuffle_epi8(a, b);
        assert_eq_m256i(r, expected);
    }
5456
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permutevar8x32_epi32() {
        // Full-width lane permute: result[i] = a[b[i]] across all 8 lanes.
        let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
        let r = _mm256_permutevar8x32_epi32(a, b);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_permute4x64_epi64() {
        // Immediate 0b00010011 reads, low bit pair first, source lanes
        // 3, 0, 1, 0 -> (400, 100, 200, 100).
        let a = _mm256_setr_epi64x(100, 200, 300, 400);
        let expected = _mm256_setr_epi64x(400, 100, 200, 100);
        let r = _mm256_permute4x64_epi64::<0b00010011>(a);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_permute2x128_si256() {
        // Low nibble 0b0011 == 3 selects b's high half for the result's low
        // 128 bits; high nibble 0b0001 == 1 selects a's high half.
        let a = _mm256_setr_epi64x(100, 200, 500, 600);
        let b = _mm256_setr_epi64x(300, 400, 700, 800);
        let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
        let e = _mm256_setr_epi64x(700, 800, 500, 600);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_permute4x64_pd() {
        // Same immediate decoding as permute4x64_epi64: lanes 3, 0, 1, 0.
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
        let e = _mm256_setr_pd(4., 1., 2., 1.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permutevar8x32_ps() {
        // Float counterpart of permutevar8x32_epi32: result[i] = a[b[i]].
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let r = _mm256_permutevar8x32_ps(a, b);
        let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
        assert_eq_m256(r, e);
    }
5499
    // Gather tests. The source array is the identity mapping arr[i] == i and
    // the SCALE parameter equals the element size, so gathering at indices I
    // must return exactly I. In the `mask_` variants only lanes whose mask has
    // the sign bit set are gathered; the others keep the `src` value (256).
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm_mask_i32gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm_setr_epi32(-1, -1, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r =
            _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm256_mask_i32gather_epi32::<4>(
            _mm256_set1_epi32(256),
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
            _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm_mask_i32gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r =
            _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
        assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm256_mask_i32gather_ps::<4>(
            _mm256_set1_ps(256.0),
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
            _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
        );
        assert_eq_m256(
            r,
            _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_epi64() {
        // 64-bit elements, scale 8; only the low two indices are used.
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm_mask_i32gather_epi64::<8>(
            _mm_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi32(16, 16, 16, 16),
            _mm_setr_epi64x(-1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm256_mask_i32gather_epi64::<8>(
            _mm256_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm256_setr_epi64x(-1, -1, -1, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm_mask_i32gather_pd::<8>(
            _mm_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(16, 16, 16, 16),
            _mm_setr_pd(-1.0, 0.0),
        );
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm256_mask_i32gather_pd::<8>(
            _mm256_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }

    // i64-indexed gathers: fewer indices than output lanes for 32-bit
    // elements, so the upper result lanes are zeroed.
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm_mask_i64gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm_setr_epi64x(0, 16),
            _mm_setr_epi32(-1, 0, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_epi32() {
        let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
        let r = _mm256_mask_i64gather_epi32::<4>(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm_setr_epi32(-1, -1, -1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm_mask_i64gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm_setr_epi64x(0, 16),
            _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_ps() {
        let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
        let r = _mm256_mask_i64gather_ps::<4>(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm_mask_i64gather_epi64::<8>(
            _mm_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi64x(16, 16),
            _mm_setr_epi64x(-1, 0),
        );
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_epi64() {
        let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
        let r = _mm256_mask_i64gather_epi64::<8>(
            _mm256_set1_epi64x(256),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm256_setr_epi64x(-1, -1, -1, 0),
        );
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm_mask_i64gather_pd::<8>(
            _mm_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi64x(16, 16),
            _mm_setr_pd(-1.0, 0.0),
        );
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_pd() {
        let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
        let r = _mm256_mask_i64gather_pd::<8>(
            _mm256_set1_pd(256.0),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
        );
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }
5840
    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_extract_epi8() {
        // Lane 0 holds -1; the extracted byte is zero-extended into i32,
        // so the result is 0xFF rather than -1.
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31
        );
        let r1 = _mm256_extract_epi8::<0>(a);
        let r2 = _mm256_extract_epi8::<3>(a);
        assert_eq!(r1, 0xFF);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "avx2")]
    const unsafe fn test_mm256_extract_epi16() {
        // Same zero-extension behavior for 16-bit lanes: -1 reads back as 0xFFFF.
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r1 = _mm256_extract_epi16::<0>(a);
        let r2 = _mm256_extract_epi16::<3>(a);
        assert_eq!(r1, 0xFFFF);
        assert_eq!(r2, 3);
    }
5868}