Skip to main content

core/stdarch/crates/core_arch/src/x86/
avx512bw.rs

1use crate::{
2    core_arch::{simd::*, x86::*},
3    intrinsics::simd::*,
4    ptr,
5};
6
7#[cfg(test)]
8use stdarch_test::assert_instr;
9
10/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
11///
12/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi16&expand=30)
13#[inline]
14#[target_feature(enable = "avx512bw")]
15#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16#[cfg_attr(test, assert_instr(vpabsw))]
17#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
18pub const fn _mm512_abs_epi16(a: __m512i) -> __m512i {
19    unsafe {
20        let a = a.as_i16x32();
21        let cmp: i16x32 = simd_gt(a, i16x32::ZERO);
22        transmute(simd_select(cmp, a, simd_neg(a)))
23    }
24}
25
26/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27///
28/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi16&expand=31)
29#[inline]
30#[target_feature(enable = "avx512bw")]
31#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32#[cfg_attr(test, assert_instr(vpabsw))]
33#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34pub const fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
35    unsafe {
36        let abs = _mm512_abs_epi16(a).as_i16x32();
37        transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
38    }
39}
40
41/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
42///
43/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi16&expand=32)
44#[inline]
45#[target_feature(enable = "avx512bw")]
46#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
47#[cfg_attr(test, assert_instr(vpabsw))]
48#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
49pub const fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
50    unsafe {
51        let abs = _mm512_abs_epi16(a).as_i16x32();
52        transmute(simd_select_bitmask(k, abs, i16x32::ZERO))
53    }
54}
55
56/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
57///
58/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi16&expand=28)
59#[inline]
60#[target_feature(enable = "avx512bw,avx512vl")]
61#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
62#[cfg_attr(test, assert_instr(vpabsw))]
63#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
64pub const fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
65    unsafe {
66        let abs = _mm256_abs_epi16(a).as_i16x16();
67        transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
68    }
69}
70
71/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
72///
73/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi16&expand=29)
74#[inline]
75#[target_feature(enable = "avx512bw,avx512vl")]
76#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
77#[cfg_attr(test, assert_instr(vpabsw))]
78#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
79pub const fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
80    unsafe {
81        let abs = _mm256_abs_epi16(a).as_i16x16();
82        transmute(simd_select_bitmask(k, abs, i16x16::ZERO))
83    }
84}
85
86/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
87///
88/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi16&expand=25)
89#[inline]
90#[target_feature(enable = "avx512bw,avx512vl")]
91#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
92#[cfg_attr(test, assert_instr(vpabsw))]
93#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
94pub const fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
95    unsafe {
96        let abs = _mm_abs_epi16(a).as_i16x8();
97        transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
98    }
99}
100
101/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
102///
103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi16&expand=26)
104#[inline]
105#[target_feature(enable = "avx512bw,avx512vl")]
106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
107#[cfg_attr(test, assert_instr(vpabsw))]
108#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
109pub const fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
110    unsafe {
111        let abs = _mm_abs_epi16(a).as_i16x8();
112        transmute(simd_select_bitmask(k, abs, i16x8::ZERO))
113    }
114}
115
116/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst.
117///
118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi8&expand=57)
119#[inline]
120#[target_feature(enable = "avx512bw")]
121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
122#[cfg_attr(test, assert_instr(vpabsb))]
123#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
124pub const fn _mm512_abs_epi8(a: __m512i) -> __m512i {
125    unsafe {
126        let a = a.as_i8x64();
127        let cmp: i8x64 = simd_gt(a, i8x64::ZERO);
128        transmute(simd_select(cmp, a, simd_neg(a)))
129    }
130}
131
132/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
133///
134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi8&expand=58)
135#[inline]
136#[target_feature(enable = "avx512bw")]
137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
138#[cfg_attr(test, assert_instr(vpabsb))]
139#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
140pub const fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
141    unsafe {
142        let abs = _mm512_abs_epi8(a).as_i8x64();
143        transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
144    }
145}
146
147/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
148///
149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi8&expand=59)
150#[inline]
151#[target_feature(enable = "avx512bw")]
152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
153#[cfg_attr(test, assert_instr(vpabsb))]
154#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
155pub const fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
156    unsafe {
157        let abs = _mm512_abs_epi8(a).as_i8x64();
158        transmute(simd_select_bitmask(k, abs, i8x64::ZERO))
159    }
160}
161
162/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
163///
164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi8&expand=55)
165#[inline]
166#[target_feature(enable = "avx512bw,avx512vl")]
167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
168#[cfg_attr(test, assert_instr(vpabsb))]
169#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
170pub const fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
171    unsafe {
172        let abs = _mm256_abs_epi8(a).as_i8x32();
173        transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
174    }
175}
176
177/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
178///
179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi8&expand=56)
180#[inline]
181#[target_feature(enable = "avx512bw,avx512vl")]
182#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
183#[cfg_attr(test, assert_instr(vpabsb))]
184#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
185pub const fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
186    unsafe {
187        let abs = _mm256_abs_epi8(a).as_i8x32();
188        transmute(simd_select_bitmask(k, abs, i8x32::ZERO))
189    }
190}
191
192/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set)
193///
194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi8&expand=52)
195#[inline]
196#[target_feature(enable = "avx512bw,avx512vl")]
197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
198#[cfg_attr(test, assert_instr(vpabsb))]
199#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
200pub const fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
201    unsafe {
202        let abs = _mm_abs_epi8(a).as_i8x16();
203        transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
204    }
205}
206
207/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
208///
209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi8&expand=53)
210#[inline]
211#[target_feature(enable = "avx512bw,avx512vl")]
212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
213#[cfg_attr(test, assert_instr(vpabsb))]
214#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
215pub const fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
216    unsafe {
217        let abs = _mm_abs_epi8(a).as_i8x16();
218        transmute(simd_select_bitmask(k, abs, i8x16::ZERO))
219    }
220}
221
222/// Add packed 16-bit integers in a and b, and store the results in dst.
223///
224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi16&expand=91)
225#[inline]
226#[target_feature(enable = "avx512bw")]
227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
228#[cfg_attr(test, assert_instr(vpaddw))]
229#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
230pub const fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
231    unsafe { transmute(simd_add(a.as_i16x32(), b.as_i16x32())) }
232}
233
234/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
235///
236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi16&expand=92)
237#[inline]
238#[target_feature(enable = "avx512bw")]
239#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
240#[cfg_attr(test, assert_instr(vpaddw))]
241#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
242pub const fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
243    unsafe {
244        let add = _mm512_add_epi16(a, b).as_i16x32();
245        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
246    }
247}
248
249/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
250///
251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi16&expand=93)
252#[inline]
253#[target_feature(enable = "avx512bw")]
254#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
255#[cfg_attr(test, assert_instr(vpaddw))]
256#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
257pub const fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
258    unsafe {
259        let add = _mm512_add_epi16(a, b).as_i16x32();
260        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
261    }
262}
263
264/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
265///
266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi16&expand=89)
267#[inline]
268#[target_feature(enable = "avx512bw,avx512vl")]
269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
270#[cfg_attr(test, assert_instr(vpaddw))]
271#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
272pub const fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
273    unsafe {
274        let add = _mm256_add_epi16(a, b).as_i16x16();
275        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
276    }
277}
278
279/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
280///
281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi16&expand=90)
282#[inline]
283#[target_feature(enable = "avx512bw,avx512vl")]
284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
285#[cfg_attr(test, assert_instr(vpaddw))]
286#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
287pub const fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
288    unsafe {
289        let add = _mm256_add_epi16(a, b).as_i16x16();
290        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
291    }
292}
293
294/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
295///
296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi16&expand=86)
297#[inline]
298#[target_feature(enable = "avx512bw,avx512vl")]
299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
300#[cfg_attr(test, assert_instr(vpaddw))]
301#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
302pub const fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
303    unsafe {
304        let add = _mm_add_epi16(a, b).as_i16x8();
305        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
306    }
307}
308
309/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
310///
311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi16&expand=87)
312#[inline]
313#[target_feature(enable = "avx512bw,avx512vl")]
314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
315#[cfg_attr(test, assert_instr(vpaddw))]
316#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
317pub const fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
318    unsafe {
319        let add = _mm_add_epi16(a, b).as_i16x8();
320        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
321    }
322}
323
324/// Add packed 8-bit integers in a and b, and store the results in dst.
325///
326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi8&expand=118)
327#[inline]
328#[target_feature(enable = "avx512bw")]
329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
330#[cfg_attr(test, assert_instr(vpaddb))]
331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
332pub const fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
333    unsafe { transmute(simd_add(a.as_i8x64(), b.as_i8x64())) }
334}
335
336/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
337///
338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi8&expand=119)
339#[inline]
340#[target_feature(enable = "avx512bw")]
341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
342#[cfg_attr(test, assert_instr(vpaddb))]
343#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
344pub const fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
345    unsafe {
346        let add = _mm512_add_epi8(a, b).as_i8x64();
347        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
348    }
349}
350
351/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
352///
353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi8&expand=120)
354#[inline]
355#[target_feature(enable = "avx512bw")]
356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
357#[cfg_attr(test, assert_instr(vpaddb))]
358#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
359pub const fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
360    unsafe {
361        let add = _mm512_add_epi8(a, b).as_i8x64();
362        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
363    }
364}
365
366/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
367///
368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi8&expand=116)
369#[inline]
370#[target_feature(enable = "avx512bw,avx512vl")]
371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
372#[cfg_attr(test, assert_instr(vpaddb))]
373#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
374pub const fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
375    unsafe {
376        let add = _mm256_add_epi8(a, b).as_i8x32();
377        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
378    }
379}
380
381/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
382///
383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi8&expand=117)
384#[inline]
385#[target_feature(enable = "avx512bw,avx512vl")]
386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
387#[cfg_attr(test, assert_instr(vpaddb))]
388#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
389pub const fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
390    unsafe {
391        let add = _mm256_add_epi8(a, b).as_i8x32();
392        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
393    }
394}
395
396/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
397///
398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi8&expand=113)
399#[inline]
400#[target_feature(enable = "avx512bw,avx512vl")]
401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
402#[cfg_attr(test, assert_instr(vpaddb))]
403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
404pub const fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
405    unsafe {
406        let add = _mm_add_epi8(a, b).as_i8x16();
407        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
408    }
409}
410
411/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
412///
413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi8&expand=114)
414#[inline]
415#[target_feature(enable = "avx512bw,avx512vl")]
416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
417#[cfg_attr(test, assert_instr(vpaddb))]
418#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
419pub const fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
420    unsafe {
421        let add = _mm_add_epi8(a, b).as_i8x16();
422        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
423    }
424}
425
426/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst.
427///
428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu16&expand=197)
429#[inline]
430#[target_feature(enable = "avx512bw")]
431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
432#[cfg_attr(test, assert_instr(vpaddusw))]
433#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
434pub const fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
435    unsafe { transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32())) }
436}
437
438/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
439///
440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu16&expand=198)
441#[inline]
442#[target_feature(enable = "avx512bw")]
443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
444#[cfg_attr(test, assert_instr(vpaddusw))]
445#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
446pub const fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
447    unsafe {
448        let add = _mm512_adds_epu16(a, b).as_u16x32();
449        transmute(simd_select_bitmask(k, add, src.as_u16x32()))
450    }
451}
452
453/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
454///
455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu16&expand=199)
456#[inline]
457#[target_feature(enable = "avx512bw")]
458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
459#[cfg_attr(test, assert_instr(vpaddusw))]
460#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
461pub const fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
462    unsafe {
463        let add = _mm512_adds_epu16(a, b).as_u16x32();
464        transmute(simd_select_bitmask(k, add, u16x32::ZERO))
465    }
466}
467
468/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
469///
470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu16&expand=195)
471#[inline]
472#[target_feature(enable = "avx512bw,avx512vl")]
473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
474#[cfg_attr(test, assert_instr(vpaddusw))]
475#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
476pub const fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
477    unsafe {
478        let add = _mm256_adds_epu16(a, b).as_u16x16();
479        transmute(simd_select_bitmask(k, add, src.as_u16x16()))
480    }
481}
482
483/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
484///
485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu16&expand=196)
486#[inline]
487#[target_feature(enable = "avx512bw,avx512vl")]
488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
489#[cfg_attr(test, assert_instr(vpaddusw))]
490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
491pub const fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
492    unsafe {
493        let add = _mm256_adds_epu16(a, b).as_u16x16();
494        transmute(simd_select_bitmask(k, add, u16x16::ZERO))
495    }
496}
497
498/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
499///
500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu16&expand=192)
501#[inline]
502#[target_feature(enable = "avx512bw,avx512vl")]
503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
504#[cfg_attr(test, assert_instr(vpaddusw))]
505#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
506pub const fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
507    unsafe {
508        let add = _mm_adds_epu16(a, b).as_u16x8();
509        transmute(simd_select_bitmask(k, add, src.as_u16x8()))
510    }
511}
512
513/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
514///
515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu16&expand=193)
516#[inline]
517#[target_feature(enable = "avx512bw,avx512vl")]
518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
519#[cfg_attr(test, assert_instr(vpaddusw))]
520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
521pub const fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
522    unsafe {
523        let add = _mm_adds_epu16(a, b).as_u16x8();
524        transmute(simd_select_bitmask(k, add, u16x8::ZERO))
525    }
526}
527
528/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst.
529///
530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu8&expand=206)
531#[inline]
532#[target_feature(enable = "avx512bw")]
533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
534#[cfg_attr(test, assert_instr(vpaddusb))]
535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
536pub const fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
537    unsafe { transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64())) }
538}
539
540/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
541///
542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu8&expand=207)
543#[inline]
544#[target_feature(enable = "avx512bw")]
545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
546#[cfg_attr(test, assert_instr(vpaddusb))]
547#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
548pub const fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
549    unsafe {
550        let add = _mm512_adds_epu8(a, b).as_u8x64();
551        transmute(simd_select_bitmask(k, add, src.as_u8x64()))
552    }
553}
554
555/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
556///
557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu8&expand=208)
558#[inline]
559#[target_feature(enable = "avx512bw")]
560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
561#[cfg_attr(test, assert_instr(vpaddusb))]
562#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
563pub const fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
564    unsafe {
565        let add = _mm512_adds_epu8(a, b).as_u8x64();
566        transmute(simd_select_bitmask(k, add, u8x64::ZERO))
567    }
568}
569
570/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
571///
572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu8&expand=204)
573#[inline]
574#[target_feature(enable = "avx512bw,avx512vl")]
575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
576#[cfg_attr(test, assert_instr(vpaddusb))]
577#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
578pub const fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
579    unsafe {
580        let add = _mm256_adds_epu8(a, b).as_u8x32();
581        transmute(simd_select_bitmask(k, add, src.as_u8x32()))
582    }
583}
584
585/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
586///
587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu8&expand=205)
588#[inline]
589#[target_feature(enable = "avx512bw,avx512vl")]
590#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
591#[cfg_attr(test, assert_instr(vpaddusb))]
592#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
593pub const fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
594    unsafe {
595        let add = _mm256_adds_epu8(a, b).as_u8x32();
596        transmute(simd_select_bitmask(k, add, u8x32::ZERO))
597    }
598}
599
600/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
601///
602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu8&expand=201)
603#[inline]
604#[target_feature(enable = "avx512bw,avx512vl")]
605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
606#[cfg_attr(test, assert_instr(vpaddusb))]
607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
608pub const fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
609    unsafe {
610        let add = _mm_adds_epu8(a, b).as_u8x16();
611        transmute(simd_select_bitmask(k, add, src.as_u8x16()))
612    }
613}
614
615/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
616///
617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu8&expand=202)
618#[inline]
619#[target_feature(enable = "avx512bw,avx512vl")]
620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
621#[cfg_attr(test, assert_instr(vpaddusb))]
622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
623pub const fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
624    unsafe {
625        let add = _mm_adds_epu8(a, b).as_u8x16();
626        transmute(simd_select_bitmask(k, add, u8x16::ZERO))
627    }
628}
629
630/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst.
631///
632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi16&expand=179)
633#[inline]
634#[target_feature(enable = "avx512bw")]
635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
636#[cfg_attr(test, assert_instr(vpaddsw))]
637#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
638pub const fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
639    unsafe { transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32())) }
640}
641
642/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
643///
644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi16&expand=180)
645#[inline]
646#[target_feature(enable = "avx512bw")]
647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
648#[cfg_attr(test, assert_instr(vpaddsw))]
649#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
650pub const fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
651    unsafe {
652        let add = _mm512_adds_epi16(a, b).as_i16x32();
653        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
654    }
655}
656
657/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
658///
659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi16&expand=181)
660#[inline]
661#[target_feature(enable = "avx512bw")]
662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
663#[cfg_attr(test, assert_instr(vpaddsw))]
664#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
665pub const fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
666    unsafe {
667        let add = _mm512_adds_epi16(a, b).as_i16x32();
668        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
669    }
670}
671
672/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
673///
674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi16&expand=177)
675#[inline]
676#[target_feature(enable = "avx512bw,avx512vl")]
677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
678#[cfg_attr(test, assert_instr(vpaddsw))]
679#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
680pub const fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
681    unsafe {
682        let add = _mm256_adds_epi16(a, b).as_i16x16();
683        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
684    }
685}
686
687/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
688///
689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi16&expand=178)
690#[inline]
691#[target_feature(enable = "avx512bw,avx512vl")]
692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
693#[cfg_attr(test, assert_instr(vpaddsw))]
694#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
695pub const fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
696    unsafe {
697        let add = _mm256_adds_epi16(a, b).as_i16x16();
698        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
699    }
700}
701
702/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
703///
704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi16&expand=174)
705#[inline]
706#[target_feature(enable = "avx512bw,avx512vl")]
707#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
708#[cfg_attr(test, assert_instr(vpaddsw))]
709#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
710pub const fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
711    unsafe {
712        let add = _mm_adds_epi16(a, b).as_i16x8();
713        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
714    }
715}
716
717/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
718///
719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi16&expand=175)
720#[inline]
721#[target_feature(enable = "avx512bw,avx512vl")]
722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
723#[cfg_attr(test, assert_instr(vpaddsw))]
724#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
725pub const fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
726    unsafe {
727        let add = _mm_adds_epi16(a, b).as_i16x8();
728        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
729    }
730}
731
732/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
733///
734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi8&expand=188)
735#[inline]
736#[target_feature(enable = "avx512bw")]
737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
738#[cfg_attr(test, assert_instr(vpaddsb))]
739#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
740pub const fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
741    unsafe { transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64())) }
742}
743
744/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
745///
746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi8&expand=189)
747#[inline]
748#[target_feature(enable = "avx512bw")]
749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
750#[cfg_attr(test, assert_instr(vpaddsb))]
751#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
752pub const fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
753    unsafe {
754        let add = _mm512_adds_epi8(a, b).as_i8x64();
755        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
756    }
757}
758
759/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
760///
761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi8&expand=190)
762#[inline]
763#[target_feature(enable = "avx512bw")]
764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
765#[cfg_attr(test, assert_instr(vpaddsb))]
766#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
767pub const fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
768    unsafe {
769        let add = _mm512_adds_epi8(a, b).as_i8x64();
770        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
771    }
772}
773
774/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
775///
776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi8&expand=186)
777#[inline]
778#[target_feature(enable = "avx512bw,avx512vl")]
779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
780#[cfg_attr(test, assert_instr(vpaddsb))]
781#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
782pub const fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
783    unsafe {
784        let add = _mm256_adds_epi8(a, b).as_i8x32();
785        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
786    }
787}
788
789/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
790///
791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi8&expand=187)
792#[inline]
793#[target_feature(enable = "avx512bw,avx512vl")]
794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
795#[cfg_attr(test, assert_instr(vpaddsb))]
796#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
797pub const fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
798    unsafe {
799        let add = _mm256_adds_epi8(a, b).as_i8x32();
800        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
801    }
802}
803
804/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
805///
806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi8&expand=183)
807#[inline]
808#[target_feature(enable = "avx512bw,avx512vl")]
809#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
810#[cfg_attr(test, assert_instr(vpaddsb))]
811#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
812pub const fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
813    unsafe {
814        let add = _mm_adds_epi8(a, b).as_i8x16();
815        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
816    }
817}
818
819/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
820///
821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi8&expand=184)
822#[inline]
823#[target_feature(enable = "avx512bw,avx512vl")]
824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
825#[cfg_attr(test, assert_instr(vpaddsb))]
826#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
827pub const fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
828    unsafe {
829        let add = _mm_adds_epi8(a, b).as_i8x16();
830        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
831    }
832}
833
834/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
835///
836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi16&expand=5685)
837#[inline]
838#[target_feature(enable = "avx512bw")]
839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
840#[cfg_attr(test, assert_instr(vpsubw))]
841#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
842pub const fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
843    unsafe { transmute(simd_sub(a.as_i16x32(), b.as_i16x32())) }
844}
845
846/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
847///
848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi16&expand=5683)
849#[inline]
850#[target_feature(enable = "avx512bw")]
851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
852#[cfg_attr(test, assert_instr(vpsubw))]
853#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
854pub const fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
855    unsafe {
856        let sub = _mm512_sub_epi16(a, b).as_i16x32();
857        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
858    }
859}
860
861/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
862///
863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi16&expand=5684)
864#[inline]
865#[target_feature(enable = "avx512bw")]
866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
867#[cfg_attr(test, assert_instr(vpsubw))]
868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
869pub const fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
870    unsafe {
871        let sub = _mm512_sub_epi16(a, b).as_i16x32();
872        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
873    }
874}
875
876/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
877///
878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi16&expand=5680)
879#[inline]
880#[target_feature(enable = "avx512bw,avx512vl")]
881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
882#[cfg_attr(test, assert_instr(vpsubw))]
883#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
884pub const fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
885    unsafe {
886        let sub = _mm256_sub_epi16(a, b).as_i16x16();
887        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
888    }
889}
890
891/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
892///
893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi16&expand=5681)
894#[inline]
895#[target_feature(enable = "avx512bw,avx512vl")]
896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
897#[cfg_attr(test, assert_instr(vpsubw))]
898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
899pub const fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
900    unsafe {
901        let sub = _mm256_sub_epi16(a, b).as_i16x16();
902        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
903    }
904}
905
906/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
907///
908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi16&expand=5677)
909#[inline]
910#[target_feature(enable = "avx512bw,avx512vl")]
911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
912#[cfg_attr(test, assert_instr(vpsubw))]
913#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
914pub const fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
915    unsafe {
916        let sub = _mm_sub_epi16(a, b).as_i16x8();
917        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
918    }
919}
920
921/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
922///
923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi16&expand=5678)
924#[inline]
925#[target_feature(enable = "avx512bw,avx512vl")]
926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
927#[cfg_attr(test, assert_instr(vpsubw))]
928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
929pub const fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
930    unsafe {
931        let sub = _mm_sub_epi16(a, b).as_i16x8();
932        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
933    }
934}
935
936/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst.
937///
938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi8&expand=5712)
939#[inline]
940#[target_feature(enable = "avx512bw")]
941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
942#[cfg_attr(test, assert_instr(vpsubb))]
943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
944pub const fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
945    unsafe { transmute(simd_sub(a.as_i8x64(), b.as_i8x64())) }
946}
947
948/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
949///
950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi8&expand=5710)
951#[inline]
952#[target_feature(enable = "avx512bw")]
953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
954#[cfg_attr(test, assert_instr(vpsubb))]
955#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
956pub const fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
957    unsafe {
958        let sub = _mm512_sub_epi8(a, b).as_i8x64();
959        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
960    }
961}
962
963/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
964///
965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi8&expand=5711)
966#[inline]
967#[target_feature(enable = "avx512bw")]
968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
969#[cfg_attr(test, assert_instr(vpsubb))]
970#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
971pub const fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
972    unsafe {
973        let sub = _mm512_sub_epi8(a, b).as_i8x64();
974        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
975    }
976}
977
978/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
979///
980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi8&expand=5707)
981#[inline]
982#[target_feature(enable = "avx512bw,avx512vl")]
983#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
984#[cfg_attr(test, assert_instr(vpsubb))]
985#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
986pub const fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
987    unsafe {
988        let sub = _mm256_sub_epi8(a, b).as_i8x32();
989        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
990    }
991}
992
993/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
994///
995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi8&expand=5708)
996#[inline]
997#[target_feature(enable = "avx512bw,avx512vl")]
998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
999#[cfg_attr(test, assert_instr(vpsubb))]
1000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1001pub const fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1002    unsafe {
1003        let sub = _mm256_sub_epi8(a, b).as_i8x32();
1004        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
1005    }
1006}
1007
1008/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1009///
1010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi8&expand=5704)
1011#[inline]
1012#[target_feature(enable = "avx512bw,avx512vl")]
1013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1014#[cfg_attr(test, assert_instr(vpsubb))]
1015#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1016pub const fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1017    unsafe {
1018        let sub = _mm_sub_epi8(a, b).as_i8x16();
1019        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
1020    }
1021}
1022
1023/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1024///
1025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi8&expand=5705)
1026#[inline]
1027#[target_feature(enable = "avx512bw,avx512vl")]
1028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1029#[cfg_attr(test, assert_instr(vpsubb))]
1030#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1031pub const fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1032    unsafe {
1033        let sub = _mm_sub_epi8(a, b).as_i8x16();
1034        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
1035    }
1036}
1037
1038/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst.
1039///
1040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu16&expand=5793)
1041#[inline]
1042#[target_feature(enable = "avx512bw")]
1043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1044#[cfg_attr(test, assert_instr(vpsubusw))]
1045#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1046pub const fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
1047    unsafe { transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32())) }
1048}
1049
1050/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1051///
1052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu16&expand=5791)
1053#[inline]
1054#[target_feature(enable = "avx512bw")]
1055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1056#[cfg_attr(test, assert_instr(vpsubusw))]
1057#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1058pub const fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1059    unsafe {
1060        let sub = _mm512_subs_epu16(a, b).as_u16x32();
1061        transmute(simd_select_bitmask(k, sub, src.as_u16x32()))
1062    }
1063}
1064
1065/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1066///
1067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu16&expand=5792)
1068#[inline]
1069#[target_feature(enable = "avx512bw")]
1070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1071#[cfg_attr(test, assert_instr(vpsubusw))]
1072#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1073pub const fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1074    unsafe {
1075        let sub = _mm512_subs_epu16(a, b).as_u16x32();
1076        transmute(simd_select_bitmask(k, sub, u16x32::ZERO))
1077    }
1078}
1079
1080/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1081///
1082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu16&expand=5788)
1083#[inline]
1084#[target_feature(enable = "avx512bw,avx512vl")]
1085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1086#[cfg_attr(test, assert_instr(vpsubusw))]
1087#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1088pub const fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1089    unsafe {
1090        let sub = _mm256_subs_epu16(a, b).as_u16x16();
1091        transmute(simd_select_bitmask(k, sub, src.as_u16x16()))
1092    }
1093}
1094
1095/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1096///
1097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu16&expand=5789)
1098#[inline]
1099#[target_feature(enable = "avx512bw,avx512vl")]
1100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1101#[cfg_attr(test, assert_instr(vpsubusw))]
1102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1103pub const fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1104    unsafe {
1105        let sub = _mm256_subs_epu16(a, b).as_u16x16();
1106        transmute(simd_select_bitmask(k, sub, u16x16::ZERO))
1107    }
1108}
1109
1110/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1111///
1112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu16&expand=5785)
1113#[inline]
1114#[target_feature(enable = "avx512bw,avx512vl")]
1115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1116#[cfg_attr(test, assert_instr(vpsubusw))]
1117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1118pub const fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1119    unsafe {
1120        let sub = _mm_subs_epu16(a, b).as_u16x8();
1121        transmute(simd_select_bitmask(k, sub, src.as_u16x8()))
1122    }
1123}
1124
1125/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1126///
1127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu16&expand=5786)
1128#[inline]
1129#[target_feature(enable = "avx512bw,avx512vl")]
1130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1131#[cfg_attr(test, assert_instr(vpsubusw))]
1132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1133pub const fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1134    unsafe {
1135        let sub = _mm_subs_epu16(a, b).as_u16x8();
1136        transmute(simd_select_bitmask(k, sub, u16x8::ZERO))
1137    }
1138}
1139
1140/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
1141///
1142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu8&expand=5802)
1143#[inline]
1144#[target_feature(enable = "avx512bw")]
1145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1146#[cfg_attr(test, assert_instr(vpsubusb))]
1147#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1148pub const fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
1149    unsafe { transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64())) }
1150}
1151
1152/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1153///
1154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu8&expand=5800)
1155#[inline]
1156#[target_feature(enable = "avx512bw")]
1157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1158#[cfg_attr(test, assert_instr(vpsubusb))]
1159#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1160pub const fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1161    unsafe {
1162        let sub = _mm512_subs_epu8(a, b).as_u8x64();
1163        transmute(simd_select_bitmask(k, sub, src.as_u8x64()))
1164    }
1165}
1166
1167/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1168///
1169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu8&expand=5801)
1170#[inline]
1171#[target_feature(enable = "avx512bw")]
1172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1173#[cfg_attr(test, assert_instr(vpsubusb))]
1174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1175pub const fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1176    unsafe {
1177        let sub = _mm512_subs_epu8(a, b).as_u8x64();
1178        transmute(simd_select_bitmask(k, sub, u8x64::ZERO))
1179    }
1180}
1181
1182/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1183///
1184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu8&expand=5797)
1185#[inline]
1186#[target_feature(enable = "avx512bw,avx512vl")]
1187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1188#[cfg_attr(test, assert_instr(vpsubusb))]
1189#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1190pub const fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1191    unsafe {
1192        let sub = _mm256_subs_epu8(a, b).as_u8x32();
1193        transmute(simd_select_bitmask(k, sub, src.as_u8x32()))
1194    }
1195}
1196
1197/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1198///
1199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu8&expand=5798)
1200#[inline]
1201#[target_feature(enable = "avx512bw,avx512vl")]
1202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1203#[cfg_attr(test, assert_instr(vpsubusb))]
1204#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1205pub const fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1206    unsafe {
1207        let sub = _mm256_subs_epu8(a, b).as_u8x32();
1208        transmute(simd_select_bitmask(k, sub, u8x32::ZERO))
1209    }
1210}
1211
1212/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1213///
1214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu8&expand=5794)
1215#[inline]
1216#[target_feature(enable = "avx512bw,avx512vl")]
1217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1218#[cfg_attr(test, assert_instr(vpsubusb))]
1219#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1220pub const fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1221    unsafe {
1222        let sub = _mm_subs_epu8(a, b).as_u8x16();
1223        transmute(simd_select_bitmask(k, sub, src.as_u8x16()))
1224    }
1225}
1226
1227/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1228///
1229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu8&expand=5795)
1230#[inline]
1231#[target_feature(enable = "avx512bw,avx512vl")]
1232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1233#[cfg_attr(test, assert_instr(vpsubusb))]
1234#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1235pub const fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1236    unsafe {
1237        let sub = _mm_subs_epu8(a, b).as_u8x16();
1238        transmute(simd_select_bitmask(k, sub, u8x16::ZERO))
1239    }
1240}
1241
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi16&expand=5775)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise signed saturating subtract over all 32 word lanes:
    // each difference is clamped to [i16::MIN, i16::MAX] instead of wrapping.
    unsafe { transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32())) }
}
1253
1254/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1255///
1256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi16&expand=5773)
1257#[inline]
1258#[target_feature(enable = "avx512bw")]
1259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1260#[cfg_attr(test, assert_instr(vpsubsw))]
1261#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1262pub const fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1263    unsafe {
1264        let sub = _mm512_subs_epi16(a, b).as_i16x32();
1265        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
1266    }
1267}
1268
1269/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1270///
1271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi16&expand=5774)
1272#[inline]
1273#[target_feature(enable = "avx512bw")]
1274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1275#[cfg_attr(test, assert_instr(vpsubsw))]
1276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1277pub const fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1278    unsafe {
1279        let sub = _mm512_subs_epi16(a, b).as_i16x32();
1280        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
1281    }
1282}
1283
1284/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1285///
1286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi16&expand=5770)
1287#[inline]
1288#[target_feature(enable = "avx512bw,avx512vl")]
1289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1290#[cfg_attr(test, assert_instr(vpsubsw))]
1291#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1292pub const fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1293    unsafe {
1294        let sub = _mm256_subs_epi16(a, b).as_i16x16();
1295        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
1296    }
1297}
1298
1299/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1300///
1301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi16&expand=5771)
1302#[inline]
1303#[target_feature(enable = "avx512bw,avx512vl")]
1304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1305#[cfg_attr(test, assert_instr(vpsubsw))]
1306#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1307pub const fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1308    unsafe {
1309        let sub = _mm256_subs_epi16(a, b).as_i16x16();
1310        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
1311    }
1312}
1313
1314/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1315///
1316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi16&expand=5767)
1317#[inline]
1318#[target_feature(enable = "avx512bw,avx512vl")]
1319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1320#[cfg_attr(test, assert_instr(vpsubsw))]
1321#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1322pub const fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1323    unsafe {
1324        let sub = _mm_subs_epi16(a, b).as_i16x8();
1325        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
1326    }
1327}
1328
1329/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1330///
1331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi16&expand=5768)
1332#[inline]
1333#[target_feature(enable = "avx512bw,avx512vl")]
1334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1335#[cfg_attr(test, assert_instr(vpsubsw))]
1336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1337pub const fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1338    unsafe {
1339        let sub = _mm_subs_epi16(a, b).as_i16x8();
1340        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
1341    }
1342}
1343
/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi8&expand=5784)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise signed saturating subtract over all 64 byte lanes:
    // each difference is clamped to [i8::MIN, i8::MAX] instead of wrapping.
    unsafe { transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64())) }
}
1355
1356/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1357///
1358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi8&expand=5782)
1359#[inline]
1360#[target_feature(enable = "avx512bw")]
1361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1362#[cfg_attr(test, assert_instr(vpsubsb))]
1363#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1364pub const fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1365    unsafe {
1366        let sub = _mm512_subs_epi8(a, b).as_i8x64();
1367        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
1368    }
1369}
1370
1371/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1372///
1373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi8&expand=5783)
1374#[inline]
1375#[target_feature(enable = "avx512bw")]
1376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1377#[cfg_attr(test, assert_instr(vpsubsb))]
1378#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1379pub const fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1380    unsafe {
1381        let sub = _mm512_subs_epi8(a, b).as_i8x64();
1382        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
1383    }
1384}
1385
1386/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1387///
1388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi8&expand=5779)
1389#[inline]
1390#[target_feature(enable = "avx512bw,avx512vl")]
1391#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1392#[cfg_attr(test, assert_instr(vpsubsb))]
1393#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1394pub const fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1395    unsafe {
1396        let sub = _mm256_subs_epi8(a, b).as_i8x32();
1397        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
1398    }
1399}
1400
1401/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1402///
1403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi8&expand=5780)
1404#[inline]
1405#[target_feature(enable = "avx512bw,avx512vl")]
1406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1407#[cfg_attr(test, assert_instr(vpsubsb))]
1408#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1409pub const fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1410    unsafe {
1411        let sub = _mm256_subs_epi8(a, b).as_i8x32();
1412        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
1413    }
1414}
1415
1416/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1417///
1418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi8&expand=5776)
1419#[inline]
1420#[target_feature(enable = "avx512bw,avx512vl")]
1421#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1422#[cfg_attr(test, assert_instr(vpsubsb))]
1423#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1424pub const fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1425    unsafe {
1426        let sub = _mm_subs_epi8(a, b).as_i8x16();
1427        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
1428    }
1429}
1430
1431/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1432///
1433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi8&expand=5777)
1434#[inline]
1435#[target_feature(enable = "avx512bw,avx512vl")]
1436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1437#[cfg_attr(test, assert_instr(vpsubsb))]
1438#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1439pub const fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1440    unsafe {
1441        let sub = _mm_subs_epi8(a, b).as_i8x16();
1442        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
1443    }
1444}
1445
/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epu16&expand=3973)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Widen both operands to u32 lanes so the full 16x16 -> 32-bit
        // product is computed exactly, with no wraparound.
        let a = simd_cast::<_, u32x32>(a.as_u16x32());
        let b = simd_cast::<_, u32x32>(b.as_u16x32());
        // Shift each 32-bit product right by 16 to keep only its high half.
        let r = simd_shr(simd_mul(a, b), u32x32::splat(16));
        // Narrow back to u16 lanes; the value already fits in 16 bits.
        transmute(simd_cast::<u32x32, u16x32>(r))
    }
}
1462
1463/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1464///
1465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epu16&expand=3971)
1466#[inline]
1467#[target_feature(enable = "avx512bw")]
1468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1469#[cfg_attr(test, assert_instr(vpmulhuw))]
1470#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1471pub const fn _mm512_mask_mulhi_epu16(
1472    src: __m512i,
1473    k: __mmask32,
1474    a: __m512i,
1475    b: __m512i,
1476) -> __m512i {
1477    unsafe {
1478        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
1479        transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
1480    }
1481}
1482
1483/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1484///
1485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epu16&expand=3972)
1486#[inline]
1487#[target_feature(enable = "avx512bw")]
1488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1489#[cfg_attr(test, assert_instr(vpmulhuw))]
1490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1491pub const fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1492    unsafe {
1493        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
1494        transmute(simd_select_bitmask(k, mul, u16x32::ZERO))
1495    }
1496}
1497
1498/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1499///
1500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epu16&expand=3968)
1501#[inline]
1502#[target_feature(enable = "avx512bw,avx512vl")]
1503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1504#[cfg_attr(test, assert_instr(vpmulhuw))]
1505#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1506pub const fn _mm256_mask_mulhi_epu16(
1507    src: __m256i,
1508    k: __mmask16,
1509    a: __m256i,
1510    b: __m256i,
1511) -> __m256i {
1512    unsafe {
1513        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
1514        transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
1515    }
1516}
1517
1518/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1519///
1520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epu16&expand=3969)
1521#[inline]
1522#[target_feature(enable = "avx512bw,avx512vl")]
1523#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1524#[cfg_attr(test, assert_instr(vpmulhuw))]
1525#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1526pub const fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1527    unsafe {
1528        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
1529        transmute(simd_select_bitmask(k, mul, u16x16::ZERO))
1530    }
1531}
1532
1533/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1534///
1535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epu16&expand=3965)
1536#[inline]
1537#[target_feature(enable = "avx512bw,avx512vl")]
1538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1539#[cfg_attr(test, assert_instr(vpmulhuw))]
1540#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1541pub const fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1542    unsafe {
1543        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
1544        transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
1545    }
1546}
1547
1548/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1549///
1550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epu16&expand=3966)
1551#[inline]
1552#[target_feature(enable = "avx512bw,avx512vl")]
1553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1554#[cfg_attr(test, assert_instr(vpmulhuw))]
1555#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1556pub const fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1557    unsafe {
1558        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
1559        transmute(simd_select_bitmask(k, mul, u16x8::ZERO))
1560    }
1561}
1562
/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epi16&expand=3962)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Sign-extend both operands to i32 lanes so the full 16x16 -> 32-bit
        // signed product is computed exactly, with no overflow.
        let a = simd_cast::<_, i32x32>(a.as_i16x32());
        let b = simd_cast::<_, i32x32>(b.as_i16x32());
        // Arithmetic shift right by 16 keeps the (sign-correct) high half of
        // each 32-bit product.
        let r = simd_shr(simd_mul(a, b), i32x32::splat(16));
        // Narrow back to i16 lanes; the value already fits in 16 bits.
        transmute(simd_cast::<i32x32, i16x32>(r))
    }
}
1579
1580/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1581///
1582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epi16&expand=3960)
1583#[inline]
1584#[target_feature(enable = "avx512bw")]
1585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1586#[cfg_attr(test, assert_instr(vpmulhw))]
1587#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1588pub const fn _mm512_mask_mulhi_epi16(
1589    src: __m512i,
1590    k: __mmask32,
1591    a: __m512i,
1592    b: __m512i,
1593) -> __m512i {
1594    unsafe {
1595        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
1596        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1597    }
1598}
1599
1600/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1601///
1602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epi16&expand=3961)
1603#[inline]
1604#[target_feature(enable = "avx512bw")]
1605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1606#[cfg_attr(test, assert_instr(vpmulhw))]
1607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1608pub const fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1609    unsafe {
1610        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
1611        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
1612    }
1613}
1614
1615/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1616///
1617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epi16&expand=3957)
1618#[inline]
1619#[target_feature(enable = "avx512bw,avx512vl")]
1620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1621#[cfg_attr(test, assert_instr(vpmulhw))]
1622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1623pub const fn _mm256_mask_mulhi_epi16(
1624    src: __m256i,
1625    k: __mmask16,
1626    a: __m256i,
1627    b: __m256i,
1628) -> __m256i {
1629    unsafe {
1630        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
1631        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1632    }
1633}
1634
1635/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1636///
1637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epi16&expand=3958)
1638#[inline]
1639#[target_feature(enable = "avx512bw,avx512vl")]
1640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1641#[cfg_attr(test, assert_instr(vpmulhw))]
1642#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1643pub const fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1644    unsafe {
1645        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
1646        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
1647    }
1648}
1649
1650/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1651///
1652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epi16&expand=3954)
1653#[inline]
1654#[target_feature(enable = "avx512bw,avx512vl")]
1655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1656#[cfg_attr(test, assert_instr(vpmulhw))]
1657#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1658pub const fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1659    unsafe {
1660        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
1661        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1662    }
1663}
1664
1665/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1666///
1667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epi16&expand=3955)
1668#[inline]
1669#[target_feature(enable = "avx512bw,avx512vl")]
1670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1671#[cfg_attr(test, assert_instr(vpmulhw))]
1672#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1673pub const fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1674    unsafe {
1675        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
1676        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
1677    }
1678}
1679
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhrs_epi16&expand=3986)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Forwards to the LLVM `vpmulhrsw` intrinsic: the multiply-high-with-
    // round-and-scale step has no portable simd_* equivalent, which is also
    // why this intrinsic (unlike its neighbors) is not a `const fn`.
    unsafe { transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32())) }
}
1690
1691/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1692///
1693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhrs_epi16&expand=3984)
1694#[inline]
1695#[target_feature(enable = "avx512bw")]
1696#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1697#[cfg_attr(test, assert_instr(vpmulhrsw))]
1698pub fn _mm512_mask_mulhrs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1699    unsafe {
1700        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
1701        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1702    }
1703}
1704
1705/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1706///
1707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhrs_epi16&expand=3985)
1708#[inline]
1709#[target_feature(enable = "avx512bw")]
1710#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1711#[cfg_attr(test, assert_instr(vpmulhrsw))]
1712pub fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1713    unsafe {
1714        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
1715        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
1716    }
1717}
1718
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhrs_epi16&expand=3981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm256_mask_mulhrs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
        // Lanes whose bit in `k` is set take the product; the rest copy from `src`.
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}
1732
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhrs_epi16&expand=3982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
        // Lanes whose bit in `k` is set take the product; the rest are zeroed.
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}
1746
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhrs_epi16&expand=3978)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
        // Lanes whose bit in `k` is set take the product; the rest copy from `src`.
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}
1760
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhrs_epi16&expand=3979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
        // Lanes whose bit in `k` is set take the product; the rest are zeroed.
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}
1774
/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi16&expand=3996)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise 16-bit multiply; each lane keeps only the low 16 bits of its product.
    unsafe { transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) }
}
1786
/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi16&expand=3994)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mullo_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
        // Lanes whose bit in `k` is set take the low product; the rest copy from `src`.
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}
1806
/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi16&expand=3995)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
        // Lanes whose bit in `k` is set take the low product; the rest are zeroed.
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}
1821
/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi16&expand=3991)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mullo_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
        // Lanes whose bit in `k` is set take the low product; the rest copy from `src`.
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}
1841
/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi16&expand=3992)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
        // Lanes whose bit in `k` is set take the low product; the rest are zeroed.
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}
1856
/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi16&expand=3988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi16(a, b).as_i16x8();
        // Lanes whose bit in `k` is set take the low product; the rest copy from `src`.
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}
1871
/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi16&expand=3989)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi16(a, b).as_i16x8();
        // Lanes whose bit in `k` is set take the low product; the rest are zeroed.
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}
1886
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu16&expand=3609)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
    // Element-wise maximum over the vectors viewed as 32 unsigned 16-bit lanes.
    unsafe { simd_imax(a.as_u16x32(), b.as_u16x32()).as_m512i() }
}
1898
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu16&expand=3607)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu16(a, b).as_u16x32();
        // Lanes whose bit in `k` is set take the maximum; the rest copy from `src`.
        transmute(simd_select_bitmask(k, max, src.as_u16x32()))
    }
}
1913
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu16&expand=3608)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu16(a, b).as_u16x32();
        // Lanes whose bit in `k` is set take the maximum; the rest are zeroed.
        transmute(simd_select_bitmask(k, max, u16x32::ZERO))
    }
}
1928
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu16&expand=3604)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu16(a, b).as_u16x16();
        // Lanes whose bit in `k` is set take the maximum; the rest copy from `src`.
        transmute(simd_select_bitmask(k, max, src.as_u16x16()))
    }
}
1943
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu16&expand=3605)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu16(a, b).as_u16x16();
        // Lanes whose bit in `k` is set take the maximum; the rest are zeroed.
        transmute(simd_select_bitmask(k, max, u16x16::ZERO))
    }
}
1958
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu16&expand=3601)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu16(a, b).as_u16x8();
        // Lanes whose bit in `k` is set take the maximum; the rest copy from `src`.
        transmute(simd_select_bitmask(k, max, src.as_u16x8()))
    }
}
1973
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu16&expand=3602)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu16(a, b).as_u16x8();
        // Lanes whose bit in `k` is set take the maximum; the rest are zeroed.
        transmute(simd_select_bitmask(k, max, u16x8::ZERO))
    }
}
1988
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu8&expand=3636)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
    // Element-wise maximum over the vectors viewed as 64 unsigned 8-bit lanes.
    unsafe { simd_imax(a.as_u8x64(), b.as_u8x64()).as_m512i() }
}
2000
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu8&expand=3634)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu8(a, b).as_u8x64();
        // Lanes whose bit in `k` is set take the maximum; the rest copy from `src`.
        transmute(simd_select_bitmask(k, max, src.as_u8x64()))
    }
}
2015
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu8&expand=3635)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu8(a, b).as_u8x64();
        // Lanes whose bit in `k` is set take the maximum; the rest are zeroed.
        transmute(simd_select_bitmask(k, max, u8x64::ZERO))
    }
}
2030
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu8&expand=3631)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu8(a, b).as_u8x32();
        // Lanes whose bit in `k` is set take the maximum; the rest copy from `src`.
        transmute(simd_select_bitmask(k, max, src.as_u8x32()))
    }
}
2045
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu8&expand=3632)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu8(a, b).as_u8x32();
        // Lanes whose bit in `k` is set take the maximum; the rest are zeroed.
        transmute(simd_select_bitmask(k, max, u8x32::ZERO))
    }
}
2060
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu8&expand=3628)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu8(a, b).as_u8x16();
        // Lanes whose bit in `k` is set take the maximum; the rest copy from `src`.
        transmute(simd_select_bitmask(k, max, src.as_u8x16()))
    }
}
2075
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu8&expand=3629)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu8(a, b).as_u8x16();
        // Lanes whose bit in `k` is set take the maximum; the rest are zeroed.
        transmute(simd_select_bitmask(k, max, u8x16::ZERO))
    }
}
2090
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi16&expand=3573)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Element-wise maximum over the vectors viewed as 32 signed 16-bit lanes.
    unsafe { simd_imax(a.as_i16x32(), b.as_i16x32()).as_m512i() }
}
2102
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi16&expand=3571)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi16(a, b).as_i16x32();
        // Lanes whose bit in `k` is set take the maximum; the rest copy from `src`.
        transmute(simd_select_bitmask(k, max, src.as_i16x32()))
    }
}
2117
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi16&expand=3572)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi16(a, b).as_i16x32();
        // Lanes whose bit in `k` is set take the maximum; the rest are zeroed.
        transmute(simd_select_bitmask(k, max, i16x32::ZERO))
    }
}
2132
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi16&expand=3568)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi16(a, b).as_i16x16();
        // Lanes whose bit in `k` is set take the maximum; the rest copy from `src`.
        transmute(simd_select_bitmask(k, max, src.as_i16x16()))
    }
}
2147
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi16&expand=3569)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi16(a, b).as_i16x16();
        // Lanes whose bit in `k` is set take the maximum; the rest are zeroed.
        transmute(simd_select_bitmask(k, max, i16x16::ZERO))
    }
}
2162
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi16&expand=3565)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi16(a, b).as_i16x8();
        // Lanes whose bit in `k` is set take the maximum; the rest copy from `src`.
        transmute(simd_select_bitmask(k, max, src.as_i16x8()))
    }
}
2177
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi16&expand=3566)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi16(a, b).as_i16x8();
        // Lanes whose bit in `k` is set take the maximum; the rest are zeroed.
        transmute(simd_select_bitmask(k, max, i16x8::ZERO))
    }
}
2192
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi8&expand=3600)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
    // Element-wise maximum over the vectors viewed as 64 signed 8-bit lanes.
    unsafe { simd_imax(a.as_i8x64(), b.as_i8x64()).as_m512i() }
}
2204
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi8&expand=3598)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi8(a, b).as_i8x64();
        // Lanes whose bit in `k` is set take the maximum; the rest copy from `src`.
        transmute(simd_select_bitmask(k, max, src.as_i8x64()))
    }
}
2219
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi8&expand=3599)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi8(a, b).as_i8x64();
        // Lanes whose bit in `k` is set take the maximum; the rest are zeroed.
        transmute(simd_select_bitmask(k, max, i8x64::ZERO))
    }
}
2234
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi8&expand=3595)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi8(a, b).as_i8x32();
        // Lanes whose bit in `k` is set take the maximum; the rest copy from `src`.
        transmute(simd_select_bitmask(k, max, src.as_i8x32()))
    }
}
2249
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi8&expand=3596)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi8(a, b).as_i8x32();
        // Lanes whose bit in `k` is set take the maximum; the rest are zeroed.
        transmute(simd_select_bitmask(k, max, i8x32::ZERO))
    }
}
2264
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi8&expand=3592)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi8(a, b).as_i8x16();
        // Lanes whose bit in `k` is set take the maximum; the rest copy from `src`.
        transmute(simd_select_bitmask(k, max, src.as_i8x16()))
    }
}
2279
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi8&expand=3593)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi8(a, b).as_i8x16();
        // Lanes whose bit in `k` is set take the maximum; the rest are zeroed.
        transmute(simd_select_bitmask(k, max, i8x16::ZERO))
    }
}
2294
/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu16&expand=3723)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
    // Element-wise minimum over the vectors viewed as 32 unsigned 16-bit lanes.
    unsafe { simd_imin(a.as_u16x32(), b.as_u16x32()).as_m512i() }
}
2306
2307/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2308///
2309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu16&expand=3721)
2310#[inline]
2311#[target_feature(enable = "avx512bw")]
2312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2313#[cfg_attr(test, assert_instr(vpminuw))]
2314#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2315pub const fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2316    unsafe {
2317        let min = _mm512_min_epu16(a, b).as_u16x32();
2318        transmute(simd_select_bitmask(k, min, src.as_u16x32()))
2319    }
2320}
2321
2322/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2323///
2324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu16&expand=3722)
2325#[inline]
2326#[target_feature(enable = "avx512bw")]
2327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2328#[cfg_attr(test, assert_instr(vpminuw))]
2329#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2330pub const fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2331    unsafe {
2332        let min = _mm512_min_epu16(a, b).as_u16x32();
2333        transmute(simd_select_bitmask(k, min, u16x32::ZERO))
2334    }
2335}
2336
2337/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2338///
2339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu16&expand=3718)
2340#[inline]
2341#[target_feature(enable = "avx512bw,avx512vl")]
2342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2343#[cfg_attr(test, assert_instr(vpminuw))]
2344#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2345pub const fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2346    unsafe {
2347        let min = _mm256_min_epu16(a, b).as_u16x16();
2348        transmute(simd_select_bitmask(k, min, src.as_u16x16()))
2349    }
2350}
2351
2352/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2353///
2354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu16&expand=3719)
2355#[inline]
2356#[target_feature(enable = "avx512bw,avx512vl")]
2357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2358#[cfg_attr(test, assert_instr(vpminuw))]
2359#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2360pub const fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2361    unsafe {
2362        let min = _mm256_min_epu16(a, b).as_u16x16();
2363        transmute(simd_select_bitmask(k, min, u16x16::ZERO))
2364    }
2365}
2366
2367/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2368///
2369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu16&expand=3715)
2370#[inline]
2371#[target_feature(enable = "avx512bw,avx512vl")]
2372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2373#[cfg_attr(test, assert_instr(vpminuw))]
2374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2375pub const fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2376    unsafe {
2377        let min = _mm_min_epu16(a, b).as_u16x8();
2378        transmute(simd_select_bitmask(k, min, src.as_u16x8()))
2379    }
2380}
2381
2382/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2383///
2384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu16&expand=3716)
2385#[inline]
2386#[target_feature(enable = "avx512bw,avx512vl")]
2387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2388#[cfg_attr(test, assert_instr(vpminuw))]
2389#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2390pub const fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2391    unsafe {
2392        let min = _mm_min_epu16(a, b).as_u16x8();
2393        transmute(simd_select_bitmask(k, min, u16x8::ZERO))
2394    }
2395}
2396
2397/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst.
2398///
2399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu8&expand=3750)
2400#[inline]
2401#[target_feature(enable = "avx512bw")]
2402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2403#[cfg_attr(test, assert_instr(vpminub))]
2404#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2405pub const fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
2406    unsafe { simd_imin(a.as_u8x64(), b.as_u8x64()).as_m512i() }
2407}
2408
2409/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2410///
2411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu8&expand=3748)
2412#[inline]
2413#[target_feature(enable = "avx512bw")]
2414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2415#[cfg_attr(test, assert_instr(vpminub))]
2416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2417pub const fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2418    unsafe {
2419        let min = _mm512_min_epu8(a, b).as_u8x64();
2420        transmute(simd_select_bitmask(k, min, src.as_u8x64()))
2421    }
2422}
2423
2424/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2425///
2426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu8&expand=3749)
2427#[inline]
2428#[target_feature(enable = "avx512bw")]
2429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2430#[cfg_attr(test, assert_instr(vpminub))]
2431#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2432pub const fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2433    unsafe {
2434        let min = _mm512_min_epu8(a, b).as_u8x64();
2435        transmute(simd_select_bitmask(k, min, u8x64::ZERO))
2436    }
2437}
2438
2439/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2440///
2441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu8&expand=3745)
2442#[inline]
2443#[target_feature(enable = "avx512bw,avx512vl")]
2444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2445#[cfg_attr(test, assert_instr(vpminub))]
2446#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2447pub const fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2448    unsafe {
2449        let min = _mm256_min_epu8(a, b).as_u8x32();
2450        transmute(simd_select_bitmask(k, min, src.as_u8x32()))
2451    }
2452}
2453
2454/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2455///
2456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu8&expand=3746)
2457#[inline]
2458#[target_feature(enable = "avx512bw,avx512vl")]
2459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2460#[cfg_attr(test, assert_instr(vpminub))]
2461#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2462pub const fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2463    unsafe {
2464        let min = _mm256_min_epu8(a, b).as_u8x32();
2465        transmute(simd_select_bitmask(k, min, u8x32::ZERO))
2466    }
2467}
2468
2469/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2470///
2471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu8&expand=3742)
2472#[inline]
2473#[target_feature(enable = "avx512bw,avx512vl")]
2474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2475#[cfg_attr(test, assert_instr(vpminub))]
2476#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2477pub const fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2478    unsafe {
2479        let min = _mm_min_epu8(a, b).as_u8x16();
2480        transmute(simd_select_bitmask(k, min, src.as_u8x16()))
2481    }
2482}
2483
2484/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2485///
2486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu8&expand=3743)
2487#[inline]
2488#[target_feature(enable = "avx512bw,avx512vl")]
2489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2490#[cfg_attr(test, assert_instr(vpminub))]
2491#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2492pub const fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2493    unsafe {
2494        let min = _mm_min_epu8(a, b).as_u8x16();
2495        transmute(simd_select_bitmask(k, min, u8x16::ZERO))
2496    }
2497}
2498
2499/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst.
2500///
2501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi16&expand=3687)
2502#[inline]
2503#[target_feature(enable = "avx512bw")]
2504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2505#[cfg_attr(test, assert_instr(vpminsw))]
2506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2507pub const fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
2508    unsafe { simd_imin(a.as_i16x32(), b.as_i16x32()).as_m512i() }
2509}
2510
2511/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2512///
2513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi16&expand=3685)
2514#[inline]
2515#[target_feature(enable = "avx512bw")]
2516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2517#[cfg_attr(test, assert_instr(vpminsw))]
2518#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2519pub const fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2520    unsafe {
2521        let min = _mm512_min_epi16(a, b).as_i16x32();
2522        transmute(simd_select_bitmask(k, min, src.as_i16x32()))
2523    }
2524}
2525
2526/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2527///
2528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi16&expand=3686)
2529#[inline]
2530#[target_feature(enable = "avx512bw")]
2531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2532#[cfg_attr(test, assert_instr(vpminsw))]
2533#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2534pub const fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2535    unsafe {
2536        let min = _mm512_min_epi16(a, b).as_i16x32();
2537        transmute(simd_select_bitmask(k, min, i16x32::ZERO))
2538    }
2539}
2540
2541/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2542///
2543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi16&expand=3682)
2544#[inline]
2545#[target_feature(enable = "avx512bw,avx512vl")]
2546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2547#[cfg_attr(test, assert_instr(vpminsw))]
2548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2549pub const fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2550    unsafe {
2551        let min = _mm256_min_epi16(a, b).as_i16x16();
2552        transmute(simd_select_bitmask(k, min, src.as_i16x16()))
2553    }
2554}
2555
2556/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2557///
2558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi16&expand=3683)
2559#[inline]
2560#[target_feature(enable = "avx512bw,avx512vl")]
2561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2562#[cfg_attr(test, assert_instr(vpminsw))]
2563#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2564pub const fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2565    unsafe {
2566        let min = _mm256_min_epi16(a, b).as_i16x16();
2567        transmute(simd_select_bitmask(k, min, i16x16::ZERO))
2568    }
2569}
2570
2571/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2572///
2573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi16&expand=3679)
2574#[inline]
2575#[target_feature(enable = "avx512bw,avx512vl")]
2576#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2577#[cfg_attr(test, assert_instr(vpminsw))]
2578#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2579pub const fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2580    unsafe {
2581        let min = _mm_min_epi16(a, b).as_i16x8();
2582        transmute(simd_select_bitmask(k, min, src.as_i16x8()))
2583    }
2584}
2585
2586/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2587///
2588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi16&expand=3680)
2589#[inline]
2590#[target_feature(enable = "avx512bw,avx512vl")]
2591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2592#[cfg_attr(test, assert_instr(vpminsw))]
2593#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2594pub const fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2595    unsafe {
2596        let min = _mm_min_epi16(a, b).as_i16x8();
2597        transmute(simd_select_bitmask(k, min, i16x8::ZERO))
2598    }
2599}
2600
2601/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst.
2602///
2603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi8&expand=3714)
2604#[inline]
2605#[target_feature(enable = "avx512bw")]
2606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2607#[cfg_attr(test, assert_instr(vpminsb))]
2608#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2609pub const fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
2610    unsafe { simd_imin(a.as_i8x64(), b.as_i8x64()).as_m512i() }
2611}
2612
2613/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2614///
2615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi8&expand=3712)
2616#[inline]
2617#[target_feature(enable = "avx512bw")]
2618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2619#[cfg_attr(test, assert_instr(vpminsb))]
2620#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2621pub const fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2622    unsafe {
2623        let min = _mm512_min_epi8(a, b).as_i8x64();
2624        transmute(simd_select_bitmask(k, min, src.as_i8x64()))
2625    }
2626}
2627
2628/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2629///
2630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi8&expand=3713)
2631#[inline]
2632#[target_feature(enable = "avx512bw")]
2633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2634#[cfg_attr(test, assert_instr(vpminsb))]
2635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2636pub const fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2637    unsafe {
2638        let min = _mm512_min_epi8(a, b).as_i8x64();
2639        transmute(simd_select_bitmask(k, min, i8x64::ZERO))
2640    }
2641}
2642
2643/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2644///
2645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi8&expand=3709)
2646#[inline]
2647#[target_feature(enable = "avx512bw,avx512vl")]
2648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2649#[cfg_attr(test, assert_instr(vpminsb))]
2650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2651pub const fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2652    unsafe {
2653        let min = _mm256_min_epi8(a, b).as_i8x32();
2654        transmute(simd_select_bitmask(k, min, src.as_i8x32()))
2655    }
2656}
2657
2658/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2659///
2660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi8&expand=3710)
2661#[inline]
2662#[target_feature(enable = "avx512bw,avx512vl")]
2663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2664#[cfg_attr(test, assert_instr(vpminsb))]
2665#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2666pub const fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2667    unsafe {
2668        let min = _mm256_min_epi8(a, b).as_i8x32();
2669        transmute(simd_select_bitmask(k, min, i8x32::ZERO))
2670    }
2671}
2672
2673/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2674///
2675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi8&expand=3706)
2676#[inline]
2677#[target_feature(enable = "avx512bw,avx512vl")]
2678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2679#[cfg_attr(test, assert_instr(vpminsb))]
2680#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2681pub const fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2682    unsafe {
2683        let min = _mm_min_epi8(a, b).as_i8x16();
2684        transmute(simd_select_bitmask(k, min, src.as_i8x16()))
2685    }
2686}
2687
2688/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2689///
2690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi8&expand=3707)
2691#[inline]
2692#[target_feature(enable = "avx512bw,avx512vl")]
2693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2694#[cfg_attr(test, assert_instr(vpminsb))]
2695#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2696pub const fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2697    unsafe {
2698        let min = _mm_min_epi8(a, b).as_i8x16();
2699        transmute(simd_select_bitmask(k, min, i8x16::ZERO))
2700    }
2701}
2702
2703/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2704///
2705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu16_mask&expand=1050)
2706#[inline]
2707#[target_feature(enable = "avx512bw")]
2708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2709#[cfg_attr(test, assert_instr(vpcmp))]
2710#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2711pub const fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
2712    unsafe { simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32())) }
2713}
2714
2715/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2716///
2717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
2718#[inline]
2719#[target_feature(enable = "avx512bw")]
2720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2721#[cfg_attr(test, assert_instr(vpcmp))]
2722#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2723pub const fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
2724    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
2725}
2726
2727/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2728///
2729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu16_mask&expand=1050)
2730#[inline]
2731#[target_feature(enable = "avx512bw,avx512vl")]
2732#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2733#[cfg_attr(test, assert_instr(vpcmp))]
2734#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2735pub const fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2736    unsafe { simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16())) }
2737}
2738
2739/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2740///
2741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu16_mask&expand=1049)
2742#[inline]
2743#[target_feature(enable = "avx512bw,avx512vl")]
2744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2745#[cfg_attr(test, assert_instr(vpcmp))]
2746#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2747pub const fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
2748    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
2749}
2750
2751/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2752///
2753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu16_mask&expand=1018)
2754#[inline]
2755#[target_feature(enable = "avx512bw,avx512vl")]
2756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2757#[cfg_attr(test, assert_instr(vpcmp))]
2758#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2759pub const fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2760    unsafe { simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8())) }
2761}
2762
2763/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2764///
2765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu16_mask&expand=1019)
2766#[inline]
2767#[target_feature(enable = "avx512bw,avx512vl")]
2768#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2769#[cfg_attr(test, assert_instr(vpcmp))]
2770#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2771pub const fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
2772    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
2773}
2774
2775/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
2776///
2777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_cmplt_epu8_mask&expand=1068)
2778#[inline]
2779#[target_feature(enable = "avx512bw")]
2780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2781#[cfg_attr(test, assert_instr(vpcmp))]
2782#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2783pub const fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
2784    unsafe { simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64())) }
2785}
2786
2787/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2788///
2789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
2790#[inline]
2791#[target_feature(enable = "avx512bw")]
2792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2793#[cfg_attr(test, assert_instr(vpcmp))]
2794#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2795pub const fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
2796    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
2797}
2798
2799/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
2800///
2801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu8_mask&expand=1066)
2802#[inline]
2803#[target_feature(enable = "avx512bw,avx512vl")]
2804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2805#[cfg_attr(test, assert_instr(vpcmp))]
2806#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2807pub const fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
2808    unsafe { simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32())) }
2809}
2810
2811/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2812///
2813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu8_mask&expand=1067)
2814#[inline]
2815#[target_feature(enable = "avx512bw,avx512vl")]
2816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2817#[cfg_attr(test, assert_instr(vpcmp))]
2818#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2819pub const fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
2820    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
2821}
2822
2823/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
2824///
2825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu8_mask&expand=1064)
2826#[inline]
2827#[target_feature(enable = "avx512bw,avx512vl")]
2828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2829#[cfg_attr(test, assert_instr(vpcmp))]
2830#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2831pub const fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
2832    unsafe { simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16())) }
2833}
2834
2835/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2836///
2837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu8_mask&expand=1065)
2838#[inline]
2839#[target_feature(enable = "avx512bw,avx512vl")]
2840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2841#[cfg_attr(test, assert_instr(vpcmp))]
2842#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2843pub const fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
2844    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
2845}
2846
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi16_mask&expand=1022)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // SAFETY: per-lane signed `<` on the matching 32-lane i16 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 32-bit mask.
    unsafe { simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32())) }
}
2858
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegates to the general predicate-based compare with the LT predicate;
    // that helper applies `k1` as a zeromask to the per-lane results.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2870
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi16_mask&expand=1020)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // SAFETY: per-lane signed `<` on the matching 16-lane i16 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 16-bit mask.
    unsafe { simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16())) }
}
2882
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi16_mask&expand=1021)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegates to the general predicate-based compare with the LT predicate;
    // that helper applies `k1` as a zeromask to the per-lane results.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2894
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // SAFETY: per-lane signed `<` on the matching 8-lane i16 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 8-bit mask.
    unsafe { simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
}
2906
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the general predicate-based compare with the LT predicate;
    // that helper applies `k1` as a zeromask to the per-lane results.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2918
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi8_mask&expand=1044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // SAFETY: per-lane signed `<` on the matching 64-lane i8 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 64-bit mask.
    unsafe { simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64())) }
}
2930
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegates to the general predicate-based compare with the LT predicate;
    // that helper applies `k1` as a zeromask to the per-lane results.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2942
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi8_mask&expand=1042)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // SAFETY: per-lane signed `<` on the matching 32-lane i8 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 32-bit mask.
    unsafe { simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32())) }
}
2954
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi8_mask&expand=1043)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegates to the general predicate-based compare with the LT predicate;
    // that helper applies `k1` as a zeromask to the per-lane results.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2966
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8_mask&expand=1040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // SAFETY: per-lane signed `<` on the matching 16-lane i8 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 16-bit mask.
    unsafe { simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
}
2978
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi8_mask&expand=1041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegates to the general predicate-based compare with the LT predicate;
    // that helper applies `k1` as a zeromask to the per-lane results.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2990
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu16_mask&expand=927)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // SAFETY: per-lane unsigned `>` on the matching 32-lane u16 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 32-bit mask.
    unsafe { simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32())) }
}
3002
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Greater-than is expressed as the NLE (not-less-or-equal) predicate of the
    // general compare; that helper applies `k1` as a zeromask to the results.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3014
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu16_mask&expand=925)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // SAFETY: per-lane unsigned `>` on the matching 16-lane u16 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 16-bit mask.
    unsafe { simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16())) }
}
3026
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu16_mask&expand=926)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Greater-than is expressed as the NLE (not-less-or-equal) predicate of the
    // general compare; that helper applies `k1` as a zeromask to the results.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3038
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu16_mask&expand=923)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // SAFETY: per-lane unsigned `>` on the matching 8-lane u16 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 8-bit mask.
    unsafe { simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8())) }
}
3050
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu16_mask&expand=924)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Greater-than is expressed as the NLE (not-less-or-equal) predicate of the
    // general compare; that helper applies `k1` as a zeromask to the results.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3062
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu8_mask&expand=945)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // SAFETY: per-lane unsigned `>` on the matching 64-lane u8 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 64-bit mask.
    unsafe { simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64())) }
}
3074
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Greater-than is expressed as the NLE (not-less-or-equal) predicate of the
    // general compare; that helper applies `k1` as a zeromask to the results.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3086
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu8_mask&expand=943)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // SAFETY: per-lane unsigned `>` on the matching 32-lane u8 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 32-bit mask.
    unsafe { simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32())) }
}
3098
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu8_mask&expand=944)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Greater-than is expressed as the NLE (not-less-or-equal) predicate of the
    // general compare; that helper applies `k1` as a zeromask to the results.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3110
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu8_mask&expand=941)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // SAFETY: per-lane unsigned `>` on the matching 16-lane u8 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 16-bit mask.
    unsafe { simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16())) }
}
3122
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu8_mask&expand=942)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Greater-than is expressed as the NLE (not-less-or-equal) predicate of the
    // general compare; that helper applies `k1` as a zeromask to the results.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3134
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi16_mask&expand=897)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // SAFETY: per-lane signed `>` on the matching 32-lane i16 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 32-bit mask.
    unsafe { simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32())) }
}
3146
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi16_mask&expand=898)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Greater-than is expressed as the NLE (not-less-or-equal) predicate of the
    // general compare; that helper applies `k1` as a zeromask to the results.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3158
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16_mask&expand=895)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // SAFETY: per-lane signed `>` on the matching 16-lane i16 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 16-bit mask.
    unsafe { simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}
3170
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi16_mask&expand=896)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Greater-than is expressed as the NLE (not-less-or-equal) predicate of the
    // general compare; that helper applies `k1` as a zeromask to the results.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3182
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16_mask&expand=893)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // SAFETY: per-lane signed `>` on the matching 8-lane i16 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 8-bit mask.
    unsafe { simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
}
3194
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi16_mask&expand=894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Greater-than is expressed as the NLE (not-less-or-equal) predicate of the
    // general compare; that helper applies `k1` as a zeromask to the results.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3206
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi8_mask&expand=921)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // SAFETY: per-lane signed `>` on the matching 64-lane i8 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 64-bit mask.
    unsafe { simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64())) }
}
3218
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi8_mask&expand=922)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Greater-than is expressed as the NLE (not-less-or-equal) predicate of the
    // general compare; that helper applies `k1` as a zeromask to the results.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3230
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8_mask&expand=919)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // SAFETY: per-lane signed `>` on the matching 32-lane i8 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 32-bit mask.
    unsafe { simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}
3242
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi8_mask&expand=920)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Greater-than is expressed as the NLE (not-less-or-equal) predicate of the
    // general compare; that helper applies `k1` as a zeromask to the results.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3254
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8_mask&expand=917)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // SAFETY: per-lane signed `>` on the matching 16-lane i8 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 16-bit mask.
    unsafe { simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
}
3266
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi8_mask&expand=918)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Greater-than is expressed as the NLE (not-less-or-equal) predicate of the
    // general compare; that helper applies `k1` as a zeromask to the results.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3278
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu16_mask&expand=989)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // SAFETY: per-lane unsigned `<=` on the matching 32-lane u16 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 32-bit mask.
    unsafe { simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32())) }
}
3290
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu16_mask&expand=990)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegates to the general predicate-based compare with the LE predicate;
    // that helper applies `k1` as a zeromask to the per-lane results.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3302
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu16_mask&expand=987)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // SAFETY: per-lane unsigned `<=` on the matching 16-lane u16 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 16-bit mask.
    unsafe { simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16())) }
}
3314
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu16_mask&expand=988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegates to the general predicate-based compare with the LE predicate;
    // that helper applies `k1` as a zeromask to the per-lane results.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3326
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu16_mask&expand=985)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // SAFETY: per-lane unsigned `<=` on the matching 8-lane u16 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 8-bit mask.
    unsafe { simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8())) }
}
3338
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu16_mask&expand=986)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the general predicate-based compare with the LE predicate;
    // that helper applies `k1` as a zeromask to the per-lane results.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3350
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu8_mask&expand=1007)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // SAFETY: per-lane unsigned `<=` on the matching 64-lane u8 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 64-bit mask.
    unsafe { simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64())) }
}
3362
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu8_mask&expand=1008)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegates to the general predicate-based compare with the LE predicate;
    // that helper applies `k1` as a zeromask to the per-lane results.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3374
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu8_mask&expand=1005)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // SAFETY: per-lane unsigned `<=` on the matching 32-lane u8 views of both operands;
    // `simd_bitmask` then packs one result bit per lane into the 32-bit mask.
    unsafe { simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32())) }
}
3386
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu8_mask&expand=1006)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Generic compare with the `_MM_CMPINT_LE` predicate; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3398
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu8_mask&expand=1003)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Lane-wise unsigned `a <= b` on u8x16; `simd_bitmask` packs the 16
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16())) }
}
3410
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu8_mask&expand=1004)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Generic compare with the `_MM_CMPINT_LE` predicate; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3422
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi16_mask&expand=965)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Lane-wise signed `a <= b` on i16x32; `simd_bitmask` packs the 32
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32())) }
}
3434
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi16_mask&expand=966)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Generic compare with the `_MM_CMPINT_LE` predicate; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3446
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi16_mask&expand=963)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Lane-wise signed `a <= b` on i16x16; `simd_bitmask` packs the 16
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16())) }
}
3458
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi16_mask&expand=964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Generic compare with the `_MM_CMPINT_LE` predicate; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3470
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi16_mask&expand=961)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise signed `a <= b` on i16x8; `simd_bitmask` packs the 8
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8())) }
}
3482
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi16_mask&expand=962)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Generic compare with the `_MM_CMPINT_LE` predicate; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3494
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi8_mask&expand=983)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Lane-wise signed `a <= b` on i8x64; `simd_bitmask` packs the 64
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64())) }
}
3506
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi8_mask&expand=984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Generic compare with the `_MM_CMPINT_LE` predicate; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3518
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi8_mask&expand=981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Lane-wise signed `a <= b` on i8x32; `simd_bitmask` packs the 32
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32())) }
}
3530
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi8_mask&expand=982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Generic compare with the `_MM_CMPINT_LE` predicate; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3542
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi8_mask&expand=979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Lane-wise signed `a <= b` on i8x16; `simd_bitmask` packs the 16
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16())) }
}
3554
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi8_mask&expand=980)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Generic compare with the `_MM_CMPINT_LE` predicate; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3566
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu16_mask&expand=867)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Lane-wise unsigned `a >= b` on u16x32; `simd_bitmask` packs the 32
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32())) }
}
3578
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu16_mask&expand=868)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes `>=`; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3590
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu16_mask&expand=865)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Lane-wise unsigned `a >= b` on u16x16; `simd_bitmask` packs the 16
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16())) }
}
3602
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu16_mask&expand=866)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes `>=`; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3614
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu16_mask&expand=863)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise unsigned `a >= b` on u16x8; `simd_bitmask` packs the 8
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8())) }
}
3626
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu16_mask&expand=864)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes `>=`; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3638
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu8_mask&expand=885)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Lane-wise unsigned `a >= b` on u8x64; `simd_bitmask` packs the 64
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64())) }
}
3650
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu8_mask&expand=886)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes `>=`; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3662
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu8_mask&expand=883)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Lane-wise unsigned `a >= b` on u8x32; `simd_bitmask` packs the 32
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32())) }
}
3674
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu8_mask&expand=884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes `>=`; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3686
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu8_mask&expand=881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Lane-wise unsigned `a >= b` on u8x16; `simd_bitmask` packs the 16
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16())) }
}
3698
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu8_mask&expand=882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes `>=`; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3710
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi16_mask&expand=843)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Lane-wise signed `a >= b` on i16x32; `simd_bitmask` packs the 32
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32())) }
}
3722
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi16_mask&expand=844)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes `>=`; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3734
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi16_mask&expand=841)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Lane-wise signed `a >= b` on i16x16; `simd_bitmask` packs the 16
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16())) }
}
3746
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi16_mask&expand=842)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes `>=`; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3758
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi16_mask&expand=839)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise signed `a >= b` on i16x8; `simd_bitmask` packs the 8
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8())) }
}
3770
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi16_mask&expand=840)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes `>=`; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3782
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi8_mask&expand=861)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Lane-wise signed `a >= b` on i8x64; `simd_bitmask` packs the 64
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64())) }
}
3794
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi8_mask&expand=862)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes `>=`; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3806
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi8_mask&expand=859)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Lane-wise signed `a >= b` on i8x32; `simd_bitmask` packs the 32
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32())) }
}
3818
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi8_mask&expand=860)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes `>=`; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3830
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi8_mask&expand=857)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Lane-wise signed `a >= b` on i8x16; `simd_bitmask` packs the 16
    // per-lane results into the bits of the returned mask.
    unsafe { simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16())) }
}
3842
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi8_mask&expand=858)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes `>=`; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3854
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu16_mask&expand=801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Lane-wise `a == b` on u16x32; `simd_bitmask` packs the 32 per-lane
    // results into the bits of the returned mask.
    unsafe { simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32())) }
}
3866
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu16_mask&expand=802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Generic compare with the `_MM_CMPINT_EQ` predicate; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3878
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu16_mask&expand=799)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Lane-wise `a == b` on u16x16; `simd_bitmask` packs the 16 per-lane
    // results into the bits of the returned mask.
    unsafe { simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16())) }
}
3890
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu16_mask&expand=800)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Generic compare with the `_MM_CMPINT_EQ` predicate; `k1` acts as a
    // zeromask, so bits clear in `k1` are clear in the result.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3902
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu16_mask&expand=797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise `a == b` on u16x8; `simd_bitmask` packs the 8 per-lane
    // results into the bits of the returned mask.
    unsafe { simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8())) }
}
3914
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu16_mask&expand=798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the IMM8-based compare with the EQ predicate; the k1 mask
    // is applied inside the callee.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3926
3927/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3928///
3929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu8_mask&expand=819)
3930#[inline]
3931#[target_feature(enable = "avx512bw")]
3932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3933#[cfg_attr(test, assert_instr(vpcmp))]
3934#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3935pub const fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3936    unsafe { simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64())) }
3937}
3938
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu8_mask&expand=820)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegates to the IMM8-based compare with the EQ predicate; the k1 mask
    // is applied inside the callee.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3950
3951/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3952///
3953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu8_mask&expand=817)
3954#[inline]
3955#[target_feature(enable = "avx512bw,avx512vl")]
3956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3957#[cfg_attr(test, assert_instr(vpcmp))]
3958#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3959pub const fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3960    unsafe { simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32())) }
3961}
3962
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu8_mask&expand=818)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegates to the IMM8-based compare with the EQ predicate; the k1 mask
    // is applied inside the callee.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3974
3975/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3976///
3977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu8_mask&expand=815)
3978#[inline]
3979#[target_feature(enable = "avx512bw,avx512vl")]
3980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3981#[cfg_attr(test, assert_instr(vpcmp))]
3982#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3983pub const fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3984    unsafe { simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16())) }
3985}
3986
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu8_mask&expand=816)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegates to the IMM8-based compare with the EQ predicate; the k1 mask
    // is applied inside the callee.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3998
3999/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
4000///
4001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi16_mask&expand=771)
4002#[inline]
4003#[target_feature(enable = "avx512bw")]
4004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4005#[cfg_attr(test, assert_instr(vpcmp))]
4006#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4007pub const fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
4008    unsafe { simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32())) }
4009}
4010
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi16_mask&expand=772)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegates to the IMM8-based compare with the EQ predicate; the k1 mask
    // is applied inside the callee.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
4022
4023/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
4024///
4025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16_mask&expand=769)
4026#[inline]
4027#[target_feature(enable = "avx512bw,avx512vl")]
4028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4029#[cfg_attr(test, assert_instr(vpcmp))]
4030#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4031pub const fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
4032    unsafe { simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
4033}
4034
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi16_mask&expand=770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegates to the IMM8-based compare with the EQ predicate; the k1 mask
    // is applied inside the callee.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
4046
4047/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
4048///
4049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16_mask&expand=767)
4050#[inline]
4051#[target_feature(enable = "avx512bw,avx512vl")]
4052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4053#[cfg_attr(test, assert_instr(vpcmp))]
4054#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4055pub const fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
4056    unsafe { simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
4057}
4058
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi16_mask&expand=768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the IMM8-based compare with the EQ predicate; the k1 mask
    // is applied inside the callee.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
4070
4071/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
4072///
4073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi8_mask&expand=795)
4074#[inline]
4075#[target_feature(enable = "avx512bw")]
4076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4077#[cfg_attr(test, assert_instr(vpcmp))]
4078#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4079pub const fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
4080    unsafe { simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64())) }
4081}
4082
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi8_mask&expand=796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegates to the IMM8-based compare with the EQ predicate; the k1 mask
    // is applied inside the callee.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
4094
4095/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
4096///
4097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8_mask&expand=793)
4098#[inline]
4099#[target_feature(enable = "avx512bw,avx512vl")]
4100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4101#[cfg_attr(test, assert_instr(vpcmp))]
4102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4103pub const fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
4104    unsafe { simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
4105}
4106
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi8_mask&expand=794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegates to the IMM8-based compare with the EQ predicate; the k1 mask
    // is applied inside the callee.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
4118
4119/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
4120///
4121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8_mask&expand=791)
4122#[inline]
4123#[target_feature(enable = "avx512bw,avx512vl")]
4124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4125#[cfg_attr(test, assert_instr(vpcmp))]
4126#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4127pub const fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
4128    unsafe { simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
4129}
4130
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi8_mask&expand=792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegates to the IMM8-based compare with the EQ predicate; the k1 mask
    // is applied inside the callee.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
4142
4143/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4144///
4145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu16_mask&expand=1106)
4146#[inline]
4147#[target_feature(enable = "avx512bw")]
4148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4149#[cfg_attr(test, assert_instr(vpcmp))]
4150#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4151pub const fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
4152    unsafe { simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32())) }
4153}
4154
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu16_mask&expand=1107)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegates to the IMM8-based compare with the NE predicate; the k1 mask
    // is applied inside the callee.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4166
4167/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4168///
4169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu16_mask&expand=1104)
4170#[inline]
4171#[target_feature(enable = "avx512bw,avx512vl")]
4172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4173#[cfg_attr(test, assert_instr(vpcmp))]
4174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4175pub const fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
4176    unsafe { simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16())) }
4177}
4178
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu16_mask&expand=1105)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegates to the IMM8-based compare with the NE predicate; the k1 mask
    // is applied inside the callee.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4190
4191/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4192///
4193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu16_mask&expand=1102)
4194#[inline]
4195#[target_feature(enable = "avx512bw,avx512vl")]
4196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4197#[cfg_attr(test, assert_instr(vpcmp))]
4198#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4199pub const fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
4200    unsafe { simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8())) }
4201}
4202
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu16_mask&expand=1103)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the IMM8-based compare with the NE predicate; the k1 mask
    // is applied inside the callee.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4214
4215/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4216///
4217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu8_mask&expand=1124)
4218#[inline]
4219#[target_feature(enable = "avx512bw")]
4220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4221#[cfg_attr(test, assert_instr(vpcmp))]
4222#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4223pub const fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
4224    unsafe { simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64())) }
4225}
4226
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu8_mask&expand=1125)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegates to the IMM8-based compare with the NE predicate; the k1 mask
    // is applied inside the callee.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4238
4239/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4240///
4241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu8_mask&expand=1122)
4242#[inline]
4243#[target_feature(enable = "avx512bw,avx512vl")]
4244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4245#[cfg_attr(test, assert_instr(vpcmp))]
4246#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4247pub const fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
4248    unsafe { simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32())) }
4249}
4250
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu8_mask&expand=1123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegates to the IMM8-based compare with the NE predicate; the k1 mask
    // is applied inside the callee.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4262
4263/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4264///
4265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu8_mask&expand=1120)
4266#[inline]
4267#[target_feature(enable = "avx512bw,avx512vl")]
4268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4269#[cfg_attr(test, assert_instr(vpcmp))]
4270#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4271pub const fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
4272    unsafe { simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16())) }
4273}
4274
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu8_mask&expand=1121)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegates to the IMM8-based compare with the NE predicate; the k1 mask
    // is applied inside the callee.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4286
4287/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4288///
4289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi16_mask&expand=1082)
4290#[inline]
4291#[target_feature(enable = "avx512bw")]
4292#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4293#[cfg_attr(test, assert_instr(vpcmp))]
4294#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4295pub const fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
4296    unsafe { simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32())) }
4297}
4298
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi16_mask&expand=1083)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegates to the IMM8-based compare with the NE predicate; the k1 mask
    // is applied inside the callee.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4310
4311/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4312///
4313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi16_mask&expand=1080)
4314#[inline]
4315#[target_feature(enable = "avx512bw,avx512vl")]
4316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4317#[cfg_attr(test, assert_instr(vpcmp))]
4318#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4319pub const fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
4320    unsafe { simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16())) }
4321}
4322
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi16_mask&expand=1081)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegates to the IMM8-based compare with the NE predicate; the k1 mask
    // is applied inside the callee.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4334
4335/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
4336///
4337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi16_mask&expand=1078)
4338#[inline]
4339#[target_feature(enable = "avx512bw,avx512vl")]
4340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4341#[cfg_attr(test, assert_instr(vpcmp))]
4342#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4343pub const fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
4344    unsafe { simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8())) }
4345}
4346
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi16_mask&expand=1079)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegates to the IMM8-based compare with the NE predicate; the k1 mask
    // is applied inside the callee.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4358
4359/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4360///
4361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi8_mask&expand=1100)
4362#[inline]
4363#[target_feature(enable = "avx512bw")]
4364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4365#[cfg_attr(test, assert_instr(vpcmp))]
4366#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4367pub const fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
4368    unsafe { simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64())) }
4369}
4370
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi8_mask&expand=1101)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegates to the IMM8-based compare with the NE predicate; the k1 mask
    // is applied inside the callee.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4382
4383/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4384///
4385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi8_mask&expand=1098)
4386#[inline]
4387#[target_feature(enable = "avx512bw,avx512vl")]
4388#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4389#[cfg_attr(test, assert_instr(vpcmp))]
4390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4391pub const fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
4392    unsafe { simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32())) }
4393}
4394
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi8_mask&expand=1099)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegates to the IMM8-based compare with the NE predicate; the k1 mask
    // is applied inside the callee.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4406
4407/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
4408///
4409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi8_mask&expand=1096)
4410#[inline]
4411#[target_feature(enable = "avx512bw,avx512vl")]
4412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4413#[cfg_attr(test, assert_instr(vpcmp))]
4414#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4415pub const fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
4416    unsafe { simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16())) }
4417}
4418
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi8_mask&expand=1097)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegates to the IMM8-based compare with the NE predicate; the k1 mask
    // is applied inside the callee.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4430
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu16_mask&expand=715)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe {
        // The predicate is a 3-bit immediate (0..=7); reject anything wider
        // at compile time.
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        // Comparison masks are signed lane vectors (all-ones = -1, all-zeros
        // = 0), hence i16x32 here even though the inputs are unsigned.
        let r = match IMM8 {
            0 => simd_eq(a, b), // a == b (_MM_CMPINT_EQ, see cmpeq wrappers)
            1 => simd_lt(a, b), // a < b, unsigned
            2 => simd_le(a, b), // a <= b, unsigned
            3 => i16x32::ZERO,  // always-false predicate: no bits set
            4 => simd_ne(a, b), // a != b (_MM_CMPINT_NE, see cmpneq wrappers)
            5 => simd_ge(a, b), // a >= b, unsigned
            6 => simd_gt(a, b), // a > b, unsigned
            _ => i16x32::splat(-1), // 7: always-true predicate, all bits set
        };
        // Pack the per-lane mask vector down to one bit per lane.
        simd_bitmask(r)
    }
}
4458
4459/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4460///
4461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu16_mask&expand=716)
4462#[inline]
4463#[target_feature(enable = "avx512bw")]
4464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4465#[rustc_legacy_const_generics(3)]
4466#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4467#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4468pub const fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
4469    k1: __mmask32,
4470    a: __m512i,
4471    b: __m512i,
4472) -> __mmask32 {
4473    unsafe {
4474        static_assert_uimm_bits!(IMM8, 3);
4475        let a = a.as_u16x32();
4476        let b = b.as_u16x32();
4477        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
4478        let r = match IMM8 {
4479            0 => simd_and(k1, simd_eq(a, b)),
4480            1 => simd_and(k1, simd_lt(a, b)),
4481            2 => simd_and(k1, simd_le(a, b)),
4482            3 => i16x32::ZERO,
4483            4 => simd_and(k1, simd_ne(a, b)),
4484            5 => simd_and(k1, simd_ge(a, b)),
4485            6 => simd_and(k1, simd_gt(a, b)),
4486            _ => k1,
4487        };
4488        simd_bitmask(r)
4489    }
4490}
4491
4492/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4493///
4494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu16_mask&expand=713)
4495#[inline]
4496#[target_feature(enable = "avx512bw,avx512vl")]
4497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4498#[rustc_legacy_const_generics(2)]
4499#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4500#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4501pub const fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
4502    unsafe {
4503        static_assert_uimm_bits!(IMM8, 3);
4504        let a = a.as_u16x16();
4505        let b = b.as_u16x16();
4506        let r = match IMM8 {
4507            0 => simd_eq(a, b),
4508            1 => simd_lt(a, b),
4509            2 => simd_le(a, b),
4510            3 => i16x16::ZERO,
4511            4 => simd_ne(a, b),
4512            5 => simd_ge(a, b),
4513            6 => simd_gt(a, b),
4514            _ => i16x16::splat(-1),
4515        };
4516        simd_bitmask(r)
4517    }
4518}
4519
4520/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4521///
4522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu16_mask&expand=714)
4523#[inline]
4524#[target_feature(enable = "avx512bw,avx512vl")]
4525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4526#[rustc_legacy_const_generics(3)]
4527#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4528#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4529pub const fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
4530    k1: __mmask16,
4531    a: __m256i,
4532    b: __m256i,
4533) -> __mmask16 {
4534    unsafe {
4535        static_assert_uimm_bits!(IMM8, 3);
4536        let a = a.as_u16x16();
4537        let b = b.as_u16x16();
4538        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
4539        let r = match IMM8 {
4540            0 => simd_and(k1, simd_eq(a, b)),
4541            1 => simd_and(k1, simd_lt(a, b)),
4542            2 => simd_and(k1, simd_le(a, b)),
4543            3 => i16x16::ZERO,
4544            4 => simd_and(k1, simd_ne(a, b)),
4545            5 => simd_and(k1, simd_ge(a, b)),
4546            6 => simd_and(k1, simd_gt(a, b)),
4547            _ => k1,
4548        };
4549        simd_bitmask(r)
4550    }
4551}
4552
4553/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4554///
4555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu16_mask&expand=711)
4556#[inline]
4557#[target_feature(enable = "avx512bw,avx512vl")]
4558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4559#[rustc_legacy_const_generics(2)]
4560#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4562pub const fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
4563    unsafe {
4564        static_assert_uimm_bits!(IMM8, 3);
4565        let a = a.as_u16x8();
4566        let b = b.as_u16x8();
4567        let r = match IMM8 {
4568            0 => simd_eq(a, b),
4569            1 => simd_lt(a, b),
4570            2 => simd_le(a, b),
4571            3 => i16x8::ZERO,
4572            4 => simd_ne(a, b),
4573            5 => simd_ge(a, b),
4574            6 => simd_gt(a, b),
4575            _ => i16x8::splat(-1),
4576        };
4577        simd_bitmask(r)
4578    }
4579}
4580
4581/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4582///
4583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu16_mask&expand=712)
4584#[inline]
4585#[target_feature(enable = "avx512bw,avx512vl")]
4586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4587#[rustc_legacy_const_generics(3)]
4588#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4590pub const fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(
4591    k1: __mmask8,
4592    a: __m128i,
4593    b: __m128i,
4594) -> __mmask8 {
4595    unsafe {
4596        static_assert_uimm_bits!(IMM8, 3);
4597        let a = a.as_u16x8();
4598        let b = b.as_u16x8();
4599        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
4600        let r = match IMM8 {
4601            0 => simd_and(k1, simd_eq(a, b)),
4602            1 => simd_and(k1, simd_lt(a, b)),
4603            2 => simd_and(k1, simd_le(a, b)),
4604            3 => i16x8::ZERO,
4605            4 => simd_and(k1, simd_ne(a, b)),
4606            5 => simd_and(k1, simd_ge(a, b)),
4607            6 => simd_and(k1, simd_gt(a, b)),
4608            _ => k1,
4609        };
4610        simd_bitmask(r)
4611    }
4612}
4613
4614/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4615///
4616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu8_mask&expand=733)
4617#[inline]
4618#[target_feature(enable = "avx512bw")]
4619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4620#[rustc_legacy_const_generics(2)]
4621#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4623pub const fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
4624    unsafe {
4625        static_assert_uimm_bits!(IMM8, 3);
4626        let a = a.as_u8x64();
4627        let b = b.as_u8x64();
4628        let r = match IMM8 {
4629            0 => simd_eq(a, b),
4630            1 => simd_lt(a, b),
4631            2 => simd_le(a, b),
4632            3 => i8x64::ZERO,
4633            4 => simd_ne(a, b),
4634            5 => simd_ge(a, b),
4635            6 => simd_gt(a, b),
4636            _ => i8x64::splat(-1),
4637        };
4638        simd_bitmask(r)
4639    }
4640}
4641
4642/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4643///
4644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu8_mask&expand=734)
4645#[inline]
4646#[target_feature(enable = "avx512bw")]
4647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4648#[rustc_legacy_const_generics(3)]
4649#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4651pub const fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
4652    k1: __mmask64,
4653    a: __m512i,
4654    b: __m512i,
4655) -> __mmask64 {
4656    unsafe {
4657        static_assert_uimm_bits!(IMM8, 3);
4658        let a = a.as_u8x64();
4659        let b = b.as_u8x64();
4660        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
4661        let r = match IMM8 {
4662            0 => simd_and(k1, simd_eq(a, b)),
4663            1 => simd_and(k1, simd_lt(a, b)),
4664            2 => simd_and(k1, simd_le(a, b)),
4665            3 => i8x64::ZERO,
4666            4 => simd_and(k1, simd_ne(a, b)),
4667            5 => simd_and(k1, simd_ge(a, b)),
4668            6 => simd_and(k1, simd_gt(a, b)),
4669            _ => k1,
4670        };
4671        simd_bitmask(r)
4672    }
4673}
4674
4675/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4676///
4677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu8_mask&expand=731)
4678#[inline]
4679#[target_feature(enable = "avx512bw,avx512vl")]
4680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4681#[rustc_legacy_const_generics(2)]
4682#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4683#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4684pub const fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
4685    unsafe {
4686        static_assert_uimm_bits!(IMM8, 3);
4687        let a = a.as_u8x32();
4688        let b = b.as_u8x32();
4689        let r = match IMM8 {
4690            0 => simd_eq(a, b),
4691            1 => simd_lt(a, b),
4692            2 => simd_le(a, b),
4693            3 => i8x32::ZERO,
4694            4 => simd_ne(a, b),
4695            5 => simd_ge(a, b),
4696            6 => simd_gt(a, b),
4697            _ => i8x32::splat(-1),
4698        };
4699        simd_bitmask(r)
4700    }
4701}
4702
4703/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4704///
4705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu8_mask&expand=732)
4706#[inline]
4707#[target_feature(enable = "avx512bw,avx512vl")]
4708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4709#[rustc_legacy_const_generics(3)]
4710#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4711#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4712pub const fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
4713    k1: __mmask32,
4714    a: __m256i,
4715    b: __m256i,
4716) -> __mmask32 {
4717    unsafe {
4718        static_assert_uimm_bits!(IMM8, 3);
4719        let a = a.as_u8x32();
4720        let b = b.as_u8x32();
4721        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
4722        let r = match IMM8 {
4723            0 => simd_and(k1, simd_eq(a, b)),
4724            1 => simd_and(k1, simd_lt(a, b)),
4725            2 => simd_and(k1, simd_le(a, b)),
4726            3 => i8x32::ZERO,
4727            4 => simd_and(k1, simd_ne(a, b)),
4728            5 => simd_and(k1, simd_ge(a, b)),
4729            6 => simd_and(k1, simd_gt(a, b)),
4730            _ => k1,
4731        };
4732        simd_bitmask(r)
4733    }
4734}
4735
4736/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4737///
4738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu8_mask&expand=729)
4739#[inline]
4740#[target_feature(enable = "avx512bw,avx512vl")]
4741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4742#[rustc_legacy_const_generics(2)]
4743#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4744#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4745pub const fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
4746    unsafe {
4747        static_assert_uimm_bits!(IMM8, 3);
4748        let a = a.as_u8x16();
4749        let b = b.as_u8x16();
4750        let r = match IMM8 {
4751            0 => simd_eq(a, b),
4752            1 => simd_lt(a, b),
4753            2 => simd_le(a, b),
4754            3 => i8x16::ZERO,
4755            4 => simd_ne(a, b),
4756            5 => simd_ge(a, b),
4757            6 => simd_gt(a, b),
4758            _ => i8x16::splat(-1),
4759        };
4760        simd_bitmask(r)
4761    }
4762}
4763
4764/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4765///
4766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu8_mask&expand=730)
4767#[inline]
4768#[target_feature(enable = "avx512bw,avx512vl")]
4769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4770#[rustc_legacy_const_generics(3)]
4771#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4772#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4773pub const fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(
4774    k1: __mmask16,
4775    a: __m128i,
4776    b: __m128i,
4777) -> __mmask16 {
4778    unsafe {
4779        static_assert_uimm_bits!(IMM8, 3);
4780        let a = a.as_u8x16();
4781        let b = b.as_u8x16();
4782        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
4783        let r = match IMM8 {
4784            0 => simd_and(k1, simd_eq(a, b)),
4785            1 => simd_and(k1, simd_lt(a, b)),
4786            2 => simd_and(k1, simd_le(a, b)),
4787            3 => i8x16::ZERO,
4788            4 => simd_and(k1, simd_ne(a, b)),
4789            5 => simd_and(k1, simd_ge(a, b)),
4790            6 => simd_and(k1, simd_gt(a, b)),
4791            _ => k1,
4792        };
4793        simd_bitmask(r)
4794    }
4795}
4796
4797/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4798///
4799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi16_mask&expand=691)
4800#[inline]
4801#[target_feature(enable = "avx512bw")]
4802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4803#[rustc_legacy_const_generics(2)]
4804#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4805#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4806pub const fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
4807    unsafe {
4808        static_assert_uimm_bits!(IMM8, 3);
4809        let a = a.as_i16x32();
4810        let b = b.as_i16x32();
4811        let r = match IMM8 {
4812            0 => simd_eq(a, b),
4813            1 => simd_lt(a, b),
4814            2 => simd_le(a, b),
4815            3 => i16x32::ZERO,
4816            4 => simd_ne(a, b),
4817            5 => simd_ge(a, b),
4818            6 => simd_gt(a, b),
4819            _ => i16x32::splat(-1),
4820        };
4821        simd_bitmask(r)
4822    }
4823}
4824
4825/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4826///
4827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi16_mask&expand=692)
4828#[inline]
4829#[target_feature(enable = "avx512bw")]
4830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4831#[rustc_legacy_const_generics(3)]
4832#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4833#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4834pub const fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
4835    k1: __mmask32,
4836    a: __m512i,
4837    b: __m512i,
4838) -> __mmask32 {
4839    unsafe {
4840        static_assert_uimm_bits!(IMM8, 3);
4841        let a = a.as_i16x32();
4842        let b = b.as_i16x32();
4843        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
4844        let r = match IMM8 {
4845            0 => simd_and(k1, simd_eq(a, b)),
4846            1 => simd_and(k1, simd_lt(a, b)),
4847            2 => simd_and(k1, simd_le(a, b)),
4848            3 => i16x32::ZERO,
4849            4 => simd_and(k1, simd_ne(a, b)),
4850            5 => simd_and(k1, simd_ge(a, b)),
4851            6 => simd_and(k1, simd_gt(a, b)),
4852            _ => k1,
4853        };
4854        simd_bitmask(r)
4855    }
4856}
4857
4858/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4859///
4860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi16_mask&expand=689)
4861#[inline]
4862#[target_feature(enable = "avx512bw,avx512vl")]
4863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4864#[rustc_legacy_const_generics(2)]
4865#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4866#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4867pub const fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
4868    unsafe {
4869        static_assert_uimm_bits!(IMM8, 3);
4870        let a = a.as_i16x16();
4871        let b = b.as_i16x16();
4872        let r = match IMM8 {
4873            0 => simd_eq(a, b),
4874            1 => simd_lt(a, b),
4875            2 => simd_le(a, b),
4876            3 => i16x16::ZERO,
4877            4 => simd_ne(a, b),
4878            5 => simd_ge(a, b),
4879            6 => simd_gt(a, b),
4880            _ => i16x16::splat(-1),
4881        };
4882        simd_bitmask(r)
4883    }
4884}
4885
4886/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4887///
4888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi16_mask&expand=690)
4889#[inline]
4890#[target_feature(enable = "avx512bw,avx512vl")]
4891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4892#[rustc_legacy_const_generics(3)]
4893#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4894#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4895pub const fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
4896    k1: __mmask16,
4897    a: __m256i,
4898    b: __m256i,
4899) -> __mmask16 {
4900    unsafe {
4901        static_assert_uimm_bits!(IMM8, 3);
4902        let a = a.as_i16x16();
4903        let b = b.as_i16x16();
4904        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
4905        let r = match IMM8 {
4906            0 => simd_and(k1, simd_eq(a, b)),
4907            1 => simd_and(k1, simd_lt(a, b)),
4908            2 => simd_and(k1, simd_le(a, b)),
4909            3 => i16x16::ZERO,
4910            4 => simd_and(k1, simd_ne(a, b)),
4911            5 => simd_and(k1, simd_ge(a, b)),
4912            6 => simd_and(k1, simd_gt(a, b)),
4913            _ => k1,
4914        };
4915        simd_bitmask(r)
4916    }
4917}
4918
4919/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4920///
4921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=687)
4922#[inline]
4923#[target_feature(enable = "avx512bw,avx512vl")]
4924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4925#[rustc_legacy_const_generics(2)]
4926#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4927#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4928pub const fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
4929    unsafe {
4930        static_assert_uimm_bits!(IMM8, 3);
4931        let a = a.as_i16x8();
4932        let b = b.as_i16x8();
4933        let r = match IMM8 {
4934            0 => simd_eq(a, b),
4935            1 => simd_lt(a, b),
4936            2 => simd_le(a, b),
4937            3 => i16x8::ZERO,
4938            4 => simd_ne(a, b),
4939            5 => simd_ge(a, b),
4940            6 => simd_gt(a, b),
4941            _ => i16x8::splat(-1),
4942        };
4943        simd_bitmask(r)
4944    }
4945}
4946
4947/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4948///
4949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi16_mask&expand=688)
4950#[inline]
4951#[target_feature(enable = "avx512bw,avx512vl")]
4952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4953#[rustc_legacy_const_generics(3)]
4954#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4955#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4956pub const fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(
4957    k1: __mmask8,
4958    a: __m128i,
4959    b: __m128i,
4960) -> __mmask8 {
4961    unsafe {
4962        static_assert_uimm_bits!(IMM8, 3);
4963        let a = a.as_i16x8();
4964        let b = b.as_i16x8();
4965        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
4966        let r = match IMM8 {
4967            0 => simd_and(k1, simd_eq(a, b)),
4968            1 => simd_and(k1, simd_lt(a, b)),
4969            2 => simd_and(k1, simd_le(a, b)),
4970            3 => i16x8::ZERO,
4971            4 => simd_and(k1, simd_ne(a, b)),
4972            5 => simd_and(k1, simd_ge(a, b)),
4973            6 => simd_and(k1, simd_gt(a, b)),
4974            _ => k1,
4975        };
4976        simd_bitmask(r)
4977    }
4978}
4979
4980/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4981///
4982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi8_mask&expand=709)
4983#[inline]
4984#[target_feature(enable = "avx512bw")]
4985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4986#[rustc_legacy_const_generics(2)]
4987#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4989pub const fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
4990    unsafe {
4991        static_assert_uimm_bits!(IMM8, 3);
4992        let a = a.as_i8x64();
4993        let b = b.as_i8x64();
4994        let r = match IMM8 {
4995            0 => simd_eq(a, b),
4996            1 => simd_lt(a, b),
4997            2 => simd_le(a, b),
4998            3 => i8x64::ZERO,
4999            4 => simd_ne(a, b),
5000            5 => simd_ge(a, b),
5001            6 => simd_gt(a, b),
5002            _ => i8x64::splat(-1),
5003        };
5004        simd_bitmask(r)
5005    }
5006}
5007
5008/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
5009///
5010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi8_mask&expand=710)
5011#[inline]
5012#[target_feature(enable = "avx512bw")]
5013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5014#[rustc_legacy_const_generics(3)]
5015#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5016#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5017pub const fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
5018    k1: __mmask64,
5019    a: __m512i,
5020    b: __m512i,
5021) -> __mmask64 {
5022    unsafe {
5023        static_assert_uimm_bits!(IMM8, 3);
5024        let a = a.as_i8x64();
5025        let b = b.as_i8x64();
5026        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
5027        let r = match IMM8 {
5028            0 => simd_and(k1, simd_eq(a, b)),
5029            1 => simd_and(k1, simd_lt(a, b)),
5030            2 => simd_and(k1, simd_le(a, b)),
5031            3 => i8x64::ZERO,
5032            4 => simd_and(k1, simd_ne(a, b)),
5033            5 => simd_and(k1, simd_ge(a, b)),
5034            6 => simd_and(k1, simd_gt(a, b)),
5035            _ => k1,
5036        };
5037        simd_bitmask(r)
5038    }
5039}
5040
5041/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
5042///
5043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi8_mask&expand=707)
5044#[inline]
5045#[target_feature(enable = "avx512bw,avx512vl")]
5046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5047#[rustc_legacy_const_generics(2)]
5048#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5049#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5050pub const fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
5051    unsafe {
5052        static_assert_uimm_bits!(IMM8, 3);
5053        let a = a.as_i8x32();
5054        let b = b.as_i8x32();
5055        let r = match IMM8 {
5056            0 => simd_eq(a, b),
5057            1 => simd_lt(a, b),
5058            2 => simd_le(a, b),
5059            3 => i8x32::ZERO,
5060            4 => simd_ne(a, b),
5061            5 => simd_ge(a, b),
5062            6 => simd_gt(a, b),
5063            _ => i8x32::splat(-1),
5064        };
5065        simd_bitmask(r)
5066    }
5067}
5068
5069/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
5070///
5071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi8_mask&expand=708)
5072#[inline]
5073#[target_feature(enable = "avx512bw,avx512vl")]
5074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5075#[rustc_legacy_const_generics(3)]
5076#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5077#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5078pub const fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
5079    k1: __mmask32,
5080    a: __m256i,
5081    b: __m256i,
5082) -> __mmask32 {
5083    unsafe {
5084        static_assert_uimm_bits!(IMM8, 3);
5085        let a = a.as_i8x32();
5086        let b = b.as_i8x32();
5087        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
5088        let r = match IMM8 {
5089            0 => simd_and(k1, simd_eq(a, b)),
5090            1 => simd_and(k1, simd_lt(a, b)),
5091            2 => simd_and(k1, simd_le(a, b)),
5092            3 => i8x32::ZERO,
5093            4 => simd_and(k1, simd_ne(a, b)),
5094            5 => simd_and(k1, simd_ge(a, b)),
5095            6 => simd_and(k1, simd_gt(a, b)),
5096            _ => k1,
5097        };
5098        simd_bitmask(r)
5099    }
5100}
5101
5102/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
5103///
5104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi8_mask&expand=705)
5105#[inline]
5106#[target_feature(enable = "avx512bw,avx512vl")]
5107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5108#[rustc_legacy_const_generics(2)]
5109#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5110#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5111pub const fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
5112    unsafe {
5113        static_assert_uimm_bits!(IMM8, 3);
5114        let a = a.as_i8x16();
5115        let b = b.as_i8x16();
5116        let r = match IMM8 {
5117            0 => simd_eq(a, b),
5118            1 => simd_lt(a, b),
5119            2 => simd_le(a, b),
5120            3 => i8x16::ZERO,
5121            4 => simd_ne(a, b),
5122            5 => simd_ge(a, b),
5123            6 => simd_gt(a, b),
5124            _ => i8x16::splat(-1),
5125        };
5126        simd_bitmask(r)
5127    }
5128}
5129
5130/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
5131///
5132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi8_mask&expand=706)
5133#[inline]
5134#[target_feature(enable = "avx512bw,avx512vl")]
5135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5136#[rustc_legacy_const_generics(3)]
5137#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
5138#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5139pub const fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(
5140    k1: __mmask16,
5141    a: __m128i,
5142    b: __m128i,
5143) -> __mmask16 {
5144    unsafe {
5145        static_assert_uimm_bits!(IMM8, 3);
5146        let a = a.as_i8x16();
5147        let b = b.as_i8x16();
5148        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
5149        let r = match IMM8 {
5150            0 => simd_and(k1, simd_eq(a, b)),
5151            1 => simd_and(k1, simd_lt(a, b)),
5152            2 => simd_and(k1, simd_le(a, b)),
5153            3 => i8x16::ZERO,
5154            4 => simd_and(k1, simd_ne(a, b)),
5155            5 => simd_and(k1, simd_ge(a, b)),
5156            6 => simd_and(k1, simd_gt(a, b)),
5157            _ => k1,
5158        };
5159        simd_bitmask(r)
5160    }
5161}
5162
5163/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
5164///
5165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi16)
5166#[inline]
5167#[target_feature(enable = "avx512bw,avx512vl")]
5168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5169#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5170pub const fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
5171    unsafe { simd_reduce_add_ordered(a.as_i16x16(), 0) }
5172}
5173
5174/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
5175///
5176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi16)
5177#[inline]
5178#[target_feature(enable = "avx512bw,avx512vl")]
5179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5180#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5181pub const fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
5182    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO), 0) }
5183}
5184
5185/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
5186///
5187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi16)
5188#[inline]
5189#[target_feature(enable = "avx512bw,avx512vl")]
5190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5191#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5192pub const fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
5193    unsafe { simd_reduce_add_ordered(a.as_i16x8(), 0) }
5194}
5195
5196/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
5197///
5198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi16)
5199#[inline]
5200#[target_feature(enable = "avx512bw,avx512vl")]
5201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5202#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5203pub const fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
5204    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO), 0) }
5205}
5206
5207/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
5208///
5209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi8)
5210#[inline]
5211#[target_feature(enable = "avx512bw,avx512vl")]
5212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5213#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5214pub const fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
5215    unsafe { simd_reduce_add_ordered(a.as_i8x32(), 0) }
5216}
5217
5218/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
5219///
5220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi8)
5221#[inline]
5222#[target_feature(enable = "avx512bw,avx512vl")]
5223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5224#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5225pub const fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
5226    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO), 0) }
5227}
5228
5229/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
5230///
5231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi8)
5232#[inline]
5233#[target_feature(enable = "avx512bw,avx512vl")]
5234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5235#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5236pub const fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
5237    unsafe { simd_reduce_add_ordered(a.as_i8x16(), 0) }
5238}
5239
5240/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
5241///
5242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi8)
5243#[inline]
5244#[target_feature(enable = "avx512bw,avx512vl")]
5245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5246#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5247pub const fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
5248    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO), 0) }
5249}
5250
5251/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
5252///
5253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi16)
5254#[inline]
5255#[target_feature(enable = "avx512bw,avx512vl")]
5256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5257#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5258pub const fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
5259    unsafe { simd_reduce_and(a.as_i16x16()) }
5260}
5261
5262/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5263///
5264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi16)
5265#[inline]
5266#[target_feature(enable = "avx512bw,avx512vl")]
5267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5269pub const fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
5270    unsafe {
5271        simd_reduce_and(simd_select_bitmask(
5272            k,
5273            a.as_i16x16(),
5274            _mm256_set1_epi64x(-1).as_i16x16(),
5275        ))
5276    }
5277}
5278
5279/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
5280///
5281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi16)
5282#[inline]
5283#[target_feature(enable = "avx512bw,avx512vl")]
5284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5285#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5286pub const fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
5287    unsafe { simd_reduce_and(a.as_i16x8()) }
5288}
5289
5290/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5291///
5292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi16)
5293#[inline]
5294#[target_feature(enable = "avx512bw,avx512vl")]
5295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5297pub const fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
5298    unsafe {
5299        simd_reduce_and(simd_select_bitmask(
5300            k,
5301            a.as_i16x8(),
5302            _mm_set1_epi64x(-1).as_i16x8(),
5303        ))
5304    }
5305}
5306
5307/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
5308///
5309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi8)
5310#[inline]
5311#[target_feature(enable = "avx512bw,avx512vl")]
5312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5313#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5314pub const fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
5315    unsafe { simd_reduce_and(a.as_i8x32()) }
5316}
5317
5318/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5319///
5320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi8)
5321#[inline]
5322#[target_feature(enable = "avx512bw,avx512vl")]
5323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5324#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5325pub const fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
5326    unsafe {
5327        simd_reduce_and(simd_select_bitmask(
5328            k,
5329            a.as_i8x32(),
5330            _mm256_set1_epi64x(-1).as_i8x32(),
5331        ))
5332    }
5333}
5334
5335/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
5336///
5337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi8)
5338#[inline]
5339#[target_feature(enable = "avx512bw,avx512vl")]
5340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5341#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5342pub const fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
5343    unsafe { simd_reduce_and(a.as_i8x16()) }
5344}
5345
5346/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5347///
5348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi8)
5349#[inline]
5350#[target_feature(enable = "avx512bw,avx512vl")]
5351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5352#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5353pub const fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
5354    unsafe {
5355        simd_reduce_and(simd_select_bitmask(
5356            k,
5357            a.as_i8x16(),
5358            _mm_set1_epi64x(-1).as_i8x16(),
5359        ))
5360    }
5361}
5362
5363/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5364///
5365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi16)
5366#[inline]
5367#[target_feature(enable = "avx512bw,avx512vl")]
5368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5369#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5370pub const fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
5371    unsafe { simd_reduce_max(a.as_i16x16()) }
5372}
5373
5374/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5375///
5376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi16)
5377#[inline]
5378#[target_feature(enable = "avx512bw,avx512vl")]
5379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5380#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5381pub const fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
5382    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768))) }
5383}
5384
5385/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5386///
5387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi16)
5388#[inline]
5389#[target_feature(enable = "avx512bw,avx512vl")]
5390#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5392pub const fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
5393    unsafe { simd_reduce_max(a.as_i16x8()) }
5394}
5395
5396/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5397///
5398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi16)
5399#[inline]
5400#[target_feature(enable = "avx512bw,avx512vl")]
5401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5403pub const fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
5404    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768))) }
5405}
5406
5407/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5408///
5409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi8)
5410#[inline]
5411#[target_feature(enable = "avx512bw,avx512vl")]
5412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5413#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5414pub const fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
5415    unsafe { simd_reduce_max(a.as_i8x32()) }
5416}
5417
5418/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5419///
5420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi8)
5421#[inline]
5422#[target_feature(enable = "avx512bw,avx512vl")]
5423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5424#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5425pub const fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
5426    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128))) }
5427}
5428
5429/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5430///
5431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi8)
5432#[inline]
5433#[target_feature(enable = "avx512bw,avx512vl")]
5434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5435#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5436pub const fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
5437    unsafe { simd_reduce_max(a.as_i8x16()) }
5438}
5439
5440/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5441///
5442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi8)
5443#[inline]
5444#[target_feature(enable = "avx512bw,avx512vl")]
5445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5446#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5447pub const fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
5448    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128))) }
5449}
5450
5451/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5452///
5453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu16)
5454#[inline]
5455#[target_feature(enable = "avx512bw,avx512vl")]
5456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5457#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5458pub const fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
5459    unsafe { simd_reduce_max(a.as_u16x16()) }
5460}
5461
5462/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5463///
5464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu16)
5465#[inline]
5466#[target_feature(enable = "avx512bw,avx512vl")]
5467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5468#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5469pub const fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
5470    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) }
5471}
5472
5473/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
5474///
5475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu16)
5476#[inline]
5477#[target_feature(enable = "avx512bw,avx512vl")]
5478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5479#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5480pub const fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
5481    unsafe { simd_reduce_max(a.as_u16x8()) }
5482}
5483
5484/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5485///
5486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu16)
5487#[inline]
5488#[target_feature(enable = "avx512bw,avx512vl")]
5489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5491pub const fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
5492    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) }
5493}
5494
5495/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5496///
5497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu8)
5498#[inline]
5499#[target_feature(enable = "avx512bw,avx512vl")]
5500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5501#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5502pub const fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
5503    unsafe { simd_reduce_max(a.as_u8x32()) }
5504}
5505
5506/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5507///
5508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu8)
5509#[inline]
5510#[target_feature(enable = "avx512bw,avx512vl")]
5511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5512#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5513pub const fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
5514    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) }
5515}
5516
5517/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
5518///
5519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu8)
5520#[inline]
5521#[target_feature(enable = "avx512bw,avx512vl")]
5522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5523#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5524pub const fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
5525    unsafe { simd_reduce_max(a.as_u8x16()) }
5526}
5527
5528/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
5529///
5530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu8)
5531#[inline]
5532#[target_feature(enable = "avx512bw,avx512vl")]
5533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5535pub const fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
5536    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) }
5537}
5538
5539/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5540///
5541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi16)
5542#[inline]
5543#[target_feature(enable = "avx512bw,avx512vl")]
5544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5545#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5546pub const fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
5547    unsafe { simd_reduce_min(a.as_i16x16()) }
5548}
5549
5550/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5551///
5552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi16)
5553#[inline]
5554#[target_feature(enable = "avx512bw,avx512vl")]
5555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5556#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5557pub const fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
5558    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff))) }
5559}
5560
5561/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5562///
5563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi16)
5564#[inline]
5565#[target_feature(enable = "avx512bw,avx512vl")]
5566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5567#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5568pub const fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
5569    unsafe { simd_reduce_min(a.as_i16x8()) }
5570}
5571
5572/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5573///
5574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi16)
5575#[inline]
5576#[target_feature(enable = "avx512bw,avx512vl")]
5577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5578#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5579pub const fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
5580    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff))) }
5581}
5582
5583/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi8)
5586#[inline]
5587#[target_feature(enable = "avx512bw,avx512vl")]
5588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5589#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5590pub const fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
5591    unsafe { simd_reduce_min(a.as_i8x32()) }
5592}
5593
5594/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5595///
5596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi8)
5597#[inline]
5598#[target_feature(enable = "avx512bw,avx512vl")]
5599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5600#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5601pub const fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
5602    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f))) }
5603}
5604
5605/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5606///
5607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi8)
5608#[inline]
5609#[target_feature(enable = "avx512bw,avx512vl")]
5610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5611#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5612pub const fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
5613    unsafe { simd_reduce_min(a.as_i8x16()) }
5614}
5615
5616/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5617///
5618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi8)
5619#[inline]
5620#[target_feature(enable = "avx512bw,avx512vl")]
5621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5623pub const fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
5624    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f))) }
5625}
5626
5627/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5628///
5629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu16)
5630#[inline]
5631#[target_feature(enable = "avx512bw,avx512vl")]
5632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5633#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5634pub const fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
5635    unsafe { simd_reduce_min(a.as_u16x16()) }
5636}
5637
5638/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5639///
5640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu16)
5641#[inline]
5642#[target_feature(enable = "avx512bw,avx512vl")]
5643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5644#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5645pub const fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
5646    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff))) }
5647}
5648
5649/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
5650///
5651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu16)
5652#[inline]
5653#[target_feature(enable = "avx512bw,avx512vl")]
5654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5655#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5656pub const fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
5657    unsafe { simd_reduce_min(a.as_u16x8()) }
5658}
5659
5660/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5661///
5662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu16)
5663#[inline]
5664#[target_feature(enable = "avx512bw,avx512vl")]
5665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5666#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5667pub const fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
5668    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff))) }
5669}
5670
5671/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5672///
5673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu8)
5674#[inline]
5675#[target_feature(enable = "avx512bw,avx512vl")]
5676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5677#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5678pub const fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
5679    unsafe { simd_reduce_min(a.as_u8x32()) }
5680}
5681
5682/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5683///
5684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu8)
5685#[inline]
5686#[target_feature(enable = "avx512bw,avx512vl")]
5687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5688#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5689pub const fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
5690    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff))) }
5691}
5692
5693/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
5694///
5695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu8)
5696#[inline]
5697#[target_feature(enable = "avx512bw,avx512vl")]
5698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5699#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5700pub const fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
5701    unsafe { simd_reduce_min(a.as_u8x16()) }
5702}
5703
5704/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
5705///
5706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu8)
5707#[inline]
5708#[target_feature(enable = "avx512bw,avx512vl")]
5709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5710#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5711pub const fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
5712    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff))) }
5713}
5714
5715/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
5716///
5717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi16)
5718#[inline]
5719#[target_feature(enable = "avx512bw,avx512vl")]
5720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5721#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5722pub const fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
5723    unsafe { simd_reduce_mul_ordered(a.as_i16x16(), 1) }
5724}
5725
5726/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5727///
5728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi16)
5729#[inline]
5730#[target_feature(enable = "avx512bw,avx512vl")]
5731#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5732#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5733pub const fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
5734    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1)), 1) }
5735}
5736
5737/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
5738///
5739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi16)
5740#[inline]
5741#[target_feature(enable = "avx512bw,avx512vl")]
5742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5743#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5744pub const fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
5745    unsafe { simd_reduce_mul_ordered(a.as_i16x8(), 1) }
5746}
5747
5748/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5749///
5750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi16)
5751#[inline]
5752#[target_feature(enable = "avx512bw,avx512vl")]
5753#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5754#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5755pub const fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
5756    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1)), 1) }
5757}
5758
5759/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
5760///
5761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi8)
5762#[inline]
5763#[target_feature(enable = "avx512bw,avx512vl")]
5764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5765#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5766pub const fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
5767    unsafe { simd_reduce_mul_ordered(a.as_i8x32(), 1) }
5768}
5769
5770/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5771///
5772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi8)
5773#[inline]
5774#[target_feature(enable = "avx512bw,avx512vl")]
5775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5776#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5777pub const fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
5778    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1)), 1) }
5779}
5780
5781/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
5782///
5783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi8)
5784#[inline]
5785#[target_feature(enable = "avx512bw,avx512vl")]
5786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5787#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5788pub const fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
5789    unsafe { simd_reduce_mul_ordered(a.as_i8x16(), 1) }
5790}
5791
5792/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5793///
5794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi8)
5795#[inline]
5796#[target_feature(enable = "avx512bw,avx512vl")]
5797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5798#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5799pub const fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
5800    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1)), 1) }
5801}
5802
5803/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
5804///
5805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi16)
5806#[inline]
5807#[target_feature(enable = "avx512bw,avx512vl")]
5808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5809#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5810pub const fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
5811    unsafe { simd_reduce_or(a.as_i16x16()) }
5812}
5813
5814/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
5815///
5816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi16)
5817#[inline]
5818#[target_feature(enable = "avx512bw,avx512vl")]
5819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5820#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5821pub const fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
5822    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
5823}
5824
5825/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
5826///
5827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi16)
5828#[inline]
5829#[target_feature(enable = "avx512bw,avx512vl")]
5830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5832pub const fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
5833    unsafe { simd_reduce_or(a.as_i16x8()) }
5834}
5835
5836/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
5837///
5838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi16)
5839#[inline]
5840#[target_feature(enable = "avx512bw,avx512vl")]
5841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5842#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5843pub const fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
5844    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
5845}
5846
5847/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
5848///
5849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi8)
5850#[inline]
5851#[target_feature(enable = "avx512bw,avx512vl")]
5852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5853#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5854pub const fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
5855    unsafe { simd_reduce_or(a.as_i8x32()) }
5856}
5857
5858/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
5859///
5860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi8)
5861#[inline]
5862#[target_feature(enable = "avx512bw,avx512vl")]
5863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5864#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5865pub const fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
5866    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
5867}
5868
5869/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
5870///
5871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi8)
5872#[inline]
5873#[target_feature(enable = "avx512bw,avx512vl")]
5874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5875#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5876pub const fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
5877    unsafe { simd_reduce_or(a.as_i8x16()) }
5878}
5879
5880/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
5881///
5882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi8)
5883#[inline]
5884#[target_feature(enable = "avx512bw,avx512vl")]
5885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5886#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5887pub const fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
5888    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
5889}
5890
/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned 64-byte read.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
    // Plain unaligned 512-bit read reinterpreted as a vector.
    ptr::read_unaligned(mem_addr as *const __m512i)
}
5902
/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned 32-byte read.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
    // Plain unaligned 256-bit read reinterpreted as a vector.
    ptr::read_unaligned(mem_addr as *const __m256i)
}
5914
/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned 16-byte read.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
    // Plain unaligned 128-bit read reinterpreted as a vector.
    ptr::read_unaligned(mem_addr as *const __m128i)
}
5926
/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned 64-byte read.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
    // Plain unaligned 512-bit read reinterpreted as a vector.
    ptr::read_unaligned(mem_addr as *const __m512i)
}
5938
/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned 32-byte read.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
    // Plain unaligned 256-bit read reinterpreted as a vector.
    ptr::read_unaligned(mem_addr as *const __m256i)
}
5950
/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned 16-byte read.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
    // Plain unaligned 128-bit read reinterpreted as a vector.
    ptr::read_unaligned(mem_addr as *const __m128i)
}
5962
/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned 64-byte write.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
    // Plain unaligned 512-bit write of the whole vector.
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}
5974
/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned 32-byte write.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
    // Plain unaligned 256-bit write of the whole vector.
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}
5986
/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned 16-byte write.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
    // Plain unaligned 128-bit write of the whole vector.
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}
5998
/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned 64-byte write.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
    // Plain unaligned 512-bit write of the whole vector.
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}
6010
/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned 32-byte write.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
    // Plain unaligned 256-bit write of the whole vector.
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}
6022
/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// # Safety
///
/// `mem_addr` must be valid for an unaligned 16-byte write.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
    // Plain unaligned 128-bit write of the whole vector.
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}
6034
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_epi16(
    src: __m512i,
    k: __mmask32,
    mem_addr: *const i16,
) -> __m512i {
    // Expand the bitmask into a per-lane vector mask: !0 = load from memory,
    // 0 = keep the corresponding lane of `src`.
    let mask = simd_select_bitmask(k, i16x32::splat(!0), i16x32::ZERO);
    // NOTE(review): presumably only lanes enabled in `k` are read from memory
    // (masked-load semantics) — confirm against `simd_masked_load` docs.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x32()).as_m512i()
}
6053
/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    // Zero-masking is just write-masking with an all-zeros `src`.
    _mm512_mask_loadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}
6067
/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_epi8(
    src: __m512i,
    k: __mmask64,
    mem_addr: *const i8,
) -> __m512i {
    // Expand the bitmask into a per-lane vector mask: !0 = load from memory,
    // 0 = keep the corresponding lane of `src`.
    let mask = simd_select_bitmask(k, i8x64::splat(!0), i8x64::ZERO);
    // NOTE(review): presumably only lanes enabled in `k` are read from memory
    // (masked-load semantics) — confirm against `simd_masked_load` docs.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x64()).as_m512i()
}
6086
/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    // Zero-masking is just write-masking with an all-zeros `src`.
    _mm512_mask_loadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}
6100
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_epi16(
    src: __m256i,
    k: __mmask16,
    mem_addr: *const i16,
) -> __m256i {
    // Expand the bitmask into a per-lane vector mask: !0 = load from memory,
    // 0 = keep the corresponding lane of `src`.
    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
    // NOTE(review): presumably only lanes enabled in `k` are read from memory
    // (masked-load semantics) — confirm against `simd_masked_load` docs.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x16()).as_m256i()
}
6119
/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    // Zero-masking is just write-masking with an all-zeros `src`.
    _mm256_mask_loadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}
6133
/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_epi8(
    src: __m256i,
    k: __mmask32,
    mem_addr: *const i8,
) -> __m256i {
    // Expand the bitmask into a per-lane vector mask: !0 = load from memory,
    // 0 = keep the corresponding lane of `src`.
    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
    // NOTE(review): presumably only lanes enabled in `k` are read from memory
    // (masked-load semantics) — confirm against `simd_masked_load` docs.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x32()).as_m256i()
}
6152
/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    // Zero-masking is just write-masking with an all-zeros `src`.
    _mm256_mask_loadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}
6166
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_epi16(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i16,
) -> __m128i {
    // Expand the bitmask into a per-lane vector mask: !0 = load from memory,
    // 0 = keep the corresponding lane of `src`.
    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
    // NOTE(review): presumably only lanes enabled in `k` are read from memory
    // (masked-load semantics) — confirm against `simd_masked_load` docs.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x8()).as_m128i()
}
6185
/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    // Zero-masking is just write-masking with an all-zeros `src`.
    _mm_mask_loadu_epi16(_mm_setzero_si128(), k, mem_addr)
}
6199
/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_epi8(
    src: __m128i,
    k: __mmask16,
    mem_addr: *const i8,
) -> __m128i {
    // Expand the bitmask into a per-lane vector mask: !0 = load from memory,
    // 0 = keep the corresponding lane of `src`.
    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
    // NOTE(review): presumably only lanes enabled in `k` are read from memory
    // (masked-load semantics) — confirm against `simd_masked_load` docs.
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x16()).as_m128i()
}
6218
/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    // Zero-masking is just write-masking with an all-zeros `src`.
    _mm_mask_loadu_epi8(_mm_setzero_si128(), k, mem_addr)
}
6232
/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
    // Expand the bitmask into a per-lane vector mask: !0 = store the lane,
    // 0 = leave that memory location untouched.
    let mask = simd_select_bitmask(mask, i16x32::splat(!0), i16x32::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x32());
}
6246
/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
    // Expand the bitmask into a per-lane vector mask: !0 = store the lane,
    // 0 = leave that memory location untouched.
    let mask = simd_select_bitmask(mask, i8x64::splat(!0), i8x64::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x64());
}
6260
/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
    // Expand the bitmask into a per-lane vector mask: !0 = store the lane,
    // 0 = leave that memory location untouched.
    let mask = simd_select_bitmask(mask, i16x16::splat(!0), i16x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x16());
}
6274
/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
    // Expand the bitmask into a per-lane vector mask: !0 = store the lane,
    // 0 = leave that memory location untouched.
    let mask = simd_select_bitmask(mask, i8x32::splat(!0), i8x32::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x32());
}
6288
/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
    // Expand the bitmask into a per-lane vector mask: !0 = store the lane,
    // 0 = leave that memory location untouched.
    let mask = simd_select_bitmask(mask, i16x8::splat(!0), i16x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x8());
}
6302
/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
    // Expand the bitmask into a per-lane vector mask: !0 = store the lane,
    // 0 = leave that memory location untouched.
    let mask = simd_select_bitmask(mask, i8x16::splat(!0), i8x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x16());
}
6316
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_madd_epi16&expand=3511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
    // It's a trick used in the Adler-32 algorithm to perform a widening addition.
    //
    // ```rust
    // #[target_feature(enable = "avx512bw")]
    // unsafe fn widening_add(mad: __m512i) -> __m512i {
    //     _mm512_madd_epi16(mad, _mm512_set1_epi16(1))
    // }
    // ```
    //
    // If we implement this using generic vector intrinsics, the optimizer
    // will eliminate this pattern, and `vpmaddwd` will no longer be emitted.
    // For this reason, we use x86 intrinsics.
    //
    // `vpmaddwd` is the dedicated LLVM intrinsic for this instruction, so
    // the statement below maps 1:1 to the hardware operation that the
    // `assert_instr(vpmaddwd)` attribute above verifies.
    unsafe { transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) }
}
6339
6340/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6341///
6342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_madd_epi16&expand=3512)
6343#[inline]
6344#[target_feature(enable = "avx512bw")]
6345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6346#[cfg_attr(test, assert_instr(vpmaddwd))]
6347pub fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
6348    unsafe {
6349        let madd = _mm512_madd_epi16(a, b).as_i32x16();
6350        transmute(simd_select_bitmask(k, madd, src.as_i32x16()))
6351    }
6352}
6353
6354/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6355///
6356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_madd_epi16&expand=3513)
6357#[inline]
6358#[target_feature(enable = "avx512bw")]
6359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6360#[cfg_attr(test, assert_instr(vpmaddwd))]
6361pub fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
6362    unsafe {
6363        let madd = _mm512_madd_epi16(a, b).as_i32x16();
6364        transmute(simd_select_bitmask(k, madd, i32x16::ZERO))
6365    }
6366}
6367
6368/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6369///
6370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_madd_epi16&expand=3509)
6371#[inline]
6372#[target_feature(enable = "avx512bw,avx512vl")]
6373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6374#[cfg_attr(test, assert_instr(vpmaddwd))]
6375pub fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
6376    unsafe {
6377        let madd = _mm256_madd_epi16(a, b).as_i32x8();
6378        transmute(simd_select_bitmask(k, madd, src.as_i32x8()))
6379    }
6380}
6381
6382/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6383///
6384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_madd_epi16&expand=3510)
6385#[inline]
6386#[target_feature(enable = "avx512bw,avx512vl")]
6387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6388#[cfg_attr(test, assert_instr(vpmaddwd))]
6389pub fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
6390    unsafe {
6391        let madd = _mm256_madd_epi16(a, b).as_i32x8();
6392        transmute(simd_select_bitmask(k, madd, i32x8::ZERO))
6393    }
6394}
6395
6396/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6397///
6398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_madd_epi16&expand=3506)
6399#[inline]
6400#[target_feature(enable = "avx512bw,avx512vl")]
6401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6402#[cfg_attr(test, assert_instr(vpmaddwd))]
6403pub fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6404    unsafe {
6405        let madd = _mm_madd_epi16(a, b).as_i32x4();
6406        transmute(simd_select_bitmask(k, madd, src.as_i32x4()))
6407    }
6408}
6409
6410/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6411///
6412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_madd_epi16&expand=3507)
6413#[inline]
6414#[target_feature(enable = "avx512bw,avx512vl")]
6415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6416#[cfg_attr(test, assert_instr(vpmaddwd))]
6417pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6418    unsafe {
6419        let madd = _mm_madd_epi16(a, b).as_i32x4();
6420        transmute(simd_select_bitmask(k, madd, i32x4::ZERO))
6421    }
6422}
6423
/// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maddubs_epi16&expand=3539)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Delegates to the dedicated LLVM intrinsic; note the asymmetric operand
    // interpretation: `a` as unsigned bytes, `b` as signed bytes.
    unsafe { transmute(vpmaddubsw(a.as_u8x64(), b.as_i8x64())) }
}
6434
6435/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6436///
6437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_maddubs_epi16&expand=3540)
6438#[inline]
6439#[target_feature(enable = "avx512bw")]
6440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6441#[cfg_attr(test, assert_instr(vpmaddubsw))]
6442pub fn _mm512_mask_maddubs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6443    unsafe {
6444        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
6445        transmute(simd_select_bitmask(k, madd, src.as_i16x32()))
6446    }
6447}
6448
6449/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6450///
6451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_maddubs_epi16&expand=3541)
6452#[inline]
6453#[target_feature(enable = "avx512bw")]
6454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6455#[cfg_attr(test, assert_instr(vpmaddubsw))]
6456pub fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6457    unsafe {
6458        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
6459        transmute(simd_select_bitmask(k, madd, i16x32::ZERO))
6460    }
6461}
6462
6463/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6464///
6465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_maddubs_epi16&expand=3537)
6466#[inline]
6467#[target_feature(enable = "avx512bw,avx512vl")]
6468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6469#[cfg_attr(test, assert_instr(vpmaddubsw))]
6470pub fn _mm256_mask_maddubs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6471    unsafe {
6472        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
6473        transmute(simd_select_bitmask(k, madd, src.as_i16x16()))
6474    }
6475}
6476
6477/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6478///
6479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_maddubs_epi16&expand=3538)
6480#[inline]
6481#[target_feature(enable = "avx512bw,avx512vl")]
6482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6483#[cfg_attr(test, assert_instr(vpmaddubsw))]
6484pub fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6485    unsafe {
6486        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
6487        transmute(simd_select_bitmask(k, madd, i16x16::ZERO))
6488    }
6489}
6490
6491/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6492///
6493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_maddubs_epi16&expand=3534)
6494#[inline]
6495#[target_feature(enable = "avx512bw,avx512vl")]
6496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6497#[cfg_attr(test, assert_instr(vpmaddubsw))]
6498pub fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6499    unsafe {
6500        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
6501        transmute(simd_select_bitmask(k, madd, src.as_i16x8()))
6502    }
6503}
6504
6505/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6506///
6507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_maddubs_epi16&expand=3535)
6508#[inline]
6509#[target_feature(enable = "avx512bw,avx512vl")]
6510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6511#[cfg_attr(test, assert_instr(vpmaddubsw))]
6512pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6513    unsafe {
6514        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
6515        transmute(simd_select_bitmask(k, madd, i16x8::ZERO))
6516    }
6517}
6518
/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi32&expand=4091)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
    // The signed-saturating pack is performed by the dedicated LLVM intrinsic.
    unsafe { transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) }
}
6529
6530/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6531///
6532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi32&expand=4089)
6533#[inline]
6534#[target_feature(enable = "avx512bw")]
6535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6536#[cfg_attr(test, assert_instr(vpackssdw))]
6537pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6538    unsafe {
6539        let pack = _mm512_packs_epi32(a, b).as_i16x32();
6540        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
6541    }
6542}
6543
6544/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6545///
6546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi32&expand=4090)
6547#[inline]
6548#[target_feature(enable = "avx512bw")]
6549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6550#[cfg_attr(test, assert_instr(vpackssdw))]
6551pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6552    unsafe {
6553        let pack = _mm512_packs_epi32(a, b).as_i16x32();
6554        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
6555    }
6556}
6557
6558/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6559///
6560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi32&expand=4086)
6561#[inline]
6562#[target_feature(enable = "avx512bw,avx512vl")]
6563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6564#[cfg_attr(test, assert_instr(vpackssdw))]
6565pub fn _mm256_mask_packs_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6566    unsafe {
6567        let pack = _mm256_packs_epi32(a, b).as_i16x16();
6568        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
6569    }
6570}
6571
6572/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6573///
6574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi32&expand=4087)
6575#[inline]
6576#[target_feature(enable = "avx512bw,avx512vl")]
6577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6578#[cfg_attr(test, assert_instr(vpackssdw))]
6579pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6580    unsafe {
6581        let pack = _mm256_packs_epi32(a, b).as_i16x16();
6582        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
6583    }
6584}
6585
6586/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6587///
6588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi32&expand=4083)
6589#[inline]
6590#[target_feature(enable = "avx512bw,avx512vl")]
6591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6592#[cfg_attr(test, assert_instr(vpackssdw))]
6593pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6594    unsafe {
6595        let pack = _mm_packs_epi32(a, b).as_i16x8();
6596        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
6597    }
6598}
6599
6600/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6601///
6602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi32&expand=4084)
6603#[inline]
6604#[target_feature(enable = "avx512bw,avx512vl")]
6605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6606#[cfg_attr(test, assert_instr(vpackssdw))]
6607pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6608    unsafe {
6609        let pack = _mm_packs_epi32(a, b).as_i16x8();
6610        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
6611    }
6612}
6613
/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi16&expand=4082)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
    // The signed-saturating pack is performed by the dedicated LLVM intrinsic.
    unsafe { transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) }
}
6624
6625/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6626///
6627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi16&expand=4080)
6628#[inline]
6629#[target_feature(enable = "avx512bw")]
6630#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6631#[cfg_attr(test, assert_instr(vpacksswb))]
6632pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6633    unsafe {
6634        let pack = _mm512_packs_epi16(a, b).as_i8x64();
6635        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
6636    }
6637}
6638
6639/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6640///
6641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi16&expand=4081)
6642#[inline]
6643#[target_feature(enable = "avx512bw")]
6644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6645#[cfg_attr(test, assert_instr(vpacksswb))]
6646pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6647    unsafe {
6648        let pack = _mm512_packs_epi16(a, b).as_i8x64();
6649        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
6650    }
6651}
6652
6653/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6654///
6655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi16&expand=4077)
6656#[inline]
6657#[target_feature(enable = "avx512bw,avx512vl")]
6658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6659#[cfg_attr(test, assert_instr(vpacksswb))]
6660pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6661    unsafe {
6662        let pack = _mm256_packs_epi16(a, b).as_i8x32();
6663        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
6664    }
6665}
6666
6667/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6668///
6669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=#text=_mm256_maskz_packs_epi16&expand=4078)
6670#[inline]
6671#[target_feature(enable = "avx512bw,avx512vl")]
6672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6673#[cfg_attr(test, assert_instr(vpacksswb))]
6674pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6675    unsafe {
6676        let pack = _mm256_packs_epi16(a, b).as_i8x32();
6677        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
6678    }
6679}
6680
6681/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6682///
6683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi16&expand=4074)
6684#[inline]
6685#[target_feature(enable = "avx512bw,avx512vl")]
6686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6687#[cfg_attr(test, assert_instr(vpacksswb))]
6688pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6689    unsafe {
6690        let pack = _mm_packs_epi16(a, b).as_i8x16();
6691        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
6692    }
6693}
6694
6695/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6696///
6697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi16&expand=4075)
6698#[inline]
6699#[target_feature(enable = "avx512bw,avx512vl")]
6700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6701#[cfg_attr(test, assert_instr(vpacksswb))]
6702pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6703    unsafe {
6704        let pack = _mm_packs_epi16(a, b).as_i8x16();
6705        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
6706    }
6707}
6708
/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi32&expand=4130)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
    // The unsigned-saturating pack is performed by the dedicated LLVM intrinsic.
    unsafe { transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) }
}
6719
6720/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6721///
6722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi32&expand=4128)
6723#[inline]
6724#[target_feature(enable = "avx512bw")]
6725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6726#[cfg_attr(test, assert_instr(vpackusdw))]
6727pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6728    unsafe {
6729        let pack = _mm512_packus_epi32(a, b).as_i16x32();
6730        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
6731    }
6732}
6733
6734/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6735///
6736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi32&expand=4129)
6737#[inline]
6738#[target_feature(enable = "avx512bw")]
6739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6740#[cfg_attr(test, assert_instr(vpackusdw))]
6741pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6742    unsafe {
6743        let pack = _mm512_packus_epi32(a, b).as_i16x32();
6744        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
6745    }
6746}
6747
6748/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6749///
6750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi32&expand=4125)
6751#[inline]
6752#[target_feature(enable = "avx512bw,avx512vl")]
6753#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6754#[cfg_attr(test, assert_instr(vpackusdw))]
6755pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6756    unsafe {
6757        let pack = _mm256_packus_epi32(a, b).as_i16x16();
6758        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
6759    }
6760}
6761
6762/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6763///
6764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi32&expand=4126)
6765#[inline]
6766#[target_feature(enable = "avx512bw,avx512vl")]
6767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6768#[cfg_attr(test, assert_instr(vpackusdw))]
6769pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6770    unsafe {
6771        let pack = _mm256_packus_epi32(a, b).as_i16x16();
6772        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
6773    }
6774}
6775
6776/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6777///
6778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi32&expand=4122)
6779#[inline]
6780#[target_feature(enable = "avx512bw,avx512vl")]
6781#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6782#[cfg_attr(test, assert_instr(vpackusdw))]
6783pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6784    unsafe {
6785        let pack = _mm_packus_epi32(a, b).as_i16x8();
6786        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
6787    }
6788}
6789
6790/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6791///
6792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi32&expand=4123)
6793#[inline]
6794#[target_feature(enable = "avx512bw,avx512vl")]
6795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6796#[cfg_attr(test, assert_instr(vpackusdw))]
6797pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6798    unsafe {
6799        let pack = _mm_packus_epi32(a, b).as_i16x8();
6800        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
6801    }
6802}
6803
/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi16&expand=4121)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
    // The unsigned-saturating pack is performed by the dedicated LLVM intrinsic.
    unsafe { transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) }
}
6814
6815/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6816///
6817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi16&expand=4119)
6818#[inline]
6819#[target_feature(enable = "avx512bw")]
6820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6821#[cfg_attr(test, assert_instr(vpackuswb))]
6822pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6823    unsafe {
6824        let pack = _mm512_packus_epi16(a, b).as_i8x64();
6825        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
6826    }
6827}
6828
6829/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6830///
6831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi16&expand=4120)
6832#[inline]
6833#[target_feature(enable = "avx512bw")]
6834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6835#[cfg_attr(test, assert_instr(vpackuswb))]
6836pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6837    unsafe {
6838        let pack = _mm512_packus_epi16(a, b).as_i8x64();
6839        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
6840    }
6841}
6842
6843/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6844///
6845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi16&expand=4116)
6846#[inline]
6847#[target_feature(enable = "avx512bw,avx512vl")]
6848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6849#[cfg_attr(test, assert_instr(vpackuswb))]
6850pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6851    unsafe {
6852        let pack = _mm256_packus_epi16(a, b).as_i8x32();
6853        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
6854    }
6855}
6856
/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi16&expand=4117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Perform the full-width pack, then zero the lanes whose mask bit is clear.
        let pack = _mm256_packus_epi16(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
    }
}
6870
/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi16&expand=4113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Perform the full-width pack, then merge: clear-mask lanes come from `src`.
        let pack = _mm_packus_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
    }
}
6884
/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi16&expand=4114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Perform the full-width pack, then zero the lanes whose mask bit is clear.
        let pack = _mm_packus_epi16(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
    }
}
6898
/// Average packed unsigned 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu16&expand=388)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Widen both operands to u32 lanes so that `a + b + 1` cannot overflow,
        // compute the rounded average (a + b + 1) >> 1, then narrow back to u16.
        let a = simd_cast::<_, u32x32>(a.as_u16x32());
        let b = simd_cast::<_, u32x32>(b.as_u16x32());
        let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1));
        transmute(simd_cast::<_, u16x32>(r))
    }
}
6915
/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu16&expand=389)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Full-width rounded average, then merge: clear-mask lanes come from `src`.
        let avg = _mm512_avg_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
    }
}
6930
/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu16&expand=390)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Full-width rounded average, then zero the lanes whose mask bit is clear.
        let avg = _mm512_avg_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, avg, u16x32::ZERO))
    }
}
6945
/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu16&expand=386)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Full-width rounded average, then merge: clear-mask lanes come from `src`.
        let avg = _mm256_avg_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
    }
}
6960
/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu16&expand=387)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Full-width rounded average, then zero the lanes whose mask bit is clear.
        let avg = _mm256_avg_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, avg, u16x16::ZERO))
    }
}
6975
/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu16&expand=383)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Full-width rounded average, then merge: clear-mask lanes come from `src`.
        let avg = _mm_avg_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
    }
}
6990
/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu16&expand=384)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Full-width rounded average, then zero the lanes whose mask bit is clear.
        let avg = _mm_avg_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, avg, u16x8::ZERO))
    }
}
7005
/// Average packed unsigned 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu8&expand=397)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Widen both operands to u16 lanes so that `a + b + 1` cannot overflow,
        // compute the rounded average (a + b + 1) >> 1, then narrow back to u8.
        let a = simd_cast::<_, u16x64>(a.as_u8x64());
        let b = simd_cast::<_, u16x64>(b.as_u8x64());
        let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1));
        transmute(simd_cast::<_, u8x64>(r))
    }
}
7022
/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu8&expand=398)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Full-width rounded average, then merge: clear-mask lanes come from `src`.
        let avg = _mm512_avg_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
    }
}
7037
/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu8&expand=399)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Full-width rounded average, then zero the lanes whose mask bit is clear.
        let avg = _mm512_avg_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, avg, u8x64::ZERO))
    }
}
7052
/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu8&expand=395)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Full-width rounded average, then merge: clear-mask lanes come from `src`.
        let avg = _mm256_avg_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
    }
}
7067
/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu8&expand=396)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Full-width rounded average, then zero the lanes whose mask bit is clear.
        let avg = _mm256_avg_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, avg, u8x32::ZERO))
    }
}
7082
/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu8&expand=392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Full-width rounded average, then merge: clear-mask lanes come from `src`.
        let avg = _mm_avg_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
    }
}
7097
/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu8&expand=393)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Full-width rounded average, then zero the lanes whose mask bit is clear.
        let avg = _mm_avg_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, avg, u8x16::ZERO))
    }
}
7112
/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi16&expand=5271)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
    // All lanes are shifted by the same scalar count held in the low 64 bits of
    // `count`; delegates to the `vpsllw` LLVM intrinsic for the hardware semantics.
    unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) }
}
7123
/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi16&expand=5269)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        // Full-width shift, then merge: clear-mask lanes come from `src`.
        let shf = _mm512_sll_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}
7137
/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi16&expand=5270)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        // Full-width shift, then zero the lanes whose mask bit is clear.
        let shf = _mm512_sll_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}
7151
/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi16&expand=5266)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        // Full-width shift, then merge: clear-mask lanes come from `src`.
        let shf = _mm256_sll_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}
7165
/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi16&expand=5267)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        // Full-width shift, then zero the lanes whose mask bit is clear.
        let shf = _mm256_sll_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}
7179
/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi16&expand=5263)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        // Full-width shift, then merge: clear-mask lanes come from `src`.
        let shf = _mm_sll_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
    }
}
7193
/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi16&expand=5264)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        // Full-width shift, then zero the lanes whose mask bit is clear.
        let shf = _mm_sll_epi16(a, count).as_i16x8();
        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
    }
}
7207
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi16&expand=5301)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // A shift of 16 or more produces an all-zero result; guarding here also
        // keeps the `simd_shl` count below the lane width (Rust shift-overflow UB).
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
        }
    }
}
7227
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi16&expand=5299)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_slli_epi16<const IMM8: u32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more yield zero; the guard also keeps `simd_shl` in range.
        let shf = if IMM8 >= 16 {
            u16x32::ZERO
        } else {
            simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))
        };
        // Merge: clear-mask lanes come from `src`.
        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
    }
}
7252
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi16&expand=5300)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more yield zero regardless of the mask, so no select is
        // needed on that path; the guard also keeps `simd_shl` in range.
        if IMM8 >= 16 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}
7273
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi16&expand=5296)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_slli_epi16<const IMM8: u32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more yield zero; the guard also keeps `simd_shl` in range.
        let shf = if IMM8 >= 16 {
            u16x16::ZERO
        } else {
            simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))
        };
        // Merge: clear-mask lanes come from `src`.
        transmute(simd_select_bitmask(k, shf, src.as_u16x16()))
    }
}
7298
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi16&expand=5297)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more yield zero regardless of the mask, so no select is
        // needed on that path; the guard also keeps `simd_shl` in range.
        if IMM8 >= 16 {
            _mm256_setzero_si256()
        } else {
            let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x16::ZERO))
        }
    }
}
7319
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi16&expand=5293)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_slli_epi16<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more yield zero; the guard also keeps `simd_shl` in range.
        let shf = if IMM8 >= 16 {
            u16x8::ZERO
        } else {
            simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))
        };
        // Merge: clear-mask lanes come from `src`.
        transmute(simd_select_bitmask(k, shf, src.as_u16x8()))
    }
}
7344
/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi16&expand=5294)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shifts of 16 or more yield zero regardless of the mask, so no select is
        // needed on that path; the guard also keeps `simd_shl` in range.
        if IMM8 >= 16 {
            _mm_setzero_si128()
        } else {
            let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x8::ZERO))
        }
    }
}
7365
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi16&expand=5333)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // Per-lane counts of 16 or more must produce zero; first clamp those counts
        // to 0 so `simd_shl` never sees an out-of-range shift (which would be UB),
        // then force the corresponding result lanes to zero.
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x32::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
    }
}
7382
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi16&expand=5331)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sllv_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    unsafe {
        // Full-width variable shift, then merge: clear-mask lanes come from `src`.
        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
    }
}
7402
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi16&expand=5332)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        // Full-width variable shift, then zero the lanes whose mask bit is clear.
        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
    }
}
7417
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi16&expand=5330)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // Per-lane counts of 16 or more must produce zero; first clamp those counts
        // to 0 so `simd_shl` never sees an out-of-range shift (which would be UB),
        // then force the corresponding result lanes to zero.
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x16::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
    }
}
7434
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi16&expand=5328)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sllv_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    unsafe {
        // Full-width variable shift, then merge: clear-mask lanes come from `src`.
        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
    }
}
7454
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi16&expand=5329)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        // Full-width variable shift, then zero the lanes whose mask bit is clear.
        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
    }
}
7469
7470/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7471///
7472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi16&expand=5327)
7473#[inline]
7474#[target_feature(enable = "avx512bw,avx512vl")]
7475#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7476#[cfg_attr(test, assert_instr(vpsllvw))]
7477#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7478pub const fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
7479    unsafe {
7480        let count = count.as_u16x8();
7481        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
7482        let count = simd_select(no_overflow, count, u16x8::ZERO);
7483        simd_select(no_overflow, simd_shl(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
7484    }
7485}
7486
7487/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7488///
7489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi16&expand=5325)
7490#[inline]
7491#[target_feature(enable = "avx512bw,avx512vl")]
7492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7493#[cfg_attr(test, assert_instr(vpsllvw))]
7494#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7495pub const fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7496    unsafe {
7497        let shf = _mm_sllv_epi16(a, count).as_i16x8();
7498        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7499    }
7500}
7501
7502/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7503///
7504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi16&expand=5326)
7505#[inline]
7506#[target_feature(enable = "avx512bw,avx512vl")]
7507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7508#[cfg_attr(test, assert_instr(vpsllvw))]
7509#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7510pub const fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7511    unsafe {
7512        let shf = _mm_sllv_epi16(a, count).as_i16x8();
7513        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7514    }
7515}
7516
7517/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst.
7518///
7519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi16&expand=5483)
7520#[inline]
7521#[target_feature(enable = "avx512bw")]
7522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7523#[cfg_attr(test, assert_instr(vpsrlw))]
7524pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
7525    unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) }
7526}
7527
7528/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7529///
7530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi16&expand=5481)
7531#[inline]
7532#[target_feature(enable = "avx512bw")]
7533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7534#[cfg_attr(test, assert_instr(vpsrlw))]
7535pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7536    unsafe {
7537        let shf = _mm512_srl_epi16(a, count).as_i16x32();
7538        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7539    }
7540}
7541
7542/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7543///
7544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi16&expand=5482)
7545#[inline]
7546#[target_feature(enable = "avx512bw")]
7547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7548#[cfg_attr(test, assert_instr(vpsrlw))]
7549pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7550    unsafe {
7551        let shf = _mm512_srl_epi16(a, count).as_i16x32();
7552        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7553    }
7554}
7555
7556/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7557///
7558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi16&expand=5478)
7559#[inline]
7560#[target_feature(enable = "avx512bw,avx512vl")]
7561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7562#[cfg_attr(test, assert_instr(vpsrlw))]
7563pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7564    unsafe {
7565        let shf = _mm256_srl_epi16(a, count).as_i16x16();
7566        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7567    }
7568}
7569
7570/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7571///
7572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi16&expand=5479)
7573#[inline]
7574#[target_feature(enable = "avx512bw,avx512vl")]
7575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7576#[cfg_attr(test, assert_instr(vpsrlw))]
7577pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7578    unsafe {
7579        let shf = _mm256_srl_epi16(a, count).as_i16x16();
7580        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7581    }
7582}
7583
7584/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7585///
7586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi16&expand=5475)
7587#[inline]
7588#[target_feature(enable = "avx512bw,avx512vl")]
7589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7590#[cfg_attr(test, assert_instr(vpsrlw))]
7591pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7592    unsafe {
7593        let shf = _mm_srl_epi16(a, count).as_i16x8();
7594        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7595    }
7596}
7597
7598/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7599///
7600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi16&expand=5476)
7601#[inline]
7602#[target_feature(enable = "avx512bw,avx512vl")]
7603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7604#[cfg_attr(test, assert_instr(vpsrlw))]
7605pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7606    unsafe {
7607        let shf = _mm_srl_epi16(a, count).as_i16x8();
7608        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7609    }
7610}
7611
7612/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
7613///
7614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi16&expand=5513)
7615#[inline]
7616#[target_feature(enable = "avx512bw")]
7617#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7618#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7619#[rustc_legacy_const_generics(1)]
7620#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7621pub const fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
7622    unsafe {
7623        static_assert_uimm_bits!(IMM8, 8);
7624        if IMM8 >= 16 {
7625            _mm512_setzero_si512()
7626        } else {
7627            transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
7628        }
7629    }
7630}
7631
7632/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7633///
7634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi16&expand=5511)
7635#[inline]
7636#[target_feature(enable = "avx512bw")]
7637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7638#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7639#[rustc_legacy_const_generics(3)]
7640#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7641pub const fn _mm512_mask_srli_epi16<const IMM8: u32>(
7642    src: __m512i,
7643    k: __mmask32,
7644    a: __m512i,
7645) -> __m512i {
7646    unsafe {
7647        static_assert_uimm_bits!(IMM8, 8);
7648        let shf = if IMM8 >= 16 {
7649            u16x32::ZERO
7650        } else {
7651            simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))
7652        };
7653        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
7654    }
7655}
7656
7657/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7658///
7659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi16&expand=5512)
7660#[inline]
7661#[target_feature(enable = "avx512bw")]
7662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7663#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7664#[rustc_legacy_const_generics(2)]
7665#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7666pub const fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
7667    unsafe {
7668        static_assert_uimm_bits!(IMM8, 8);
7669        //imm8 should be u32, it seems the document to verify is incorrect
7670        if IMM8 >= 16 {
7671            _mm512_setzero_si512()
7672        } else {
7673            let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16));
7674            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
7675        }
7676    }
7677}
7678
7679/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7680///
7681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi16&expand=5508)
7682#[inline]
7683#[target_feature(enable = "avx512bw,avx512vl")]
7684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7685#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7686#[rustc_legacy_const_generics(3)]
7687#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7688pub const fn _mm256_mask_srli_epi16<const IMM8: i32>(
7689    src: __m256i,
7690    k: __mmask16,
7691    a: __m256i,
7692) -> __m256i {
7693    unsafe {
7694        static_assert_uimm_bits!(IMM8, 8);
7695        let shf = _mm256_srli_epi16::<IMM8>(a);
7696        transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
7697    }
7698}
7699
7700/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7701///
7702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi16&expand=5509)
7703#[inline]
7704#[target_feature(enable = "avx512bw,avx512vl")]
7705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7706#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7707#[rustc_legacy_const_generics(2)]
7708#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7709pub const fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
7710    unsafe {
7711        static_assert_uimm_bits!(IMM8, 8);
7712        let shf = _mm256_srli_epi16::<IMM8>(a);
7713        transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO))
7714    }
7715}
7716
7717/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7718///
7719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi16&expand=5505)
7720#[inline]
7721#[target_feature(enable = "avx512bw,avx512vl")]
7722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7723#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7724#[rustc_legacy_const_generics(3)]
7725#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7726pub const fn _mm_mask_srli_epi16<const IMM8: i32>(
7727    src: __m128i,
7728    k: __mmask8,
7729    a: __m128i,
7730) -> __m128i {
7731    unsafe {
7732        static_assert_uimm_bits!(IMM8, 8);
7733        let shf = _mm_srli_epi16::<IMM8>(a);
7734        transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
7735    }
7736}
7737
7738/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7739///
7740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi16&expand=5506)
7741#[inline]
7742#[target_feature(enable = "avx512bw,avx512vl")]
7743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7744#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7745#[rustc_legacy_const_generics(2)]
7746#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7747pub const fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
7748    unsafe {
7749        static_assert_uimm_bits!(IMM8, 8);
7750        let shf = _mm_srli_epi16::<IMM8>(a);
7751        transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO))
7752    }
7753}
7754
7755/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7756///
7757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi16&expand=5545)
7758#[inline]
7759#[target_feature(enable = "avx512bw")]
7760#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7761#[cfg_attr(test, assert_instr(vpsrlvw))]
7762#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7763pub const fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
7764    unsafe {
7765        let count = count.as_u16x32();
7766        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
7767        let count = simd_select(no_overflow, count, u16x32::ZERO);
7768        simd_select(no_overflow, simd_shr(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
7769    }
7770}
7771
7772/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7773///
7774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi16&expand=5543)
7775#[inline]
7776#[target_feature(enable = "avx512bw")]
7777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7778#[cfg_attr(test, assert_instr(vpsrlvw))]
7779#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7780pub const fn _mm512_mask_srlv_epi16(
7781    src: __m512i,
7782    k: __mmask32,
7783    a: __m512i,
7784    count: __m512i,
7785) -> __m512i {
7786    unsafe {
7787        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
7788        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7789    }
7790}
7791
7792/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7793///
7794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi16&expand=5544)
7795#[inline]
7796#[target_feature(enable = "avx512bw")]
7797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7798#[cfg_attr(test, assert_instr(vpsrlvw))]
7799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7800pub const fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
7801    unsafe {
7802        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
7803        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7804    }
7805}
7806
7807/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7808///
7809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi16&expand=5542)
7810#[inline]
7811#[target_feature(enable = "avx512bw,avx512vl")]
7812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7813#[cfg_attr(test, assert_instr(vpsrlvw))]
7814#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7815pub const fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
7816    unsafe {
7817        let count = count.as_u16x16();
7818        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
7819        let count = simd_select(no_overflow, count, u16x16::ZERO);
7820        simd_select(no_overflow, simd_shr(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
7821    }
7822}
7823
7824/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7825///
7826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi16&expand=5540)
7827#[inline]
7828#[target_feature(enable = "avx512bw,avx512vl")]
7829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7830#[cfg_attr(test, assert_instr(vpsrlvw))]
7831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7832pub const fn _mm256_mask_srlv_epi16(
7833    src: __m256i,
7834    k: __mmask16,
7835    a: __m256i,
7836    count: __m256i,
7837) -> __m256i {
7838    unsafe {
7839        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
7840        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7841    }
7842}
7843
7844/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7845///
7846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi16&expand=5541)
7847#[inline]
7848#[target_feature(enable = "avx512bw,avx512vl")]
7849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7850#[cfg_attr(test, assert_instr(vpsrlvw))]
7851#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7852pub const fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
7853    unsafe {
7854        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
7855        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7856    }
7857}
7858
7859/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7860///
7861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi16&expand=5539)
7862#[inline]
7863#[target_feature(enable = "avx512bw,avx512vl")]
7864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7865#[cfg_attr(test, assert_instr(vpsrlvw))]
7866#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7867pub const fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
7868    unsafe {
7869        let count = count.as_u16x8();
7870        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
7871        let count = simd_select(no_overflow, count, u16x8::ZERO);
7872        simd_select(no_overflow, simd_shr(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
7873    }
7874}
7875
7876/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7877///
7878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi16&expand=5537)
7879#[inline]
7880#[target_feature(enable = "avx512bw,avx512vl")]
7881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7882#[cfg_attr(test, assert_instr(vpsrlvw))]
7883#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7884pub const fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7885    unsafe {
7886        let shf = _mm_srlv_epi16(a, count).as_i16x8();
7887        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7888    }
7889}
7890
7891/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7892///
7893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi16&expand=5538)
7894#[inline]
7895#[target_feature(enable = "avx512bw,avx512vl")]
7896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7897#[cfg_attr(test, assert_instr(vpsrlvw))]
7898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7899pub const fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7900    unsafe {
7901        let shf = _mm_srlv_epi16(a, count).as_i16x8();
7902        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7903    }
7904}
7905
7906/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst.
7907///
7908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi16&expand=5398)
7909#[inline]
7910#[target_feature(enable = "avx512bw")]
7911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7912#[cfg_attr(test, assert_instr(vpsraw))]
7913pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
7914    unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) }
7915}
7916
7917/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7918///
7919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi16&expand=5396)
7920#[inline]
7921#[target_feature(enable = "avx512bw")]
7922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7923#[cfg_attr(test, assert_instr(vpsraw))]
7924pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7925    unsafe {
7926        let shf = _mm512_sra_epi16(a, count).as_i16x32();
7927        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7928    }
7929}
7930
7931/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7932///
7933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi16&expand=5397)
7934#[inline]
7935#[target_feature(enable = "avx512bw")]
7936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7937#[cfg_attr(test, assert_instr(vpsraw))]
7938pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7939    unsafe {
7940        let shf = _mm512_sra_epi16(a, count).as_i16x32();
7941        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7942    }
7943}
7944
7945/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7946///
7947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi16&expand=5393)
7948#[inline]
7949#[target_feature(enable = "avx512bw,avx512vl")]
7950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7951#[cfg_attr(test, assert_instr(vpsraw))]
7952pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7953    unsafe {
7954        let shf = _mm256_sra_epi16(a, count).as_i16x16();
7955        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7956    }
7957}
7958
7959/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7960///
7961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi16&expand=5394)
7962#[inline]
7963#[target_feature(enable = "avx512bw,avx512vl")]
7964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7965#[cfg_attr(test, assert_instr(vpsraw))]
7966pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7967    unsafe {
7968        let shf = _mm256_sra_epi16(a, count).as_i16x16();
7969        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7970    }
7971}
7972
7973/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7974///
7975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi16&expand=5390)
7976#[inline]
7977#[target_feature(enable = "avx512bw,avx512vl")]
7978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7979#[cfg_attr(test, assert_instr(vpsraw))]
7980pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7981    unsafe {
7982        let shf = _mm_sra_epi16(a, count).as_i16x8();
7983        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7984    }
7985}
7986
7987/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi16&expand=5391)
7990#[inline]
7991#[target_feature(enable = "avx512bw,avx512vl")]
7992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7993#[cfg_attr(test, assert_instr(vpsraw))]
7994pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7995    unsafe {
7996        let shf = _mm_sra_epi16(a, count).as_i16x8();
7997        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7998    }
7999}
8000
8001/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
8002///
8003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi16&expand=5427)
8004#[inline]
8005#[target_feature(enable = "avx512bw")]
8006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8007#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8008#[rustc_legacy_const_generics(1)]
8009#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8010pub const fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
8011    unsafe {
8012        static_assert_uimm_bits!(IMM8, 8);
8013        transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)))
8014    }
8015}
8016
8017/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8018///
8019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi16&expand=5425)
8020#[inline]
8021#[target_feature(enable = "avx512bw")]
8022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8023#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8024#[rustc_legacy_const_generics(3)]
8025#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8026pub const fn _mm512_mask_srai_epi16<const IMM8: u32>(
8027    src: __m512i,
8028    k: __mmask32,
8029    a: __m512i,
8030) -> __m512i {
8031    unsafe {
8032        static_assert_uimm_bits!(IMM8, 8);
8033        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
8034        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
8035    }
8036}
8037
8038/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8039///
8040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi16&expand=5426)
8041#[inline]
8042#[target_feature(enable = "avx512bw")]
8043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8044#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8045#[rustc_legacy_const_generics(2)]
8046#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8047pub const fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
8048    unsafe {
8049        static_assert_uimm_bits!(IMM8, 8);
8050        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
8051        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
8052    }
8053}
8054
8055/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8056///
8057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi16&expand=5422)
8058#[inline]
8059#[target_feature(enable = "avx512bw,avx512vl")]
8060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8061#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8062#[rustc_legacy_const_generics(3)]
8063#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8064pub const fn _mm256_mask_srai_epi16<const IMM8: u32>(
8065    src: __m256i,
8066    k: __mmask16,
8067    a: __m256i,
8068) -> __m256i {
8069    unsafe {
8070        static_assert_uimm_bits!(IMM8, 8);
8071        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
8072        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
8073    }
8074}
8075
8076/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8077///
8078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi16&expand=5423)
8079#[inline]
8080#[target_feature(enable = "avx512bw,avx512vl")]
8081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8082#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8083#[rustc_legacy_const_generics(2)]
8084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8085pub const fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
8086    unsafe {
8087        static_assert_uimm_bits!(IMM8, 8);
8088        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
8089        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
8090    }
8091}
8092
8093/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8094///
8095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi16&expand=5419)
8096#[inline]
8097#[target_feature(enable = "avx512bw,avx512vl")]
8098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8099#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8100#[rustc_legacy_const_generics(3)]
8101#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8102pub const fn _mm_mask_srai_epi16<const IMM8: u32>(
8103    src: __m128i,
8104    k: __mmask8,
8105    a: __m128i,
8106) -> __m128i {
8107    unsafe {
8108        static_assert_uimm_bits!(IMM8, 8);
8109        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
8110        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
8111    }
8112}
8113
8114/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8115///
8116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi16&expand=5420)
8117#[inline]
8118#[target_feature(enable = "avx512bw,avx512vl")]
8119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8120#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8121#[rustc_legacy_const_generics(2)]
8122#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8123pub const fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
8124    unsafe {
8125        static_assert_uimm_bits!(IMM8, 8);
8126        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
8127        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
8128    }
8129}
8130
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi16&expand=5456)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // A per-lane shift count >= 16 is UB for `simd_shr`. Clamp such lanes
        // to 15: an arithmetic shift by 15 fills the whole lane with the sign
        // bit, which is exactly what vpsravw produces for oversized counts.
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x32::splat(15));
        simd_shr(a.as_i16x32(), count).as_m512i()
    }
}
8147
8148/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8149///
8150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi16&expand=5454)
8151#[inline]
8152#[target_feature(enable = "avx512bw")]
8153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8154#[cfg_attr(test, assert_instr(vpsravw))]
8155#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8156pub const fn _mm512_mask_srav_epi16(
8157    src: __m512i,
8158    k: __mmask32,
8159    a: __m512i,
8160    count: __m512i,
8161) -> __m512i {
8162    unsafe {
8163        let shf = _mm512_srav_epi16(a, count).as_i16x32();
8164        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
8165    }
8166}
8167
8168/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8169///
8170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi16&expand=5455)
8171#[inline]
8172#[target_feature(enable = "avx512bw")]
8173#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8174#[cfg_attr(test, assert_instr(vpsravw))]
8175#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8176pub const fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
8177    unsafe {
8178        let shf = _mm512_srav_epi16(a, count).as_i16x32();
8179        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
8180    }
8181}
8182
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi16&expand=5453)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // A per-lane shift count >= 16 is UB for `simd_shr`. Clamp such lanes
        // to 15: an arithmetic shift by 15 fills the whole lane with the sign
        // bit, which is exactly what vpsravw produces for oversized counts.
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x16::splat(15));
        simd_shr(a.as_i16x16(), count).as_m256i()
    }
}
8199
8200/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8201///
8202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi16&expand=5451)
8203#[inline]
8204#[target_feature(enable = "avx512bw,avx512vl")]
8205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8206#[cfg_attr(test, assert_instr(vpsravw))]
8207#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8208pub const fn _mm256_mask_srav_epi16(
8209    src: __m256i,
8210    k: __mmask16,
8211    a: __m256i,
8212    count: __m256i,
8213) -> __m256i {
8214    unsafe {
8215        let shf = _mm256_srav_epi16(a, count).as_i16x16();
8216        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
8217    }
8218}
8219
8220/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8221///
8222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi16&expand=5452)
8223#[inline]
8224#[target_feature(enable = "avx512bw,avx512vl")]
8225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8226#[cfg_attr(test, assert_instr(vpsravw))]
8227#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8228pub const fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
8229    unsafe {
8230        let shf = _mm256_srav_epi16(a, count).as_i16x16();
8231        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
8232    }
8233}
8234
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi16&expand=5450)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        // A per-lane shift count >= 16 is UB for `simd_shr`. Clamp such lanes
        // to 15: an arithmetic shift by 15 fills the whole lane with the sign
        // bit, which is exactly what vpsravw produces for oversized counts.
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x8::splat(15));
        simd_shr(a.as_i16x8(), count).as_m128i()
    }
}
8251
8252/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8253///
8254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi16&expand=5448)
8255#[inline]
8256#[target_feature(enable = "avx512bw,avx512vl")]
8257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8258#[cfg_attr(test, assert_instr(vpsravw))]
8259#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8260pub const fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8261    unsafe {
8262        let shf = _mm_srav_epi16(a, count).as_i16x8();
8263        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
8264    }
8265}
8266
8267/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8268///
8269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi16&expand=5449)
8270#[inline]
8271#[target_feature(enable = "avx512bw,avx512vl")]
8272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8273#[cfg_attr(test, assert_instr(vpsravw))]
8274#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8275pub const fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8276    unsafe {
8277        let shf = _mm_srav_epi16(a, count).as_i16x8();
8278        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
8279    }
8280}
8281
8282/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
8283///
8284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi16&expand=4226)
8285#[inline]
8286#[target_feature(enable = "avx512bw")]
8287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8288#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8289pub fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
8290    unsafe { transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32())) }
8291}
8292
8293/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8294///
8295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi16&expand=4223)
8296#[inline]
8297#[target_feature(enable = "avx512bw")]
8298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8299#[cfg_attr(test, assert_instr(vpermt2w))]
8300pub fn _mm512_mask_permutex2var_epi16(
8301    a: __m512i,
8302    k: __mmask32,
8303    idx: __m512i,
8304    b: __m512i,
8305) -> __m512i {
8306    unsafe {
8307        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
8308        transmute(simd_select_bitmask(k, permute, a.as_i16x32()))
8309    }
8310}
8311
8312/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8313///
8314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi16&expand=4225)
8315#[inline]
8316#[target_feature(enable = "avx512bw")]
8317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8318#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8319pub fn _mm512_maskz_permutex2var_epi16(
8320    k: __mmask32,
8321    a: __m512i,
8322    idx: __m512i,
8323    b: __m512i,
8324) -> __m512i {
8325    unsafe {
8326        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
8327        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
8328    }
8329}
8330
8331/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8332///
8333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi16&expand=4224)
8334#[inline]
8335#[target_feature(enable = "avx512bw")]
8336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8337#[cfg_attr(test, assert_instr(vpermi2w))]
8338pub fn _mm512_mask2_permutex2var_epi16(
8339    a: __m512i,
8340    idx: __m512i,
8341    k: __mmask32,
8342    b: __m512i,
8343) -> __m512i {
8344    unsafe {
8345        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
8346        transmute(simd_select_bitmask(k, permute, idx.as_i16x32()))
8347    }
8348}
8349
8350/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
8351///
8352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi16&expand=4222)
8353#[inline]
8354#[target_feature(enable = "avx512bw,avx512vl")]
8355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8356#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8357pub fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
8358    unsafe { transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16())) }
8359}
8360
8361/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8362///
8363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi16&expand=4219)
8364#[inline]
8365#[target_feature(enable = "avx512bw,avx512vl")]
8366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8367#[cfg_attr(test, assert_instr(vpermt2w))]
8368pub fn _mm256_mask_permutex2var_epi16(
8369    a: __m256i,
8370    k: __mmask16,
8371    idx: __m256i,
8372    b: __m256i,
8373) -> __m256i {
8374    unsafe {
8375        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
8376        transmute(simd_select_bitmask(k, permute, a.as_i16x16()))
8377    }
8378}
8379
8380/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8381///
8382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi16&expand=4221)
8383#[inline]
8384#[target_feature(enable = "avx512bw,avx512vl")]
8385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8386#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8387pub fn _mm256_maskz_permutex2var_epi16(
8388    k: __mmask16,
8389    a: __m256i,
8390    idx: __m256i,
8391    b: __m256i,
8392) -> __m256i {
8393    unsafe {
8394        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
8395        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
8396    }
8397}
8398
8399/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8400///
8401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi16&expand=4220)
8402#[inline]
8403#[target_feature(enable = "avx512bw,avx512vl")]
8404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8405#[cfg_attr(test, assert_instr(vpermi2w))]
8406pub fn _mm256_mask2_permutex2var_epi16(
8407    a: __m256i,
8408    idx: __m256i,
8409    k: __mmask16,
8410    b: __m256i,
8411) -> __m256i {
8412    unsafe {
8413        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
8414        transmute(simd_select_bitmask(k, permute, idx.as_i16x16()))
8415    }
8416}
8417
8418/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
8419///
8420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi16&expand=4218)
8421#[inline]
8422#[target_feature(enable = "avx512bw,avx512vl")]
8423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8424#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8425pub fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
8426    unsafe { transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8())) }
8427}
8428
8429/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8430///
8431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi16&expand=4215)
8432#[inline]
8433#[target_feature(enable = "avx512bw,avx512vl")]
8434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8435#[cfg_attr(test, assert_instr(vpermt2w))]
8436pub fn _mm_mask_permutex2var_epi16(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
8437    unsafe {
8438        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
8439        transmute(simd_select_bitmask(k, permute, a.as_i16x8()))
8440    }
8441}
8442
8443/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8444///
8445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi16&expand=4217)
8446#[inline]
8447#[target_feature(enable = "avx512bw,avx512vl")]
8448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8449#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8450pub fn _mm_maskz_permutex2var_epi16(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
8451    unsafe {
8452        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
8453        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
8454    }
8455}
8456
8457/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8458///
8459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi16&expand=4216)
8460#[inline]
8461#[target_feature(enable = "avx512bw,avx512vl")]
8462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8463#[cfg_attr(test, assert_instr(vpermi2w))]
8464pub fn _mm_mask2_permutex2var_epi16(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
8465    unsafe {
8466        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
8467        transmute(simd_select_bitmask(k, permute, idx.as_i16x8()))
8468    }
8469}
8470
8471/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
8472///
8473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi16&expand=4295)
8474#[inline]
8475#[target_feature(enable = "avx512bw")]
8476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8477#[cfg_attr(test, assert_instr(vpermw))]
8478pub fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i {
8479    unsafe { transmute(vpermw(a.as_i16x32(), idx.as_i16x32())) }
8480}
8481
8482/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8483///
8484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi16&expand=4293)
8485#[inline]
8486#[target_feature(enable = "avx512bw")]
8487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8488#[cfg_attr(test, assert_instr(vpermw))]
8489pub fn _mm512_mask_permutexvar_epi16(
8490    src: __m512i,
8491    k: __mmask32,
8492    idx: __m512i,
8493    a: __m512i,
8494) -> __m512i {
8495    unsafe {
8496        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
8497        transmute(simd_select_bitmask(k, permute, src.as_i16x32()))
8498    }
8499}
8500
8501/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8502///
8503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi16&expand=4294)
8504#[inline]
8505#[target_feature(enable = "avx512bw")]
8506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8507#[cfg_attr(test, assert_instr(vpermw))]
8508pub fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i {
8509    unsafe {
8510        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
8511        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
8512    }
8513}
8514
8515/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
8516///
8517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi16&expand=4292)
8518#[inline]
8519#[target_feature(enable = "avx512bw,avx512vl")]
8520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8521#[cfg_attr(test, assert_instr(vpermw))]
8522pub fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i {
8523    unsafe { transmute(vpermw256(a.as_i16x16(), idx.as_i16x16())) }
8524}
8525
8526/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8527///
8528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi16&expand=4290)
8529#[inline]
8530#[target_feature(enable = "avx512bw,avx512vl")]
8531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8532#[cfg_attr(test, assert_instr(vpermw))]
8533pub fn _mm256_mask_permutexvar_epi16(
8534    src: __m256i,
8535    k: __mmask16,
8536    idx: __m256i,
8537    a: __m256i,
8538) -> __m256i {
8539    unsafe {
8540        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
8541        transmute(simd_select_bitmask(k, permute, src.as_i16x16()))
8542    }
8543}
8544
8545/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8546///
8547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi16&expand=4291)
8548#[inline]
8549#[target_feature(enable = "avx512bw,avx512vl")]
8550#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8551#[cfg_attr(test, assert_instr(vpermw))]
8552pub fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i {
8553    unsafe {
8554        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
8555        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
8556    }
8557}
8558
8559/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
8560///
8561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi16&expand=4289)
8562#[inline]
8563#[target_feature(enable = "avx512bw,avx512vl")]
8564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8565#[cfg_attr(test, assert_instr(vpermw))]
8566pub fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i {
8567    unsafe { transmute(vpermw128(a.as_i16x8(), idx.as_i16x8())) }
8568}
8569
8570/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8571///
8572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi16&expand=4287)
8573#[inline]
8574#[target_feature(enable = "avx512bw,avx512vl")]
8575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8576#[cfg_attr(test, assert_instr(vpermw))]
8577pub fn _mm_mask_permutexvar_epi16(src: __m128i, k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
8578    unsafe {
8579        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
8580        transmute(simd_select_bitmask(k, permute, src.as_i16x8()))
8581    }
8582}
8583
8584/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8585///
8586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi16&expand=4288)
8587#[inline]
8588#[target_feature(enable = "avx512bw,avx512vl")]
8589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8590#[cfg_attr(test, assert_instr(vpermw))]
8591pub fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
8592    unsafe {
8593        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
8594        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
8595    }
8596}
8597
8598/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
8599///
8600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi16&expand=430)
8601#[inline]
8602#[target_feature(enable = "avx512bw")]
8603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8604#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8605#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8606pub const fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8607    unsafe { transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) }
8608}
8609
8610/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
8611///
8612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi16&expand=429)
8613#[inline]
8614#[target_feature(enable = "avx512bw,avx512vl")]
8615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8616#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8617#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8618pub const fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8619    unsafe { transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) }
8620}
8621
8622/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
8623///
8624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi16&expand=427)
8625#[inline]
8626#[target_feature(enable = "avx512bw,avx512vl")]
8627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8628#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8629#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8630pub const fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8631    unsafe { transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) }
8632}
8633
8634/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8635///
8636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi8&expand=441)
8637#[inline]
8638#[target_feature(enable = "avx512bw")]
8639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8640#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8642pub const fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
8643    unsafe { transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) }
8644}
8645
8646/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8647///
8648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi8&expand=440)
8649#[inline]
8650#[target_feature(enable = "avx512bw,avx512vl")]
8651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8652#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8653#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8654pub const fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
8655    unsafe { transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) }
8656}
8657
8658/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8659///
8660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi8&expand=439)
8661#[inline]
8662#[target_feature(enable = "avx512bw,avx512vl")]
8663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8664#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8665#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8666pub const fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
8667    unsafe { transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) }
8668}
8669
8670/// Broadcast the low packed 16-bit integer from a to all elements of dst.
8671///
8672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastw_epi16&expand=587)
8673#[inline]
8674#[target_feature(enable = "avx512bw")]
8675#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8676#[cfg_attr(test, assert_instr(vpbroadcastw))]
8677#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8678pub const fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
8679    unsafe {
8680        let a = _mm512_castsi128_si512(a).as_i16x32();
8681        let ret: i16x32 = simd_shuffle!(
8682            a,
8683            a,
8684            [
8685                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8686                0, 0, 0, 0,
8687            ],
8688        );
8689        transmute(ret)
8690    }
8691}
8692
8693/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8694///
8695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastw_epi16&expand=588)
8696#[inline]
8697#[target_feature(enable = "avx512bw")]
8698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8699#[cfg_attr(test, assert_instr(vpbroadcastw))]
8700#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8701pub const fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
8702    unsafe {
8703        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
8704        transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
8705    }
8706}
8707
8708/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8709///
8710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastw_epi16&expand=589)
8711#[inline]
8712#[target_feature(enable = "avx512bw")]
8713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8714#[cfg_attr(test, assert_instr(vpbroadcastw))]
8715#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8716pub const fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
8717    unsafe {
8718        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
8719        transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO))
8720    }
8721}
8722
8723/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8724///
8725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastw_epi16&expand=585)
8726#[inline]
8727#[target_feature(enable = "avx512bw,avx512vl")]
8728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8729#[cfg_attr(test, assert_instr(vpbroadcastw))]
8730#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8731pub const fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
8732    unsafe {
8733        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
8734        transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
8735    }
8736}
8737
8738/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8739///
8740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastw_epi16&expand=586)
8741#[inline]
8742#[target_feature(enable = "avx512bw,avx512vl")]
8743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8744#[cfg_attr(test, assert_instr(vpbroadcastw))]
8745#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8746pub const fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
8747    unsafe {
8748        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
8749        transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO))
8750    }
8751}
8752
8753/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8754///
8755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastw_epi16&expand=582)
8756#[inline]
8757#[target_feature(enable = "avx512bw,avx512vl")]
8758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8759#[cfg_attr(test, assert_instr(vpbroadcastw))]
8760#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8761pub const fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
8762    unsafe {
8763        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
8764        transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
8765    }
8766}
8767
8768/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8769///
8770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastw_epi16&expand=583)
8771#[inline]
8772#[target_feature(enable = "avx512bw,avx512vl")]
8773#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8774#[cfg_attr(test, assert_instr(vpbroadcastw))]
8775#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8776pub const fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
8777    unsafe {
8778        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
8779        transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO))
8780    }
8781}
8782
8783/// Broadcast the low packed 8-bit integer from a to all elements of dst.
8784///
8785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastb_epi8&expand=536)
8786#[inline]
8787#[target_feature(enable = "avx512bw")]
8788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8789#[cfg_attr(test, assert_instr(vpbroadcastb))]
8790#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8791pub const fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
8792    unsafe {
8793        let a = _mm512_castsi128_si512(a).as_i8x64();
8794        let ret: i8x64 = simd_shuffle!(
8795            a,
8796            a,
8797            [
8798                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8799                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8800                0, 0, 0, 0, 0, 0, 0, 0,
8801            ],
8802        );
8803        transmute(ret)
8804    }
8805}
8806
8807/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8808///
8809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastb_epi8&expand=537)
8810#[inline]
8811#[target_feature(enable = "avx512bw")]
8812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8813#[cfg_attr(test, assert_instr(vpbroadcastb))]
8814#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8815pub const fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
8816    unsafe {
8817        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
8818        transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
8819    }
8820}
8821
8822/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8823///
8824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastb_epi8&expand=538)
8825#[inline]
8826#[target_feature(enable = "avx512bw")]
8827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8828#[cfg_attr(test, assert_instr(vpbroadcastb))]
8829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8830pub const fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
8831    unsafe {
8832        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
8833        transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO))
8834    }
8835}
8836
8837/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8838///
8839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastb_epi8&expand=534)
8840#[inline]
8841#[target_feature(enable = "avx512bw,avx512vl")]
8842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8843#[cfg_attr(test, assert_instr(vpbroadcastb))]
8844#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8845pub const fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
8846    unsafe {
8847        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
8848        transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
8849    }
8850}
8851
8852/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8853///
8854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastb_epi8&expand=535)
8855#[inline]
8856#[target_feature(enable = "avx512bw,avx512vl")]
8857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8858#[cfg_attr(test, assert_instr(vpbroadcastb))]
8859#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8860pub const fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
8861    unsafe {
8862        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
8863        transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO))
8864    }
8865}
8866
8867/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8868///
8869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastb_epi8&expand=531)
8870#[inline]
8871#[target_feature(enable = "avx512bw,avx512vl")]
8872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8873#[cfg_attr(test, assert_instr(vpbroadcastb))]
8874#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8875pub const fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
8876    unsafe {
8877        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
8878        transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
8879    }
8880}
8881
8882/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8883///
8884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastb_epi8&expand=532)
8885#[inline]
8886#[target_feature(enable = "avx512bw,avx512vl")]
8887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8888#[cfg_attr(test, assert_instr(vpbroadcastb))]
8889#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8890pub const fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
8891    unsafe {
8892        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
8893        transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO))
8894    }
8895}
8896
/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi16&expand=6012)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // Each 128-bit lane holds 8 words; for lane L the upper four words are
        // indices 8*L+4 .. 8*L+7. Interleave a's and b's upper words of the
        // same lane (indices >= 32 select from `b`), one row below per half-lane.
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
                4, 32 + 4, 5, 32 + 5,
                6, 32 + 6, 7, 32 + 7,
                12, 32 + 12, 13, 32 + 13,
                14, 32 + 14, 15, 32 + 15,
                20, 32 + 20, 21, 32 + 21,
                22, 32 + 22, 23, 32 + 23,
                28, 32 + 28, 29, 32 + 29,
                30, 32 + 30, 31, 32 + 31,
            ],
        );
        transmute(r)
    }
}
8927
8928/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8929///
8930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi16&expand=6010)
8931#[inline]
8932#[target_feature(enable = "avx512bw")]
8933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8934#[cfg_attr(test, assert_instr(vpunpckhwd))]
8935#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8936pub const fn _mm512_mask_unpackhi_epi16(
8937    src: __m512i,
8938    k: __mmask32,
8939    a: __m512i,
8940    b: __m512i,
8941) -> __m512i {
8942    unsafe {
8943        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
8944        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
8945    }
8946}
8947
8948/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8949///
8950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi16&expand=6011)
8951#[inline]
8952#[target_feature(enable = "avx512bw")]
8953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8954#[cfg_attr(test, assert_instr(vpunpckhwd))]
8955#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8956pub const fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8957    unsafe {
8958        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
8959        transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO))
8960    }
8961}
8962
8963/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8964///
8965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi16&expand=6007)
8966#[inline]
8967#[target_feature(enable = "avx512bw,avx512vl")]
8968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8969#[cfg_attr(test, assert_instr(vpunpckhwd))]
8970#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8971pub const fn _mm256_mask_unpackhi_epi16(
8972    src: __m256i,
8973    k: __mmask16,
8974    a: __m256i,
8975    b: __m256i,
8976) -> __m256i {
8977    unsafe {
8978        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
8979        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
8980    }
8981}
8982
8983/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8984///
8985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi16&expand=6008)
8986#[inline]
8987#[target_feature(enable = "avx512bw,avx512vl")]
8988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8989#[cfg_attr(test, assert_instr(vpunpckhwd))]
8990#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8991pub const fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8992    unsafe {
8993        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
8994        transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO))
8995    }
8996}
8997
8998/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8999///
9000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi16&expand=6004)
9001#[inline]
9002#[target_feature(enable = "avx512bw,avx512vl")]
9003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9004#[cfg_attr(test, assert_instr(vpunpckhwd))]
9005#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9006pub const fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9007    unsafe {
9008        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
9009        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
9010    }
9011}
9012
9013/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9014///
9015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi16&expand=6005)
9016#[inline]
9017#[target_feature(enable = "avx512bw,avx512vl")]
9018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9019#[cfg_attr(test, assert_instr(vpunpckhwd))]
9020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9021pub const fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9022    unsafe {
9023        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
9024        transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO))
9025    }
9026}
9027
/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi8&expand=6039)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // Each 128-bit lane holds 16 bytes; for lane L the upper eight bytes are
        // indices 16*L+8 .. 16*L+15. Interleave a's and b's upper bytes of the
        // same lane (indices >= 64 select from `b`).
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                8, 64 + 8, 9, 64 + 9,
                10, 64 + 10, 11, 64 + 11,
                12, 64 + 12, 13, 64 + 13,
                14, 64 + 14, 15, 64 + 15,
                24, 64 + 24, 25, 64 + 25,
                26, 64 + 26, 27, 64 + 27,
                28, 64 + 28, 29, 64 + 29,
                30, 64 + 30, 31, 64 + 31,
                40, 64 + 40, 41, 64 + 41,
                42, 64 + 42, 43, 64 + 43,
                44, 64 + 44, 45, 64 + 45,
                46, 64 + 46, 47, 64 + 47,
                56, 64 + 56, 57, 64 + 57,
                58, 64 + 58, 59, 64 + 59,
                60, 64 + 60, 61, 64 + 61,
                62, 64 + 62, 63, 64 + 63,
            ],
        );
        transmute(r)
    }
}
9066
9067/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9068///
9069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi8&expand=6037)
9070#[inline]
9071#[target_feature(enable = "avx512bw")]
9072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9073#[cfg_attr(test, assert_instr(vpunpckhbw))]
9074#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9075pub const fn _mm512_mask_unpackhi_epi8(
9076    src: __m512i,
9077    k: __mmask64,
9078    a: __m512i,
9079    b: __m512i,
9080) -> __m512i {
9081    unsafe {
9082        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
9083        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
9084    }
9085}
9086
9087/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9088///
9089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi8&expand=6038)
9090#[inline]
9091#[target_feature(enable = "avx512bw")]
9092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9093#[cfg_attr(test, assert_instr(vpunpckhbw))]
9094#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9095pub const fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
9096    unsafe {
9097        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
9098        transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO))
9099    }
9100}
9101
9102/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9103///
9104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi8&expand=6034)
9105#[inline]
9106#[target_feature(enable = "avx512bw,avx512vl")]
9107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9108#[cfg_attr(test, assert_instr(vpunpckhbw))]
9109#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9110pub const fn _mm256_mask_unpackhi_epi8(
9111    src: __m256i,
9112    k: __mmask32,
9113    a: __m256i,
9114    b: __m256i,
9115) -> __m256i {
9116    unsafe {
9117        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
9118        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
9119    }
9120}
9121
9122/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9123///
9124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi8&expand=6035)
9125#[inline]
9126#[target_feature(enable = "avx512bw,avx512vl")]
9127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9128#[cfg_attr(test, assert_instr(vpunpckhbw))]
9129#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9130pub const fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
9131    unsafe {
9132        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
9133        transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO))
9134    }
9135}
9136
9137/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9138///
9139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi8&expand=6031)
9140#[inline]
9141#[target_feature(enable = "avx512bw,avx512vl")]
9142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9143#[cfg_attr(test, assert_instr(vpunpckhbw))]
9144#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9145pub const fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9146    unsafe {
9147        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
9148        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
9149    }
9150}
9151
9152/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9153///
9154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi8&expand=6032)
9155#[inline]
9156#[target_feature(enable = "avx512bw,avx512vl")]
9157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9158#[cfg_attr(test, assert_instr(vpunpckhbw))]
9159#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9160pub const fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9161    unsafe {
9162        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
9163        transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO))
9164    }
9165}
9166
/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi16&expand=6069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // Each 128-bit lane holds 8 words; for lane L the lower four words are
        // indices 8*L .. 8*L+3. Interleave a's and b's lower words of the same
        // lane (indices >= 32 select from `b`).
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
               0,  32+0,   1, 32+1,
               2,  32+2,   3, 32+3,
               8,  32+8,   9, 32+9,
               10, 32+10, 11, 32+11,
               16, 32+16, 17, 32+17,
               18, 32+18, 19, 32+19,
               24, 32+24, 25, 32+25,
               26, 32+26, 27, 32+27
            ],
        );
        transmute(r)
    }
}
9197
9198/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9199///
9200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi16&expand=6067)
9201#[inline]
9202#[target_feature(enable = "avx512bw")]
9203#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9204#[cfg_attr(test, assert_instr(vpunpcklwd))]
9205#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9206pub const fn _mm512_mask_unpacklo_epi16(
9207    src: __m512i,
9208    k: __mmask32,
9209    a: __m512i,
9210    b: __m512i,
9211) -> __m512i {
9212    unsafe {
9213        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
9214        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
9215    }
9216}
9217
9218/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9219///
9220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi16&expand=6068)
9221#[inline]
9222#[target_feature(enable = "avx512bw")]
9223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9224#[cfg_attr(test, assert_instr(vpunpcklwd))]
9225#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9226pub const fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
9227    unsafe {
9228        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
9229        transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO))
9230    }
9231}
9232
9233/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9234///
9235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi16&expand=6064)
9236#[inline]
9237#[target_feature(enable = "avx512bw,avx512vl")]
9238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9239#[cfg_attr(test, assert_instr(vpunpcklwd))]
9240#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9241pub const fn _mm256_mask_unpacklo_epi16(
9242    src: __m256i,
9243    k: __mmask16,
9244    a: __m256i,
9245    b: __m256i,
9246) -> __m256i {
9247    unsafe {
9248        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
9249        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
9250    }
9251}
9252
9253/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9254///
9255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi16&expand=6065)
9256#[inline]
9257#[target_feature(enable = "avx512bw,avx512vl")]
9258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9259#[cfg_attr(test, assert_instr(vpunpcklwd))]
9260#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9261pub const fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
9262    unsafe {
9263        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
9264        transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO))
9265    }
9266}
9267
9268/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9269///
9270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi16&expand=6061)
9271#[inline]
9272#[target_feature(enable = "avx512bw,avx512vl")]
9273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9274#[cfg_attr(test, assert_instr(vpunpcklwd))]
9275#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9276pub const fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9277    unsafe {
9278        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
9279        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
9280    }
9281}
9282
9283/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9284///
9285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi16&expand=6062)
9286#[inline]
9287#[target_feature(enable = "avx512bw,avx512vl")]
9288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9289#[cfg_attr(test, assert_instr(vpunpcklwd))]
9290#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9291pub const fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9292    unsafe {
9293        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
9294        transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO))
9295    }
9296}
9297
/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi8&expand=6096)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // Each group of four rows below covers one 128-bit lane: the low 8
        // bytes of the lane in `a` (byte offsets 0, 16, 32, 48 within the
        // vector) interleaved with the matching bytes of `b`. Indices
        // `64 + n` address byte `n` of `b`, per simd_shuffle's convention of
        // numbering the second operand after the first.
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                0,  64+0,   1, 64+1,
                2,  64+2,   3, 64+3,
                4,  64+4,   5, 64+5,
                6,  64+6,   7, 64+7,
                16, 64+16, 17, 64+17,
                18, 64+18, 19, 64+19,
                20, 64+20, 21, 64+21,
                22, 64+22, 23, 64+23,
                32, 64+32, 33, 64+33,
                34, 64+34, 35, 64+35,
                36, 64+36, 37, 64+37,
                38, 64+38, 39, 64+39,
                48, 64+48, 49, 64+49,
                50, 64+50, 51, 64+51,
                52, 64+52, 53, 64+53,
                54, 64+54, 55, 64+55,
            ],
        );
        transmute(r)
    }
}
9336
9337/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9338///
9339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi8&expand=6094)
9340#[inline]
9341#[target_feature(enable = "avx512bw")]
9342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9343#[cfg_attr(test, assert_instr(vpunpcklbw))]
9344#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9345pub const fn _mm512_mask_unpacklo_epi8(
9346    src: __m512i,
9347    k: __mmask64,
9348    a: __m512i,
9349    b: __m512i,
9350) -> __m512i {
9351    unsafe {
9352        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
9353        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
9354    }
9355}
9356
9357/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9358///
9359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi8&expand=6095)
9360#[inline]
9361#[target_feature(enable = "avx512bw")]
9362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9363#[cfg_attr(test, assert_instr(vpunpcklbw))]
9364#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9365pub const fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
9366    unsafe {
9367        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
9368        transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO))
9369    }
9370}
9371
9372/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9373///
9374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi8&expand=6091)
9375#[inline]
9376#[target_feature(enable = "avx512bw,avx512vl")]
9377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9378#[cfg_attr(test, assert_instr(vpunpcklbw))]
9379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9380pub const fn _mm256_mask_unpacklo_epi8(
9381    src: __m256i,
9382    k: __mmask32,
9383    a: __m256i,
9384    b: __m256i,
9385) -> __m256i {
9386    unsafe {
9387        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
9388        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
9389    }
9390}
9391
9392/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9393///
9394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi8&expand=6092)
9395#[inline]
9396#[target_feature(enable = "avx512bw,avx512vl")]
9397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9398#[cfg_attr(test, assert_instr(vpunpcklbw))]
9399#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9400pub const fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
9401    unsafe {
9402        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
9403        transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO))
9404    }
9405}
9406
9407/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9408///
9409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi8&expand=6088)
9410#[inline]
9411#[target_feature(enable = "avx512bw,avx512vl")]
9412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9413#[cfg_attr(test, assert_instr(vpunpcklbw))]
9414#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9415pub const fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9416    unsafe {
9417        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
9418        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
9419    }
9420}
9421
9422/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9423///
9424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi8&expand=6089)
9425#[inline]
9426#[target_feature(enable = "avx512bw,avx512vl")]
9427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9428#[cfg_attr(test, assert_instr(vpunpcklbw))]
9429#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9430pub const fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9431    unsafe {
9432        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
9433        transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO))
9434    }
9435}
9436
9437/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9438///
9439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi16&expand=3795)
9440#[inline]
9441#[target_feature(enable = "avx512bw")]
9442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9443#[cfg_attr(test, assert_instr(vmovdqu16))]
9444#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9445pub const fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
9446    unsafe {
9447        let mov = a.as_i16x32();
9448        transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
9449    }
9450}
9451
9452/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9453///
9454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi16&expand=3796)
9455#[inline]
9456#[target_feature(enable = "avx512bw")]
9457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9458#[cfg_attr(test, assert_instr(vmovdqu16))]
9459#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9460pub const fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
9461    unsafe {
9462        let mov = a.as_i16x32();
9463        transmute(simd_select_bitmask(k, mov, i16x32::ZERO))
9464    }
9465}
9466
9467/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9468///
9469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi16&expand=3793)
9470#[inline]
9471#[target_feature(enable = "avx512bw,avx512vl")]
9472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9473#[cfg_attr(test, assert_instr(vmovdqu16))]
9474#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9475pub const fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
9476    unsafe {
9477        let mov = a.as_i16x16();
9478        transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
9479    }
9480}
9481
9482/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9483///
9484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi16&expand=3794)
9485#[inline]
9486#[target_feature(enable = "avx512bw,avx512vl")]
9487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9488#[cfg_attr(test, assert_instr(vmovdqu16))]
9489#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9490pub const fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
9491    unsafe {
9492        let mov = a.as_i16x16();
9493        transmute(simd_select_bitmask(k, mov, i16x16::ZERO))
9494    }
9495}
9496
9497/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9498///
9499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi16&expand=3791)
9500#[inline]
9501#[target_feature(enable = "avx512bw,avx512vl")]
9502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9503#[cfg_attr(test, assert_instr(vmovdqu16))]
9504#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9505pub const fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
9506    unsafe {
9507        let mov = a.as_i16x8();
9508        transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
9509    }
9510}
9511
9512/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9513///
9514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi16&expand=3792)
9515#[inline]
9516#[target_feature(enable = "avx512bw,avx512vl")]
9517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9518#[cfg_attr(test, assert_instr(vmovdqu16))]
9519#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9520pub const fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
9521    unsafe {
9522        let mov = a.as_i16x8();
9523        transmute(simd_select_bitmask(k, mov, i16x8::ZERO))
9524    }
9525}
9526
9527/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9528///
9529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi8&expand=3813)
9530#[inline]
9531#[target_feature(enable = "avx512bw")]
9532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9533#[cfg_attr(test, assert_instr(vmovdqu8))]
9534#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9535pub const fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
9536    unsafe {
9537        let mov = a.as_i8x64();
9538        transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
9539    }
9540}
9541
9542/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9543///
9544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi8&expand=3814)
9545#[inline]
9546#[target_feature(enable = "avx512bw")]
9547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9548#[cfg_attr(test, assert_instr(vmovdqu8))]
9549#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9550pub const fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
9551    unsafe {
9552        let mov = a.as_i8x64();
9553        transmute(simd_select_bitmask(k, mov, i8x64::ZERO))
9554    }
9555}
9556
9557/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9558///
9559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi8&expand=3811)
9560#[inline]
9561#[target_feature(enable = "avx512bw,avx512vl")]
9562#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9563#[cfg_attr(test, assert_instr(vmovdqu8))]
9564#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9565pub const fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
9566    unsafe {
9567        let mov = a.as_i8x32();
9568        transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
9569    }
9570}
9571
9572/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9573///
9574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi8&expand=3812)
9575#[inline]
9576#[target_feature(enable = "avx512bw,avx512vl")]
9577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9578#[cfg_attr(test, assert_instr(vmovdqu8))]
9579#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9580pub const fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
9581    unsafe {
9582        let mov = a.as_i8x32();
9583        transmute(simd_select_bitmask(k, mov, i8x32::ZERO))
9584    }
9585}
9586
9587/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9588///
9589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi8&expand=3809)
9590#[inline]
9591#[target_feature(enable = "avx512bw,avx512vl")]
9592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9593#[cfg_attr(test, assert_instr(vmovdqu8))]
9594#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9595pub const fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
9596    unsafe {
9597        let mov = a.as_i8x16();
9598        transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
9599    }
9600}
9601
9602/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9603///
9604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi8&expand=3810)
9605#[inline]
9606#[target_feature(enable = "avx512bw,avx512vl")]
9607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9608#[cfg_attr(test, assert_instr(vmovdqu8))]
9609#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9610pub const fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
9611    unsafe {
9612        let mov = a.as_i8x16();
9613        transmute(simd_select_bitmask(k, mov, i8x16::ZERO))
9614    }
9615}
9616
9617/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9618///
9619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi16&expand=4942)
9620#[inline]
9621#[target_feature(enable = "avx512bw")]
9622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9623#[cfg_attr(test, assert_instr(vpbroadcastw))]
9624#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9625pub const fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
9626    unsafe {
9627        let r = _mm512_set1_epi16(a).as_i16x32();
9628        transmute(simd_select_bitmask(k, r, src.as_i16x32()))
9629    }
9630}
9631
9632/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9633///
9634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi16&expand=4943)
9635#[inline]
9636#[target_feature(enable = "avx512bw")]
9637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9638#[cfg_attr(test, assert_instr(vpbroadcastw))]
9639#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9640pub const fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
9641    unsafe {
9642        let r = _mm512_set1_epi16(a).as_i16x32();
9643        transmute(simd_select_bitmask(k, r, i16x32::ZERO))
9644    }
9645}
9646
9647/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9648///
9649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi16&expand=4939)
9650#[inline]
9651#[target_feature(enable = "avx512bw,avx512vl")]
9652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9653#[cfg_attr(test, assert_instr(vpbroadcastw))]
9654#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9655pub const fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
9656    unsafe {
9657        let r = _mm256_set1_epi16(a).as_i16x16();
9658        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
9659    }
9660}
9661
9662/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9663///
9664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi16&expand=4940)
9665#[inline]
9666#[target_feature(enable = "avx512bw,avx512vl")]
9667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9668#[cfg_attr(test, assert_instr(vpbroadcastw))]
9669#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9670pub const fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
9671    unsafe {
9672        let r = _mm256_set1_epi16(a).as_i16x16();
9673        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
9674    }
9675}
9676
9677/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9678///
9679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi16&expand=4936)
9680#[inline]
9681#[target_feature(enable = "avx512bw,avx512vl")]
9682#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9683#[cfg_attr(test, assert_instr(vpbroadcastw))]
9684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9685pub const fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
9686    unsafe {
9687        let r = _mm_set1_epi16(a).as_i16x8();
9688        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
9689    }
9690}
9691
9692/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9693///
9694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi16&expand=4937)
9695#[inline]
9696#[target_feature(enable = "avx512bw,avx512vl")]
9697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9698#[cfg_attr(test, assert_instr(vpbroadcastw))]
9699#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9700pub const fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
9701    unsafe {
9702        let r = _mm_set1_epi16(a).as_i16x8();
9703        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
9704    }
9705}
9706
9707/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9708///
9709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi8&expand=4970)
9710#[inline]
9711#[target_feature(enable = "avx512bw")]
9712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9713#[cfg_attr(test, assert_instr(vpbroadcast))]
9714#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9715pub const fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
9716    unsafe {
9717        let r = _mm512_set1_epi8(a).as_i8x64();
9718        transmute(simd_select_bitmask(k, r, src.as_i8x64()))
9719    }
9720}
9721
9722/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9723///
9724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi8&expand=4971)
9725#[inline]
9726#[target_feature(enable = "avx512bw")]
9727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9728#[cfg_attr(test, assert_instr(vpbroadcast))]
9729#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9730pub const fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
9731    unsafe {
9732        let r = _mm512_set1_epi8(a).as_i8x64();
9733        transmute(simd_select_bitmask(k, r, i8x64::ZERO))
9734    }
9735}
9736
9737/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9738///
9739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi8&expand=4967)
9740#[inline]
9741#[target_feature(enable = "avx512bw,avx512vl")]
9742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9743#[cfg_attr(test, assert_instr(vpbroadcast))]
9744#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9745pub const fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
9746    unsafe {
9747        let r = _mm256_set1_epi8(a).as_i8x32();
9748        transmute(simd_select_bitmask(k, r, src.as_i8x32()))
9749    }
9750}
9751
9752/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9753///
9754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi8&expand=4968)
9755#[inline]
9756#[target_feature(enable = "avx512bw,avx512vl")]
9757#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9758#[cfg_attr(test, assert_instr(vpbroadcast))]
9759#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9760pub const fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
9761    unsafe {
9762        let r = _mm256_set1_epi8(a).as_i8x32();
9763        transmute(simd_select_bitmask(k, r, i8x32::ZERO))
9764    }
9765}
9766
9767/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9768///
9769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi8&expand=4964)
9770#[inline]
9771#[target_feature(enable = "avx512bw,avx512vl")]
9772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9773#[cfg_attr(test, assert_instr(vpbroadcast))]
9774#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9775pub const fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
9776    unsafe {
9777        let r = _mm_set1_epi8(a).as_i8x16();
9778        transmute(simd_select_bitmask(k, r, src.as_i8x16()))
9779    }
9780}
9781
9782/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9783///
9784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi8&expand=4965)
9785#[inline]
9786#[target_feature(enable = "avx512bw,avx512vl")]
9787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9788#[cfg_attr(test, assert_instr(vpbroadcast))]
9789#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9790pub const fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
9791    unsafe {
9792        let r = _mm_set1_epi8(a).as_i8x16();
9793        transmute(simd_select_bitmask(k, r, i8x16::ZERO))
9794    }
9795}
9796
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflelo_epi16&expand=5221)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        // For each 128-bit lane (word offsets +0, +8, +16, +24): the four
        // 2-bit fields of IMM8 each select one of the lane's low four words,
        // while the lane's high four words pass through unchanged.
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
                4,
                5,
                6,
                7,
                (IMM8 as u32 & 0b11) + 8,
                ((IMM8 as u32 >> 2) & 0b11) + 8,
                ((IMM8 as u32 >> 4) & 0b11) + 8,
                ((IMM8 as u32 >> 6) & 0b11) + 8,
                12,
                13,
                14,
                15,
                (IMM8 as u32 & 0b11) + 16,
                ((IMM8 as u32 >> 2) & 0b11) + 16,
                ((IMM8 as u32 >> 4) & 0b11) + 16,
                ((IMM8 as u32 >> 6) & 0b11) + 16,
                20,
                21,
                22,
                23,
                (IMM8 as u32 & 0b11) + 24,
                ((IMM8 as u32 >> 2) & 0b11) + 24,
                ((IMM8 as u32 >> 4) & 0b11) + 24,
                ((IMM8 as u32 >> 6) & 0b11) + 24,
                28,
                29,
                30,
                31,
            ],
        );
        transmute(r)
    }
}
9851
9852/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9853///
9854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflelo_epi16&expand=5219)
9855#[inline]
9856#[target_feature(enable = "avx512bw")]
9857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9858#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
9859#[rustc_legacy_const_generics(3)]
9860#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9861pub const fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
9862    src: __m512i,
9863    k: __mmask32,
9864    a: __m512i,
9865) -> __m512i {
9866    unsafe {
9867        static_assert_uimm_bits!(IMM8, 8);
9868        let r = _mm512_shufflelo_epi16::<IMM8>(a);
9869        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
9870    }
9871}
9872
9873/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9874///
9875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflelo_epi16&expand=5220)
9876#[inline]
9877#[target_feature(enable = "avx512bw")]
9878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9879#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
9880#[rustc_legacy_const_generics(2)]
9881#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9882pub const fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
9883    unsafe {
9884        static_assert_uimm_bits!(IMM8, 8);
9885        let r = _mm512_shufflelo_epi16::<IMM8>(a);
9886        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
9887    }
9888}
9889
9890/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9891///
9892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflelo_epi16&expand=5216)
9893#[inline]
9894#[target_feature(enable = "avx512bw,avx512vl")]
9895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9896#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
9897#[rustc_legacy_const_generics(3)]
9898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9899pub const fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
9900    src: __m256i,
9901    k: __mmask16,
9902    a: __m256i,
9903) -> __m256i {
9904    unsafe {
9905        static_assert_uimm_bits!(IMM8, 8);
9906        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
9907        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
9908    }
9909}
9910
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflelo_epi16&expand=5217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
        // Zeromask select: lanes with a clear mask bit become zero.
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
    }
}
9927
9928/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9929///
9930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflelo_epi16&expand=5213)
9931#[inline]
9932#[target_feature(enable = "avx512bw,avx512vl")]
9933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9934#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
9935#[rustc_legacy_const_generics(3)]
9936#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9937pub const fn _mm_mask_shufflelo_epi16<const IMM8: i32>(
9938    src: __m128i,
9939    k: __mmask8,
9940    a: __m128i,
9941) -> __m128i {
9942    unsafe {
9943        static_assert_uimm_bits!(IMM8, 8);
9944        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
9945        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
9946    }
9947}
9948
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflelo_epi16&expand=5214)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
        // Zeromask select: lanes with a clear mask bit become zero.
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
    }
}
9965
/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflehi_epi16&expand=5212)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        // Per 128-bit lane (8 x i16, element bases 0/8/16/24): the first four
        // elements (low 64 bits) pass through unchanged; the last four (high
        // 64 bits) are each picked by a consecutive 2-bit field of IMM8,
        // offset by +4 into the lane's high half.
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                0,
                1,
                2,
                3,
                (IMM8 as u32 & 0b11) + 4,
                ((IMM8 as u32 >> 2) & 0b11) + 4,
                ((IMM8 as u32 >> 4) & 0b11) + 4,
                ((IMM8 as u32 >> 6) & 0b11) + 4,
                8,
                9,
                10,
                11,
                (IMM8 as u32 & 0b11) + 12,
                ((IMM8 as u32 >> 2) & 0b11) + 12,
                ((IMM8 as u32 >> 4) & 0b11) + 12,
                ((IMM8 as u32 >> 6) & 0b11) + 12,
                16,
                17,
                18,
                19,
                (IMM8 as u32 & 0b11) + 20,
                ((IMM8 as u32 >> 2) & 0b11) + 20,
                ((IMM8 as u32 >> 4) & 0b11) + 20,
                ((IMM8 as u32 >> 6) & 0b11) + 20,
                24,
                25,
                26,
                27,
                (IMM8 as u32 & 0b11) + 28,
                ((IMM8 as u32 >> 2) & 0b11) + 28,
                ((IMM8 as u32 >> 4) & 0b11) + 28,
                ((IMM8 as u32 >> 6) & 0b11) + 28,
            ],
        );
        transmute(r)
    }
}
10020
10021/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
10022///
10023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflehi_epi16&expand=5210)
10024#[inline]
10025#[target_feature(enable = "avx512bw")]
10026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10027#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
10028#[rustc_legacy_const_generics(3)]
10029#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10030pub const fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
10031    src: __m512i,
10032    k: __mmask32,
10033    a: __m512i,
10034) -> __m512i {
10035    unsafe {
10036        static_assert_uimm_bits!(IMM8, 8);
10037        let r = _mm512_shufflehi_epi16::<IMM8>(a);
10038        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
10039    }
10040}
10041
10042/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10043///
10044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflehi_epi16&expand=5211)
10045#[inline]
10046#[target_feature(enable = "avx512bw")]
10047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10048#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
10049#[rustc_legacy_const_generics(2)]
10050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10051pub const fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
10052    unsafe {
10053        static_assert_uimm_bits!(IMM8, 8);
10054        let r = _mm512_shufflehi_epi16::<IMM8>(a);
10055        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
10056    }
10057}
10058
10059/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
10060///
10061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflehi_epi16&expand=5207)
10062#[inline]
10063#[target_feature(enable = "avx512bw,avx512vl")]
10064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10065#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
10066#[rustc_legacy_const_generics(3)]
10067#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10068pub const fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
10069    src: __m256i,
10070    k: __mmask16,
10071    a: __m256i,
10072) -> __m256i {
10073    unsafe {
10074        static_assert_uimm_bits!(IMM8, 8);
10075        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
10076        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
10077    }
10078}
10079
10080/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10081///
10082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflehi_epi16&expand=5208)
10083#[inline]
10084#[target_feature(enable = "avx512bw,avx512vl")]
10085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10086#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
10087#[rustc_legacy_const_generics(2)]
10088#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10089pub const fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
10090    unsafe {
10091        static_assert_uimm_bits!(IMM8, 8);
10092        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
10093        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
10094    }
10095}
10096
10097/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
10098///
10099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflehi_epi16&expand=5204)
10100#[inline]
10101#[target_feature(enable = "avx512bw,avx512vl")]
10102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10103#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
10104#[rustc_legacy_const_generics(3)]
10105#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10106pub const fn _mm_mask_shufflehi_epi16<const IMM8: i32>(
10107    src: __m128i,
10108    k: __mmask8,
10109    a: __m128i,
10110) -> __m128i {
10111    unsafe {
10112        static_assert_uimm_bits!(IMM8, 8);
10113        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
10114        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
10115    }
10116}
10117
10118/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10119///
10120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflehi_epi16&expand=5205)
10121#[inline]
10122#[target_feature(enable = "avx512bw,avx512vl")]
10123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10124#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
10125#[rustc_legacy_const_generics(2)]
10126#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10127pub const fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
10128    unsafe {
10129        static_assert_uimm_bits!(IMM8, 8);
10130        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
10131        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
10132    }
10133}
10134
10135/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst.
10136///
10137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi8&expand=5159)
10138#[inline]
10139#[target_feature(enable = "avx512bw")]
10140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10141#[cfg_attr(test, assert_instr(vpshufb))]
10142pub fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i {
10143    unsafe { transmute(vpshufb(a.as_i8x64(), b.as_i8x64())) }
10144}
10145
10146/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10147///
10148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi8&expand=5157)
10149#[inline]
10150#[target_feature(enable = "avx512bw")]
10151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10152#[cfg_attr(test, assert_instr(vpshufb))]
10153pub fn _mm512_mask_shuffle_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
10154    unsafe {
10155        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
10156        transmute(simd_select_bitmask(k, shuffle, src.as_i8x64()))
10157    }
10158}
10159
10160/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10161///
10162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi8&expand=5158)
10163#[inline]
10164#[target_feature(enable = "avx512bw")]
10165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10166#[cfg_attr(test, assert_instr(vpshufb))]
10167pub fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
10168    unsafe {
10169        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
10170        transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO))
10171    }
10172}
10173
10174/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10175///
10176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi8&expand=5154)
10177#[inline]
10178#[target_feature(enable = "avx512bw,avx512vl")]
10179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10180#[cfg_attr(test, assert_instr(vpshufb))]
10181pub fn _mm256_mask_shuffle_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
10182    unsafe {
10183        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
10184        transmute(simd_select_bitmask(k, shuffle, src.as_i8x32()))
10185    }
10186}
10187
10188/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10189///
10190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi8&expand=5155)
10191#[inline]
10192#[target_feature(enable = "avx512bw,avx512vl")]
10193#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10194#[cfg_attr(test, assert_instr(vpshufb))]
10195pub fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
10196    unsafe {
10197        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
10198        transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO))
10199    }
10200}
10201
10202/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10203///
10204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi8&expand=5151)
10205#[inline]
10206#[target_feature(enable = "avx512bw,avx512vl")]
10207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10208#[cfg_attr(test, assert_instr(vpshufb))]
10209pub fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
10210    unsafe {
10211        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
10212        transmute(simd_select_bitmask(k, shuffle, src.as_i8x16()))
10213    }
10214}
10215
10216/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10217///
10218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi8&expand=5152)
10219#[inline]
10220#[target_feature(enable = "avx512bw,avx512vl")]
10221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10222#[cfg_attr(test, assert_instr(vpshufb))]
10223pub fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
10224    unsafe {
10225        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
10226        transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO))
10227    }
10228}
10229
10230/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10231///
10232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi16_mask&expand=5884)
10233#[inline]
10234#[target_feature(enable = "avx512bw")]
10235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10236#[cfg_attr(test, assert_instr(vptestmw))]
10237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10238pub const fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
10239    let and = _mm512_and_si512(a, b);
10240    let zero = _mm512_setzero_si512();
10241    _mm512_cmpneq_epi16_mask(and, zero)
10242}
10243
10244/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10245///
10246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi16_mask&expand=5883)
10247#[inline]
10248#[target_feature(enable = "avx512bw")]
10249#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10250#[cfg_attr(test, assert_instr(vptestmw))]
10251#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10252pub const fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
10253    let and = _mm512_and_si512(a, b);
10254    let zero = _mm512_setzero_si512();
10255    _mm512_mask_cmpneq_epi16_mask(k, and, zero)
10256}
10257
10258/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10259///
10260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi16_mask&expand=5882)
10261#[inline]
10262#[target_feature(enable = "avx512bw,avx512vl")]
10263#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10264#[cfg_attr(test, assert_instr(vptestmw))]
10265#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10266pub const fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
10267    let and = _mm256_and_si256(a, b);
10268    let zero = _mm256_setzero_si256();
10269    _mm256_cmpneq_epi16_mask(and, zero)
10270}
10271
10272/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10273///
10274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi16_mask&expand=5881)
10275#[inline]
10276#[target_feature(enable = "avx512bw,avx512vl")]
10277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10278#[cfg_attr(test, assert_instr(vptestmw))]
10279#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10280pub const fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
10281    let and = _mm256_and_si256(a, b);
10282    let zero = _mm256_setzero_si256();
10283    _mm256_mask_cmpneq_epi16_mask(k, and, zero)
10284}
10285
10286/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10287///
10288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi16_mask&expand=5880)
10289#[inline]
10290#[target_feature(enable = "avx512bw,avx512vl")]
10291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10292#[cfg_attr(test, assert_instr(vptestmw))]
10293#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10294pub const fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
10295    let and = _mm_and_si128(a, b);
10296    let zero = _mm_setzero_si128();
10297    _mm_cmpneq_epi16_mask(and, zero)
10298}
10299
10300/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10301///
10302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi16_mask&expand=5879)
10303#[inline]
10304#[target_feature(enable = "avx512bw,avx512vl")]
10305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10306#[cfg_attr(test, assert_instr(vptestmw))]
10307#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10308pub const fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
10309    let and = _mm_and_si128(a, b);
10310    let zero = _mm_setzero_si128();
10311    _mm_mask_cmpneq_epi16_mask(k, and, zero)
10312}
10313
10314/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10315///
10316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi8_mask&expand=5902)
10317#[inline]
10318#[target_feature(enable = "avx512bw")]
10319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10320#[cfg_attr(test, assert_instr(vptestmb))]
10321#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10322pub const fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
10323    let and = _mm512_and_si512(a, b);
10324    let zero = _mm512_setzero_si512();
10325    _mm512_cmpneq_epi8_mask(and, zero)
10326}
10327
10328/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10329///
10330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi8_mask&expand=5901)
10331#[inline]
10332#[target_feature(enable = "avx512bw")]
10333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10334#[cfg_attr(test, assert_instr(vptestmb))]
10335#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10336pub const fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
10337    let and = _mm512_and_si512(a, b);
10338    let zero = _mm512_setzero_si512();
10339    _mm512_mask_cmpneq_epi8_mask(k, and, zero)
10340}
10341
10342/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10343///
10344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi8_mask&expand=5900)
10345#[inline]
10346#[target_feature(enable = "avx512bw,avx512vl")]
10347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10348#[cfg_attr(test, assert_instr(vptestmb))]
10349#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10350pub const fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
10351    let and = _mm256_and_si256(a, b);
10352    let zero = _mm256_setzero_si256();
10353    _mm256_cmpneq_epi8_mask(and, zero)
10354}
10355
10356/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10357///
10358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi8_mask&expand=5899)
10359#[inline]
10360#[target_feature(enable = "avx512bw,avx512vl")]
10361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10362#[cfg_attr(test, assert_instr(vptestmb))]
10363#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10364pub const fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
10365    let and = _mm256_and_si256(a, b);
10366    let zero = _mm256_setzero_si256();
10367    _mm256_mask_cmpneq_epi8_mask(k, and, zero)
10368}
10369
10370/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
10371///
10372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi8_mask&expand=5898)
10373#[inline]
10374#[target_feature(enable = "avx512bw,avx512vl")]
10375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10376#[cfg_attr(test, assert_instr(vptestmb))]
10377#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10378pub const fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
10379    let and = _mm_and_si128(a, b);
10380    let zero = _mm_setzero_si128();
10381    _mm_cmpneq_epi8_mask(and, zero)
10382}
10383
10384/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
10385///
10386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi8_mask&expand=5897)
10387#[inline]
10388#[target_feature(enable = "avx512bw,avx512vl")]
10389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10390#[cfg_attr(test, assert_instr(vptestmb))]
10391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10392pub const fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
10393    let and = _mm_and_si128(a, b);
10394    let zero = _mm_setzero_si128();
10395    _mm_mask_cmpneq_epi8_mask(k, and, zero)
10396}
10397
10398/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10399///
10400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi16_mask&expand=5915)
10401#[inline]
10402#[target_feature(enable = "avx512bw")]
10403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10404#[cfg_attr(test, assert_instr(vptestnmw))]
10405#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10406pub const fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
10407    let and = _mm512_and_si512(a, b);
10408    let zero = _mm512_setzero_si512();
10409    _mm512_cmpeq_epi16_mask(and, zero)
10410}
10411
10412/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10413///
10414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi16_mask&expand=5914)
10415#[inline]
10416#[target_feature(enable = "avx512bw")]
10417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10418#[cfg_attr(test, assert_instr(vptestnmw))]
10419#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10420pub const fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
10421    let and = _mm512_and_si512(a, b);
10422    let zero = _mm512_setzero_si512();
10423    _mm512_mask_cmpeq_epi16_mask(k, and, zero)
10424}
10425
10426/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10427///
10428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi16_mask&expand=5913)
10429#[inline]
10430#[target_feature(enable = "avx512bw,avx512vl")]
10431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10432#[cfg_attr(test, assert_instr(vptestnmw))]
10433#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10434pub const fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
10435    let and = _mm256_and_si256(a, b);
10436    let zero = _mm256_setzero_si256();
10437    _mm256_cmpeq_epi16_mask(and, zero)
10438}
10439
10440/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10441///
10442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi16_mask&expand=5912)
10443#[inline]
10444#[target_feature(enable = "avx512bw,avx512vl")]
10445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10446#[cfg_attr(test, assert_instr(vptestnmw))]
10447#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10448pub const fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
10449    let and = _mm256_and_si256(a, b);
10450    let zero = _mm256_setzero_si256();
10451    _mm256_mask_cmpeq_epi16_mask(k, and, zero)
10452}
10453
10454/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10455///
10456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi16_mask&expand=5911)
10457#[inline]
10458#[target_feature(enable = "avx512bw,avx512vl")]
10459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10460#[cfg_attr(test, assert_instr(vptestnmw))]
10461#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10462pub const fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
10463    let and = _mm_and_si128(a, b);
10464    let zero = _mm_setzero_si128();
10465    _mm_cmpeq_epi16_mask(and, zero)
10466}
10467
10468/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10469///
10470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi16_mask&expand=5910)
10471#[inline]
10472#[target_feature(enable = "avx512bw,avx512vl")]
10473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10474#[cfg_attr(test, assert_instr(vptestnmw))]
10475#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10476pub const fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
10477    let and = _mm_and_si128(a, b);
10478    let zero = _mm_setzero_si128();
10479    _mm_mask_cmpeq_epi16_mask(k, and, zero)
10480}
10481
10482/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10483///
10484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi8_mask&expand=5933)
10485#[inline]
10486#[target_feature(enable = "avx512bw")]
10487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10488#[cfg_attr(test, assert_instr(vptestnmb))]
10489#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10490pub const fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
10491    let and = _mm512_and_si512(a, b);
10492    let zero = _mm512_setzero_si512();
10493    _mm512_cmpeq_epi8_mask(and, zero)
10494}
10495
10496/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10497///
10498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi8_mask&expand=5932)
10499#[inline]
10500#[target_feature(enable = "avx512bw")]
10501#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10502#[cfg_attr(test, assert_instr(vptestnmb))]
10503#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10504pub const fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
10505    let and = _mm512_and_si512(a, b);
10506    let zero = _mm512_setzero_si512();
10507    _mm512_mask_cmpeq_epi8_mask(k, and, zero)
10508}
10509
10510/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10511///
10512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi8_mask&expand=5931)
10513#[inline]
10514#[target_feature(enable = "avx512bw,avx512vl")]
10515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10516#[cfg_attr(test, assert_instr(vptestnmb))]
10517#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10518pub const fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
10519    let and = _mm256_and_si256(a, b);
10520    let zero = _mm256_setzero_si256();
10521    _mm256_cmpeq_epi8_mask(and, zero)
10522}
10523
10524/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10525///
10526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi8_mask&expand=5930)
10527#[inline]
10528#[target_feature(enable = "avx512bw,avx512vl")]
10529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10530#[cfg_attr(test, assert_instr(vptestnmb))]
10531#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10532pub const fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
10533    let and = _mm256_and_si256(a, b);
10534    let zero = _mm256_setzero_si256();
10535    _mm256_mask_cmpeq_epi8_mask(k, and, zero)
10536}
10537
10538/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10539///
10540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi8_mask&expand=5929)
10541#[inline]
10542#[target_feature(enable = "avx512bw,avx512vl")]
10543#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10544#[cfg_attr(test, assert_instr(vptestnmb))]
10545#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10546pub const fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
10547    let and = _mm_and_si128(a, b);
10548    let zero = _mm_setzero_si128();
10549    _mm_cmpeq_epi8_mask(and, zero)
10550}
10551
10552/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10553///
10554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi8_mask&expand=5928)
10555#[inline]
10556#[target_feature(enable = "avx512bw,avx512vl")]
10557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10558#[cfg_attr(test, assert_instr(vptestnmb))]
10559#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10560pub const fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
10561    let and = _mm_and_si128(a, b);
10562    let zero = _mm_setzero_si128();
10563    _mm_mask_cmpeq_epi8_mask(k, and, zero)
10564}
10565
10566/// Store 64-bit mask from a into memory.
10567///
10568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask64&expand=5578)
10569#[inline]
10570#[target_feature(enable = "avx512bw")]
10571#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10572#[cfg_attr(test, assert_instr(mov))] //should be kmovq
10573#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10574pub const unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
10575    ptr::write(mem_addr as *mut __mmask64, a);
10576}
10577
10578/// Store 32-bit mask from a into memory.
10579///
10580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask32&expand=5577)
10581#[inline]
10582#[target_feature(enable = "avx512bw")]
10583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10584#[cfg_attr(test, assert_instr(mov))] //should be kmovd
10585#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10586pub const unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
10587    ptr::write(mem_addr as *mut __mmask32, a);
10588}
10589
10590/// Load 64-bit mask from memory into k.
10591///
10592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask64&expand=3318)
10593#[inline]
10594#[target_feature(enable = "avx512bw")]
10595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10596#[cfg_attr(test, assert_instr(mov))] //should be kmovq
10597#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10598pub const unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
10599    ptr::read(mem_addr as *const __mmask64)
10600}
10601
10602/// Load 32-bit mask from memory into k.
10603///
10604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask32&expand=3317)
10605#[inline]
10606#[target_feature(enable = "avx512bw")]
10607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10608#[cfg_attr(test, assert_instr(mov))] //should be kmovd
10609#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10610pub const unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
10611    ptr::read(mem_addr as *const __mmask32)
10612}
10613
10614/// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst.
10615///
10616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sad_epu8&expand=4855)
10617#[inline]
10618#[target_feature(enable = "avx512bw")]
10619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10620#[cfg_attr(test, assert_instr(vpsadbw))]
10621pub fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i {
10622    unsafe { transmute(vpsadbw(a.as_u8x64(), b.as_u8x64())) }
10623}
10624
10625/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10626///
10627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dbsad_epu8&expand=2114)
10628#[inline]
10629#[target_feature(enable = "avx512bw")]
10630#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10631#[rustc_legacy_const_generics(2)]
10632#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10633pub fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
10634    unsafe {
10635        static_assert_uimm_bits!(IMM8, 8);
10636        let a = a.as_u8x64();
10637        let b = b.as_u8x64();
10638        let r = vdbpsadbw(a, b, IMM8);
10639        transmute(r)
10640    }
10641}
10642
10643/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10644///
10645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dbsad_epu8&expand=2115)
10646#[inline]
10647#[target_feature(enable = "avx512bw")]
10648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10649#[rustc_legacy_const_generics(4)]
10650#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10651pub fn _mm512_mask_dbsad_epu8<const IMM8: i32>(
10652    src: __m512i,
10653    k: __mmask32,
10654    a: __m512i,
10655    b: __m512i,
10656) -> __m512i {
10657    unsafe {
10658        static_assert_uimm_bits!(IMM8, 8);
10659        let a = a.as_u8x64();
10660        let b = b.as_u8x64();
10661        let r = vdbpsadbw(a, b, IMM8);
10662        transmute(simd_select_bitmask(k, r, src.as_u16x32()))
10663    }
10664}
10665
10666/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10667///
10668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dbsad_epu8&expand=2116)
10669#[inline]
10670#[target_feature(enable = "avx512bw")]
10671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10672#[rustc_legacy_const_generics(3)]
10673#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10674pub fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
10675    unsafe {
10676        static_assert_uimm_bits!(IMM8, 8);
10677        let a = a.as_u8x64();
10678        let b = b.as_u8x64();
10679        let r = vdbpsadbw(a, b, IMM8);
10680        transmute(simd_select_bitmask(k, r, u16x32::ZERO))
10681    }
10682}
10683
10684/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10685///
10686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dbsad_epu8&expand=2111)
10687#[inline]
10688#[target_feature(enable = "avx512bw,avx512vl")]
10689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10690#[rustc_legacy_const_generics(2)]
10691#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10692pub fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
10693    unsafe {
10694        static_assert_uimm_bits!(IMM8, 8);
10695        let a = a.as_u8x32();
10696        let b = b.as_u8x32();
10697        let r = vdbpsadbw256(a, b, IMM8);
10698        transmute(r)
10699    }
10700}
10701
10702/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10703///
10704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dbsad_epu8&expand=2112)
10705#[inline]
10706#[target_feature(enable = "avx512bw,avx512vl")]
10707#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10708#[rustc_legacy_const_generics(4)]
10709#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10710pub fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
10711    src: __m256i,
10712    k: __mmask16,
10713    a: __m256i,
10714    b: __m256i,
10715) -> __m256i {
10716    unsafe {
10717        static_assert_uimm_bits!(IMM8, 8);
10718        let a = a.as_u8x32();
10719        let b = b.as_u8x32();
10720        let r = vdbpsadbw256(a, b, IMM8);
10721        transmute(simd_select_bitmask(k, r, src.as_u16x16()))
10722    }
10723}
10724
10725/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10726///
10727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dbsad_epu8&expand=2113)
10728#[inline]
10729#[target_feature(enable = "avx512bw,avx512vl")]
10730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10731#[rustc_legacy_const_generics(3)]
10732#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10733pub fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
10734    unsafe {
10735        static_assert_uimm_bits!(IMM8, 8);
10736        let a = a.as_u8x32();
10737        let b = b.as_u8x32();
10738        let r = vdbpsadbw256(a, b, IMM8);
10739        transmute(simd_select_bitmask(k, r, u16x16::ZERO))
10740    }
10741}
10742
10743/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10744///
10745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dbsad_epu8&expand=2108)
10746#[inline]
10747#[target_feature(enable = "avx512bw,avx512vl")]
10748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10749#[rustc_legacy_const_generics(2)]
10750#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10751pub fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
10752    unsafe {
10753        static_assert_uimm_bits!(IMM8, 8);
10754        let a = a.as_u8x16();
10755        let b = b.as_u8x16();
10756        let r = vdbpsadbw128(a, b, IMM8);
10757        transmute(r)
10758    }
10759}
10760
10761/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10762///
10763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dbsad_epu8&expand=2109)
10764#[inline]
10765#[target_feature(enable = "avx512bw,avx512vl")]
10766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10767#[rustc_legacy_const_generics(4)]
10768#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10769pub fn _mm_mask_dbsad_epu8<const IMM8: i32>(
10770    src: __m128i,
10771    k: __mmask8,
10772    a: __m128i,
10773    b: __m128i,
10774) -> __m128i {
10775    unsafe {
10776        static_assert_uimm_bits!(IMM8, 8);
10777        let a = a.as_u8x16();
10778        let b = b.as_u8x16();
10779        let r = vdbpsadbw128(a, b, IMM8);
10780        transmute(simd_select_bitmask(k, r, src.as_u16x8()))
10781    }
10782}
10783
10784/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10785///
10786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dbsad_epu8&expand=2110)
10787#[inline]
10788#[target_feature(enable = "avx512bw,avx512vl")]
10789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10790#[rustc_legacy_const_generics(3)]
10791#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10792pub fn _mm_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
10793    unsafe {
10794        static_assert_uimm_bits!(IMM8, 8);
10795        let a = a.as_u8x16();
10796        let b = b.as_u8x16();
10797        let r = vdbpsadbw128(a, b, IMM8);
10798        transmute(simd_select_bitmask(k, r, u16x8::ZERO))
10799    }
10800}
10801
10802/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
10803///
10804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi16_mask&expand=3873)
10805#[inline]
10806#[target_feature(enable = "avx512bw")]
10807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10808#[cfg_attr(test, assert_instr(vpmovw2m))]
10809#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10810pub const fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
10811    let filter = _mm512_set1_epi16(1 << 15);
10812    let a = _mm512_and_si512(a, filter);
10813    _mm512_cmpeq_epi16_mask(a, filter)
10814}
10815
10816/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
10817///
10818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi16_mask&expand=3872)
10819#[inline]
10820#[target_feature(enable = "avx512bw,avx512vl")]
10821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10822#[cfg_attr(test, assert_instr(vpmovw2m))]
10823#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10824pub const fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
10825    let filter = _mm256_set1_epi16(1 << 15);
10826    let a = _mm256_and_si256(a, filter);
10827    _mm256_cmpeq_epi16_mask(a, filter)
10828}
10829
10830/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
10831///
10832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi16_mask&expand=3871)
10833#[inline]
10834#[target_feature(enable = "avx512bw,avx512vl")]
10835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10836#[cfg_attr(test, assert_instr(vpmovw2m))]
10837#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10838pub const fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
10839    let filter = _mm_set1_epi16(1 << 15);
10840    let a = _mm_and_si128(a, filter);
10841    _mm_cmpeq_epi16_mask(a, filter)
10842}
10843
10844/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
10845///
10846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi8_mask&expand=3883)
10847#[inline]
10848#[target_feature(enable = "avx512bw")]
10849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10850#[cfg_attr(test, assert_instr(vpmovb2m))]
10851#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10852pub const fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
10853    let filter = _mm512_set1_epi8(1 << 7);
10854    let a = _mm512_and_si512(a, filter);
10855    _mm512_cmpeq_epi8_mask(a, filter)
10856}
10857
10858/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
10859///
10860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi8_mask&expand=3882)
10861#[inline]
10862#[target_feature(enable = "avx512bw,avx512vl")]
10863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10864#[cfg_attr(test, assert_instr(vpmovmskb))]
10865// should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than
10866// using vpmovb2m plus converting the mask register to a standard register.
10867#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10868pub const fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
10869    let filter = _mm256_set1_epi8(1 << 7);
10870    let a = _mm256_and_si256(a, filter);
10871    _mm256_cmpeq_epi8_mask(a, filter)
10872}
10873
10874/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
10875///
10876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi8_mask&expand=3881)
10877#[inline]
10878#[target_feature(enable = "avx512bw,avx512vl")]
10879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10880#[cfg_attr(test, assert_instr(vpmovmskb))]
10881// should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than
10882// using vpmovb2m plus converting the mask register to a standard register.
10883#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10884pub const fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
10885    let filter = _mm_set1_epi8(1 << 7);
10886    let a = _mm_and_si128(a, filter);
10887    _mm_cmpeq_epi8_mask(a, filter)
10888}
10889
10890/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10891///
10892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi16&expand=3886)
10893#[inline]
10894#[target_feature(enable = "avx512bw")]
10895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10896#[cfg_attr(test, assert_instr(vpmovm2w))]
10897#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10898pub const fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
10899    unsafe {
10900        let one = _mm512_set1_epi16(
10901            1 << 15
10902                | 1 << 14
10903                | 1 << 13
10904                | 1 << 12
10905                | 1 << 11
10906                | 1 << 10
10907                | 1 << 9
10908                | 1 << 8
10909                | 1 << 7
10910                | 1 << 6
10911                | 1 << 5
10912                | 1 << 4
10913                | 1 << 3
10914                | 1 << 2
10915                | 1 << 1
10916                | 1 << 0,
10917        )
10918        .as_i16x32();
10919        transmute(simd_select_bitmask(k, one, i16x32::ZERO))
10920    }
10921}
10922
10923/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10924///
10925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi16&expand=3885)
10926#[inline]
10927#[target_feature(enable = "avx512bw,avx512vl")]
10928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10929#[cfg_attr(test, assert_instr(vpmovm2w))]
10930#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10931pub const fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
10932    unsafe {
10933        let one = _mm256_set1_epi16(
10934            1 << 15
10935                | 1 << 14
10936                | 1 << 13
10937                | 1 << 12
10938                | 1 << 11
10939                | 1 << 10
10940                | 1 << 9
10941                | 1 << 8
10942                | 1 << 7
10943                | 1 << 6
10944                | 1 << 5
10945                | 1 << 4
10946                | 1 << 3
10947                | 1 << 2
10948                | 1 << 1
10949                | 1 << 0,
10950        )
10951        .as_i16x16();
10952        transmute(simd_select_bitmask(k, one, i16x16::ZERO))
10953    }
10954}
10955
10956/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10957///
10958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi16&expand=3884)
10959#[inline]
10960#[target_feature(enable = "avx512bw,avx512vl")]
10961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10962#[cfg_attr(test, assert_instr(vpmovm2w))]
10963#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10964pub const fn _mm_movm_epi16(k: __mmask8) -> __m128i {
10965    unsafe {
10966        let one = _mm_set1_epi16(
10967            1 << 15
10968                | 1 << 14
10969                | 1 << 13
10970                | 1 << 12
10971                | 1 << 11
10972                | 1 << 10
10973                | 1 << 9
10974                | 1 << 8
10975                | 1 << 7
10976                | 1 << 6
10977                | 1 << 5
10978                | 1 << 4
10979                | 1 << 3
10980                | 1 << 2
10981                | 1 << 1
10982                | 1 << 0,
10983        )
10984        .as_i16x8();
10985        transmute(simd_select_bitmask(k, one, i16x8::ZERO))
10986    }
10987}
10988
10989/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10990///
10991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi8&expand=3895)
10992#[inline]
10993#[target_feature(enable = "avx512bw")]
10994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10995#[cfg_attr(test, assert_instr(vpmovm2b))]
10996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10997pub const fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
10998    unsafe {
10999        let one =
11000            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
11001                .as_i8x64();
11002        transmute(simd_select_bitmask(k, one, i8x64::ZERO))
11003    }
11004}
11005
11006/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
11007///
11008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi8&expand=3894)
11009#[inline]
11010#[target_feature(enable = "avx512bw,avx512vl")]
11011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11012#[cfg_attr(test, assert_instr(vpmovm2b))]
11013#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11014pub const fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
11015    unsafe {
11016        let one =
11017            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
11018                .as_i8x32();
11019        transmute(simd_select_bitmask(k, one, i8x32::ZERO))
11020    }
11021}
11022
11023/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
11024///
11025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi8&expand=3893)
11026#[inline]
11027#[target_feature(enable = "avx512bw,avx512vl")]
11028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11029#[cfg_attr(test, assert_instr(vpmovm2b))]
11030#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11031pub const fn _mm_movm_epi8(k: __mmask16) -> __m128i {
11032    unsafe {
11033        let one =
11034            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
11035                .as_i8x16();
11036        transmute(simd_select_bitmask(k, one, i8x16::ZERO))
11037    }
11038}
11039
/// Convert 32-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_cvtmask32_u32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtmask32_u32(a: __mmask32) -> u32 {
    // The mask type is represented as a plain 32-bit integer (the value is
    // returned without any cast), so the conversion is an identity.
    a
}
11050
/// Convert integer value a into an 32-bit mask, and store the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtu32_mask32(a: u32) -> __mmask32 {
    // The mask type is represented as a plain 32-bit integer (the value is
    // returned without any cast), so the conversion is an identity.
    a
}
11061
11062/// Add 32-bit masks in a and b, and store the result in k.
11063///
11064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask32&expand=3207)
11065#[inline]
11066#[target_feature(enable = "avx512bw")]
11067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11068#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11069pub const fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
11070    a.wrapping_add(b)
11071}
11072
11073/// Add 64-bit masks in a and b, and store the result in k.
11074///
11075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask64&expand=3208)
11076#[inline]
11077#[target_feature(enable = "avx512bw")]
11078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11079#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11080pub const fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
11081    a.wrapping_add(b)
11082}
11083
11084/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
11085///
11086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask32&expand=3213)
11087#[inline]
11088#[target_feature(enable = "avx512bw")]
11089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11091pub const fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
11092    a & b
11093}
11094
11095/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
11096///
11097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask64&expand=3214)
11098#[inline]
11099#[target_feature(enable = "avx512bw")]
11100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11101#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11102pub const fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
11103    a & b
11104}
11105
11106/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
11107///
11108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234)
11109#[inline]
11110#[target_feature(enable = "avx512bw")]
11111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11112#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11113pub const fn _knot_mask32(a: __mmask32) -> __mmask32 {
11114    !a
11115}
11116
11117/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
11118///
11119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235)
11120#[inline]
11121#[target_feature(enable = "avx512bw")]
11122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11123#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11124pub const fn _knot_mask64(a: __mmask64) -> __mmask64 {
11125    !a
11126}
11127
11128/// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k.
11129///
11130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219)
11131#[inline]
11132#[target_feature(enable = "avx512bw")]
11133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11134#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11135pub const fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
11136    _knot_mask32(a) & b
11137}
11138
11139/// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k.
11140///
11141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220)
11142#[inline]
11143#[target_feature(enable = "avx512bw")]
11144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11145#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11146pub const fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
11147    _knot_mask64(a) & b
11148}
11149
11150/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
11151///
11152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240)
11153#[inline]
11154#[target_feature(enable = "avx512bw")]
11155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11157pub const fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
11158    a | b
11159}
11160
11161/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
11162///
11163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241)
11164#[inline]
11165#[target_feature(enable = "avx512bw")]
11166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11167#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11168pub const fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
11169    a | b
11170}
11171
11172/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
11173///
11174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292)
11175#[inline]
11176#[target_feature(enable = "avx512bw")]
11177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11178#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11179pub const fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
11180    a ^ b
11181}
11182
11183/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
11184///
11185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293)
11186#[inline]
11187#[target_feature(enable = "avx512bw")]
11188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11189#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11190pub const fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
11191    a ^ b
11192}
11193
11194/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
11195///
11196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286)
11197#[inline]
11198#[target_feature(enable = "avx512bw")]
11199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11200#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11201pub const fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
11202    _knot_mask32(a ^ b)
11203}
11204
11205/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
11206///
11207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287)
11208#[inline]
11209#[target_feature(enable = "avx512bw")]
11210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11211#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11212pub const fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
11213    _knot_mask64(a ^ b)
11214}
11215
11216/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
11217/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
11218///
11219/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask32_u8)
11220#[inline]
11221#[target_feature(enable = "avx512bw")]
11222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11223#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11224pub const unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 {
11225    let tmp = _kor_mask32(a, b);
11226    *all_ones = (tmp == 0xffffffff) as u8;
11227    (tmp == 0) as u8
11228}
11229
11230/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
11231/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
11232///
11233/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask64_u8)
11234#[inline]
11235#[target_feature(enable = "avx512bw")]
11236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11237#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11238pub const unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 {
11239    let tmp = _kor_mask64(a, b);
11240    *all_ones = (tmp == 0xffffffff_ffffffff) as u8;
11241    (tmp == 0) as u8
11242}
11243
11244/// Compute the bitwise OR of 32-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
11245/// store 0 in dst.
11246///
11247/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask32_u8)
11248#[inline]
11249#[target_feature(enable = "avx512bw")]
11250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11251#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11252pub const fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
11253    (_kor_mask32(a, b) == 0xffffffff) as u8
11254}
11255
11256/// Compute the bitwise OR of 64-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
11257/// store 0 in dst.
11258///
11259/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask64_u8)
11260#[inline]
11261#[target_feature(enable = "avx512bw")]
11262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11263#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11264pub const fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
11265    (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8
11266}
11267
11268/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
11269/// store 0 in dst.
11270///
11271/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask32_u8)
11272#[inline]
11273#[target_feature(enable = "avx512bw")]
11274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11275#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11276pub const fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
11277    (_kor_mask32(a, b) == 0) as u8
11278}
11279
11280/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
11281/// store 0 in dst.
11282///
11283/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask64_u8)
11284#[inline]
11285#[target_feature(enable = "avx512bw")]
11286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11287#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11288pub const fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
11289    (_kor_mask64(a, b) == 0) as u8
11290}
11291
11292/// Shift the bits of 32-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
11293///
11294/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask32)
11295#[inline]
11296#[target_feature(enable = "avx512bw")]
11297#[rustc_legacy_const_generics(1)]
11298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11299#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11300pub const fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
11301    a.unbounded_shl(COUNT)
11302}
11303
11304/// Shift the bits of 64-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
11305///
11306/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask64)
11307#[inline]
11308#[target_feature(enable = "avx512bw")]
11309#[rustc_legacy_const_generics(1)]
11310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11311#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11312pub const fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
11313    a.unbounded_shl(COUNT)
11314}
11315
11316/// Shift the bits of 32-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
11317///
11318/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask32)
11319#[inline]
11320#[target_feature(enable = "avx512bw")]
11321#[rustc_legacy_const_generics(1)]
11322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11323#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11324pub const fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
11325    a.unbounded_shr(COUNT)
11326}
11327
11328/// Shift the bits of 64-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
11329///
11330/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask64)
11331#[inline]
11332#[target_feature(enable = "avx512bw")]
11333#[rustc_legacy_const_generics(1)]
11334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11335#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11336pub const fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
11337    a.unbounded_shr(COUNT)
11338}
11339
11340/// Compute the bitwise AND of 32-bit masks a and b, and if the result is all zeros, store 1 in dst,
11341/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
11342/// zeros, store 1 in and_not, otherwise store 0 in and_not.
11343///
11344/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask32_u8)
11345#[inline]
11346#[target_feature(enable = "avx512bw")]
11347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11348#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11349pub const unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
11350    *and_not = (_kandn_mask32(a, b) == 0) as u8;
11351    (_kand_mask32(a, b) == 0) as u8
11352}
11353
11354/// Compute the bitwise AND of 64-bit masks a and b, and if the result is all zeros, store 1 in dst,
11355/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
11356/// zeros, store 1 in and_not, otherwise store 0 in and_not.
11357///
11358/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask64_u8)
11359#[inline]
11360#[target_feature(enable = "avx512bw")]
11361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11362#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11363pub const unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 {
11364    *and_not = (_kandn_mask64(a, b) == 0) as u8;
11365    (_kand_mask64(a, b) == 0) as u8
11366}
11367
11368/// Compute the bitwise NOT of 32-bit mask a and then AND with 16-bit mask b, if the result is all
11369/// zeros, store 1 in dst, otherwise store 0 in dst.
11370///
11371/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask32_u8)
11372#[inline]
11373#[target_feature(enable = "avx512bw")]
11374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11375#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11376pub const fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
11377    (_kandn_mask32(a, b) == 0) as u8
11378}
11379
11380/// Compute the bitwise NOT of 64-bit mask a and then AND with 8-bit mask b, if the result is all
11381/// zeros, store 1 in dst, otherwise store 0 in dst.
11382///
11383/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask64_u8)
11384#[inline]
11385#[target_feature(enable = "avx512bw")]
11386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11387#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11388pub const fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
11389    (_kandn_mask64(a, b) == 0) as u8
11390}
11391
11392/// Compute the bitwise AND of 32-bit masks a and  b, if the result is all zeros, store 1 in dst, otherwise
11393/// store 0 in dst.
11394///
11395/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask32_u8)
11396#[inline]
11397#[target_feature(enable = "avx512bw")]
11398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11399#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11400pub const fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
11401    (_kand_mask32(a, b) == 0) as u8
11402}
11403
11404/// Compute the bitwise AND of 64-bit masks a and  b, if the result is all zeros, store 1 in dst, otherwise
11405/// store 0 in dst.
11406///
11407/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask64_u8)
11408#[inline]
11409#[target_feature(enable = "avx512bw")]
11410#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11411#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11412pub const fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
11413    (_kand_mask64(a, b) == 0) as u8
11414}
11415
11416/// Unpack and interleave 16 bits from masks a and b, and store the 32-bit result in k.
11417///
11418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackw)
11419#[inline]
11420#[target_feature(enable = "avx512bw")]
11421#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11422#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckwd
11423#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11424pub const fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
11425    ((a & 0xffff) << 16) | (b & 0xffff)
11426}
11427
11428/// Unpack and interleave 32 bits from masks a and b, and store the 64-bit result in k.
11429///
11430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackd)
11431#[inline]
11432#[target_feature(enable = "avx512bw")]
11433#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11434#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckdq
11435#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11436pub const fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
11437    ((a & 0xffffffff) << 32) | (b & 0xffffffff)
11438}
11439
11440/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
11441///
11442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi8&expand=1407)
11443#[inline]
11444#[target_feature(enable = "avx512bw")]
11445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11446#[cfg_attr(test, assert_instr(vpmovwb))]
11447#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11448pub const fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
11449    unsafe {
11450        let a = a.as_i16x32();
11451        transmute::<i8x32, _>(simd_cast(a))
11452    }
11453}
11454
11455/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11456///
11457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi8&expand=1408)
11458#[inline]
11459#[target_feature(enable = "avx512bw")]
11460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11461#[cfg_attr(test, assert_instr(vpmovwb))]
11462#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11463pub const fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
11464    unsafe {
11465        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
11466        transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
11467    }
11468}
11469
11470/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11471///
11472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi8&expand=1409)
11473#[inline]
11474#[target_feature(enable = "avx512bw")]
11475#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11476#[cfg_attr(test, assert_instr(vpmovwb))]
11477#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11478pub const fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
11479    unsafe {
11480        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
11481        transmute(simd_select_bitmask(k, convert, i8x32::ZERO))
11482    }
11483}
11484
11485/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
11486///
11487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi8&expand=1404)
11488#[inline]
11489#[target_feature(enable = "avx512bw,avx512vl")]
11490#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11491#[cfg_attr(test, assert_instr(vpmovwb))]
11492#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11493pub const fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
11494    unsafe {
11495        let a = a.as_i16x16();
11496        transmute::<i8x16, _>(simd_cast(a))
11497    }
11498}
11499
11500/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11501///
11502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi8&expand=1405)
11503#[inline]
11504#[target_feature(enable = "avx512bw,avx512vl")]
11505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11506#[cfg_attr(test, assert_instr(vpmovwb))]
11507#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11508pub const fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
11509    unsafe {
11510        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
11511        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
11512    }
11513}
11514
11515/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11516///
11517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi8&expand=1406)
11518#[inline]
11519#[target_feature(enable = "avx512bw,avx512vl")]
11520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11521#[cfg_attr(test, assert_instr(vpmovwb))]
11522#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11523pub const fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
11524    unsafe {
11525        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
11526        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
11527    }
11528}
11529
11530/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
11531///
11532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi8&expand=1401)
11533#[inline]
11534#[target_feature(enable = "avx512bw,avx512vl")]
11535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11536#[cfg_attr(test, assert_instr(vpmovwb))]
11537#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11538pub const fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
11539    unsafe {
11540        let a = a.as_i16x8();
11541        let v256: i16x16 = simd_shuffle!(
11542            a,
11543            i16x8::ZERO,
11544            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
11545        );
11546        transmute::<i8x16, _>(simd_cast(v256))
11547    }
11548}
11549
11550/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11551///
11552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi8&expand=1402)
11553#[inline]
11554#[target_feature(enable = "avx512bw,avx512vl")]
11555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11556#[cfg_attr(test, assert_instr(vpmovwb))]
11557#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11558pub const fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11559    unsafe {
11560        let a = _mm_cvtepi16_epi8(a).as_i8x16();
11561        let src = simd_shuffle!(
11562            src.as_i8x16(),
11563            i8x16::ZERO,
11564            [0, 1, 2, 3, 4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16]
11565        );
11566        simd_select_bitmask(k as u16, a, src).as_m128i()
11567    }
11568}
11569
11570/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11571///
11572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi8&expand=1403)
11573#[inline]
11574#[target_feature(enable = "avx512bw,avx512vl")]
11575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11576#[cfg_attr(test, assert_instr(vpmovwb))]
11577#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11578pub const fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
11579    _mm_mask_cvtepi16_epi8(_mm_setzero_si128(), k, a)
11580}
11581
11582/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi16_epi8&expand=1807)
11585#[inline]
11586#[target_feature(enable = "avx512bw")]
11587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11588#[cfg_attr(test, assert_instr(vpmovswb))]
11589#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11590pub const fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
11591    unsafe {
11592        simd_cast::<_, i8x32>(simd_imax(
11593            simd_imin(a.as_i16x32(), i16x32::splat(i8::MAX as _)),
11594            i16x32::splat(i8::MIN as _),
11595        ))
11596        .as_m256i()
11597    }
11598}
11599
11600/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11601///
11602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_epi8&expand=1808)
11603#[inline]
11604#[target_feature(enable = "avx512bw")]
11605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11606#[cfg_attr(test, assert_instr(vpmovswb))]
11607#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11608pub const fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
11609    unsafe {
11610        simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), src.as_i8x32()).as_m256i()
11611    }
11612}
11613
11614/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11615///
11616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi16_epi8&expand=1809)
11617#[inline]
11618#[target_feature(enable = "avx512bw")]
11619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11620#[cfg_attr(test, assert_instr(vpmovswb))]
11621#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11622pub const fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
11623    unsafe { simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), i8x32::ZERO).as_m256i() }
11624}
11625
11626/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
11627///
11628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi16_epi8&expand=1804)
11629#[inline]
11630#[target_feature(enable = "avx512bw,avx512vl")]
11631#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11632#[cfg_attr(test, assert_instr(vpmovswb))]
11633#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11634pub const fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
11635    unsafe {
11636        simd_cast::<_, i8x16>(simd_imax(
11637            simd_imin(a.as_i16x16(), i16x16::splat(i8::MAX as _)),
11638            i16x16::splat(i8::MIN as _),
11639        ))
11640        .as_m128i()
11641    }
11642}
11643
11644/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11645///
11646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_epi8&expand=1805)
11647#[inline]
11648#[target_feature(enable = "avx512bw,avx512vl")]
11649#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11650#[cfg_attr(test, assert_instr(vpmovswb))]
11651#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11652pub const fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
11653    unsafe {
11654        simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), src.as_i8x16()).as_m128i()
11655    }
11656}
11657
11658/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11659///
11660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi16_epi8&expand=1806)
11661#[inline]
11662#[target_feature(enable = "avx512bw,avx512vl")]
11663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11664#[cfg_attr(test, assert_instr(vpmovswb))]
11665#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11666pub const fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
11667    unsafe { simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), i8x16::ZERO).as_m128i() }
11668}
11669
11670/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
11671///
11672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi16_epi8&expand=1801)
11673#[inline]
11674#[target_feature(enable = "avx512bw,avx512vl")]
11675#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11676#[cfg_attr(test, assert_instr(vpmovswb))]
11677pub fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i {
11678    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111)) }
11679}
11680
11681/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11682///
11683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_epi8&expand=1802)
11684#[inline]
11685#[target_feature(enable = "avx512bw,avx512vl")]
11686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11687#[cfg_attr(test, assert_instr(vpmovswb))]
11688pub fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11689    unsafe { transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k)) }
11690}
11691
11692/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11693///
11694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi16_epi8&expand=1803)
11695#[inline]
11696#[target_feature(enable = "avx512bw,avx512vl")]
11697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11698#[cfg_attr(test, assert_instr(vpmovswb))]
11699pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
11700    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k)) }
11701}
11702
11703/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
11704///
11705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi16_epi8&expand=2042)
11706#[inline]
11707#[target_feature(enable = "avx512bw")]
11708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11709#[cfg_attr(test, assert_instr(vpmovuswb))]
11710#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11711pub const fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
11712    unsafe {
11713        simd_cast::<_, u8x32>(simd_imin(a.as_u16x32(), u16x32::splat(u8::MAX as _))).as_m256i()
11714    }
11715}
11716
11717/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11718///
11719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_epi8&expand=2043)
11720#[inline]
11721#[target_feature(enable = "avx512bw")]
11722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11723#[cfg_attr(test, assert_instr(vpmovuswb))]
11724#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11725pub const fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
11726    unsafe {
11727        simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), src.as_u8x32()).as_m256i()
11728    }
11729}
11730
11731/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11732///
11733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi16_epi8&expand=2044)
11734#[inline]
11735#[target_feature(enable = "avx512bw")]
11736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11737#[cfg_attr(test, assert_instr(vpmovuswb))]
11738#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11739pub const fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
11740    unsafe { simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), u8x32::ZERO).as_m256i() }
11741}
11742
11743/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
11744///
11745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi16_epi8&expand=2039)
11746#[inline]
11747#[target_feature(enable = "avx512bw,avx512vl")]
11748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11749#[cfg_attr(test, assert_instr(vpmovuswb))]
11750#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11751pub const fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
11752    unsafe {
11753        simd_cast::<_, u8x16>(simd_imin(a.as_u16x16(), u16x16::splat(u8::MAX as _))).as_m128i()
11754    }
11755}
11756
11757/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11758///
11759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_epi8&expand=2040)
11760#[inline]
11761#[target_feature(enable = "avx512bw,avx512vl")]
11762#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11763#[cfg_attr(test, assert_instr(vpmovuswb))]
11764#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11765pub const fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
11766    unsafe {
11767        simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), src.as_u8x16()).as_m128i()
11768    }
11769}
11770
11771/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11772///
11773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi16_epi8&expand=2041)
11774#[inline]
11775#[target_feature(enable = "avx512bw,avx512vl")]
11776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11777#[cfg_attr(test, assert_instr(vpmovuswb))]
11778#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11779pub const fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
11780    unsafe { simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), u8x16::ZERO).as_m128i() }
11781}
11782
11783/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
11784///
11785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi16_epi8&expand=2036)
11786#[inline]
11787#[target_feature(enable = "avx512bw,avx512vl")]
11788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11789#[cfg_attr(test, assert_instr(vpmovuswb))]
11790pub fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i {
11791    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111)) }
11792}
11793
11794/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11795///
11796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_epi8&expand=2037)
11797#[inline]
11798#[target_feature(enable = "avx512bw,avx512vl")]
11799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11800#[cfg_attr(test, assert_instr(vpmovuswb))]
11801pub fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11802    unsafe { transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k)) }
11803}
11804
11805/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11806///
11807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi16_epi8&expand=2038)
11808#[inline]
11809#[target_feature(enable = "avx512bw,avx512vl")]
11810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11811#[cfg_attr(test, assert_instr(vpmovuswb))]
11812pub fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
11813    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k)) }
11814}
11815
11816/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst.
11817///
11818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi16&expand=1526)
11819#[inline]
11820#[target_feature(enable = "avx512bw")]
11821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11822#[cfg_attr(test, assert_instr(vpmovsxbw))]
11823#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11824pub const fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
11825    unsafe {
11826        let a = a.as_i8x32();
11827        transmute::<i16x32, _>(simd_cast(a))
11828    }
11829}
11830
11831/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11832///
11833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi16&expand=1527)
11834#[inline]
11835#[target_feature(enable = "avx512bw")]
11836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11837#[cfg_attr(test, assert_instr(vpmovsxbw))]
11838#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11839pub const fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
11840    unsafe {
11841        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
11842        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
11843    }
11844}
11845
11846/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11847///
11848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi16&expand=1528)
11849#[inline]
11850#[target_feature(enable = "avx512bw")]
11851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11852#[cfg_attr(test, assert_instr(vpmovsxbw))]
11853#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11854pub const fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
11855    unsafe {
11856        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
11857        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
11858    }
11859}
11860
11861/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11862///
11863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi16&expand=1524)
11864#[inline]
11865#[target_feature(enable = "avx512bw,avx512vl")]
11866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11867#[cfg_attr(test, assert_instr(vpmovsxbw))]
11868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11869pub const fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
11870    unsafe {
11871        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
11872        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
11873    }
11874}
11875
11876/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11877///
11878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi16&expand=1525)
11879#[inline]
11880#[target_feature(enable = "avx512bw,avx512vl")]
11881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11882#[cfg_attr(test, assert_instr(vpmovsxbw))]
11883#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11884pub const fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
11885    unsafe {
11886        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
11887        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
11888    }
11889}
11890
11891/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11892///
11893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi16&expand=1521)
11894#[inline]
11895#[target_feature(enable = "avx512bw,avx512vl")]
11896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11897#[cfg_attr(test, assert_instr(vpmovsxbw))]
11898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11899pub const fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11900    unsafe {
11901        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
11902        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
11903    }
11904}
11905
11906/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11907///
11908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi16&expand=1522)
11909#[inline]
11910#[target_feature(enable = "avx512bw,avx512vl")]
11911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11912#[cfg_attr(test, assert_instr(vpmovsxbw))]
11913#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11914pub const fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
11915    unsafe {
11916        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
11917        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
11918    }
11919}
11920
11921/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst.
11922///
11923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi16&expand=1612)
11924#[inline]
11925#[target_feature(enable = "avx512bw")]
11926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11927#[cfg_attr(test, assert_instr(vpmovzxbw))]
11928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11929pub const fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
11930    unsafe {
11931        let a = a.as_u8x32();
11932        transmute::<i16x32, _>(simd_cast(a))
11933    }
11934}
11935
11936/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11937///
11938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi16&expand=1613)
11939#[inline]
11940#[target_feature(enable = "avx512bw")]
11941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11942#[cfg_attr(test, assert_instr(vpmovzxbw))]
11943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11944pub const fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
11945    unsafe {
11946        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
11947        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
11948    }
11949}
11950
11951/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11952///
11953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi16&expand=1614)
11954#[inline]
11955#[target_feature(enable = "avx512bw")]
11956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11957#[cfg_attr(test, assert_instr(vpmovzxbw))]
11958#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11959pub const fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
11960    unsafe {
11961        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
11962        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
11963    }
11964}
11965
11966/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11967///
11968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi16&expand=1610)
11969#[inline]
11970#[target_feature(enable = "avx512bw,avx512vl")]
11971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11972#[cfg_attr(test, assert_instr(vpmovzxbw))]
11973#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11974pub const fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
11975    unsafe {
11976        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
11977        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
11978    }
11979}
11980
11981/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11982///
11983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi16&expand=1611)
11984#[inline]
11985#[target_feature(enable = "avx512bw,avx512vl")]
11986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11987#[cfg_attr(test, assert_instr(vpmovzxbw))]
11988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11989pub const fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
11990    unsafe {
11991        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
11992        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
11993    }
11994}
11995
11996/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11997///
11998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi16&expand=1607)
11999#[inline]
12000#[target_feature(enable = "avx512bw,avx512vl")]
12001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12002#[cfg_attr(test, assert_instr(vpmovzxbw))]
12003#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12004pub const fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12005    unsafe {
12006        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
12007        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
12008    }
12009}
12010
12011/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12012///
12013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi16&expand=1608)
12014#[inline]
12015#[target_feature(enable = "avx512bw,avx512vl")]
12016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12017#[cfg_attr(test, assert_instr(vpmovzxbw))]
12018#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12019pub const fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
12020    unsafe {
12021        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
12022        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
12023    }
12024}
12025
/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bslli_epi128&expand=591)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shuffle-index generator for the byte shift: result byte `i` is byte
        // `i - shift` of the same 128-bit lane of `a`, or zero when that byte
        // would come from before the start of the lane. In the simd_shuffle!
        // below the first operand (`zero`) occupies indices 0..64 and the
        // second (`a`) indices 64..128, hence the `64 +` offset for bytes
        // taken from `a`. Like VPSLLDQ, a shift count above 15 zeroes the lane.
        const fn mask(shift: i32, i: u32) -> u32 {
            let shift = shift as u32 & 0xff;
            if shift > 15 || i % 16 < shift {
                0 // shifted in from outside the lane: select a zero byte
            } else {
                64 + (i - shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        // One entry per byte of the 512-bit result; simd_shuffle! requires a
        // fully spelled-out const index array.
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}
12121
/// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bsrli_epi128&expand=594)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Shuffle-index generator for the byte shift: result byte `i` is byte
        // `i + shift` of the same 128-bit lane of `a`, or zero when that byte
        // would fall past the end of the lane (i.e. `i % 16 + shift > 15`).
        // In the simd_shuffle! below the first operand (`zero`) occupies
        // indices 0..64 and the second (`a`) indices 64..128, hence the
        // `64 +` offset. Like VPSRLDQ, a shift count above 15 zeroes the lane.
        const fn mask(shift: i32, i: u32) -> u32 {
            let shift = shift as u32 & 0xff;
            if shift > 15 || (15 - (i % 16)) < shift {
                0 // shifted in from beyond the lane end: select a zero byte
            } else {
                64 + (i + shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        // One entry per byte of the 512-bit result; simd_shuffle! requires a
        // fully spelled-out const index array.
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}
12217
12218/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst.
12219/// Unlike [`_mm_alignr_epi8`], [`_mm256_alignr_epi8`] functions, where the entire input vectors are concatenated to the temporary result,
12220/// this concatenation happens in 4 steps, where each step builds 32-byte temporary result.
12221///
12222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi8&expand=263)
12223#[inline]
12224#[target_feature(enable = "avx512bw")]
12225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12226#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
12227#[rustc_legacy_const_generics(2)]
12228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12229pub const fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
12230    const fn mask(shift: u32, i: u32) -> u32 {
12231        let shift = shift % 16;
12232        let mod_i = i % 16;
12233        if mod_i < (16 - shift) {
12234            i + shift
12235        } else {
12236            i + 48 + shift
12237        }
12238    }
12239
12240    // If palignr is shifting the pair of vectors more than the size of two
12241    // lanes, emit zero.
12242    if IMM8 >= 32 {
12243        return _mm512_setzero_si512();
12244    }
12245    // If palignr is shifting the pair of input vectors more than one lane,
12246    // but less than two lanes, convert to shifting in zeroes.
12247    let (a, b) = if IMM8 > 16 {
12248        (_mm512_setzero_si512(), a)
12249    } else {
12250        (a, b)
12251    };
12252    unsafe {
12253        if IMM8 == 16 {
12254            return transmute(a);
12255        }
12256
12257        let r: i8x64 = simd_shuffle!(
12258            b.as_i8x64(),
12259            a.as_i8x64(),
12260            [
12261                mask(IMM8 as u32, 0),
12262                mask(IMM8 as u32, 1),
12263                mask(IMM8 as u32, 2),
12264                mask(IMM8 as u32, 3),
12265                mask(IMM8 as u32, 4),
12266                mask(IMM8 as u32, 5),
12267                mask(IMM8 as u32, 6),
12268                mask(IMM8 as u32, 7),
12269                mask(IMM8 as u32, 8),
12270                mask(IMM8 as u32, 9),
12271                mask(IMM8 as u32, 10),
12272                mask(IMM8 as u32, 11),
12273                mask(IMM8 as u32, 12),
12274                mask(IMM8 as u32, 13),
12275                mask(IMM8 as u32, 14),
12276                mask(IMM8 as u32, 15),
12277                mask(IMM8 as u32, 16),
12278                mask(IMM8 as u32, 17),
12279                mask(IMM8 as u32, 18),
12280                mask(IMM8 as u32, 19),
12281                mask(IMM8 as u32, 20),
12282                mask(IMM8 as u32, 21),
12283                mask(IMM8 as u32, 22),
12284                mask(IMM8 as u32, 23),
12285                mask(IMM8 as u32, 24),
12286                mask(IMM8 as u32, 25),
12287                mask(IMM8 as u32, 26),
12288                mask(IMM8 as u32, 27),
12289                mask(IMM8 as u32, 28),
12290                mask(IMM8 as u32, 29),
12291                mask(IMM8 as u32, 30),
12292                mask(IMM8 as u32, 31),
12293                mask(IMM8 as u32, 32),
12294                mask(IMM8 as u32, 33),
12295                mask(IMM8 as u32, 34),
12296                mask(IMM8 as u32, 35),
12297                mask(IMM8 as u32, 36),
12298                mask(IMM8 as u32, 37),
12299                mask(IMM8 as u32, 38),
12300                mask(IMM8 as u32, 39),
12301                mask(IMM8 as u32, 40),
12302                mask(IMM8 as u32, 41),
12303                mask(IMM8 as u32, 42),
12304                mask(IMM8 as u32, 43),
12305                mask(IMM8 as u32, 44),
12306                mask(IMM8 as u32, 45),
12307                mask(IMM8 as u32, 46),
12308                mask(IMM8 as u32, 47),
12309                mask(IMM8 as u32, 48),
12310                mask(IMM8 as u32, 49),
12311                mask(IMM8 as u32, 50),
12312                mask(IMM8 as u32, 51),
12313                mask(IMM8 as u32, 52),
12314                mask(IMM8 as u32, 53),
12315                mask(IMM8 as u32, 54),
12316                mask(IMM8 as u32, 55),
12317                mask(IMM8 as u32, 56),
12318                mask(IMM8 as u32, 57),
12319                mask(IMM8 as u32, 58),
12320                mask(IMM8 as u32, 59),
12321                mask(IMM8 as u32, 60),
12322                mask(IMM8 as u32, 61),
12323                mask(IMM8 as u32, 62),
12324                mask(IMM8 as u32, 63),
12325            ],
12326        );
12327        transmute(r)
12328    }
12329}
12330
12331/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12332///
12333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi8&expand=264)
12334#[inline]
12335#[target_feature(enable = "avx512bw")]
12336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12337#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
12338#[rustc_legacy_const_generics(4)]
12339#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12340pub const fn _mm512_mask_alignr_epi8<const IMM8: i32>(
12341    src: __m512i,
12342    k: __mmask64,
12343    a: __m512i,
12344    b: __m512i,
12345) -> __m512i {
12346    unsafe {
12347        static_assert_uimm_bits!(IMM8, 8);
12348        let r = _mm512_alignr_epi8::<IMM8>(a, b);
12349        transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64()))
12350    }
12351}
12352
12353/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12354///
12355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi8&expand=265)
12356#[inline]
12357#[target_feature(enable = "avx512bw")]
12358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12359#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
12360#[rustc_legacy_const_generics(3)]
12361#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12362pub const fn _mm512_maskz_alignr_epi8<const IMM8: i32>(
12363    k: __mmask64,
12364    a: __m512i,
12365    b: __m512i,
12366) -> __m512i {
12367    unsafe {
12368        static_assert_uimm_bits!(IMM8, 8);
12369        let r = _mm512_alignr_epi8::<IMM8>(a, b);
12370        transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO))
12371    }
12372}
12373
12374/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12375///
12376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi8&expand=261)
12377#[inline]
12378#[target_feature(enable = "avx512bw,avx512vl")]
12379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12380#[rustc_legacy_const_generics(4)]
12381#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12382#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12383pub const fn _mm256_mask_alignr_epi8<const IMM8: i32>(
12384    src: __m256i,
12385    k: __mmask32,
12386    a: __m256i,
12387    b: __m256i,
12388) -> __m256i {
12389    unsafe {
12390        static_assert_uimm_bits!(IMM8, 8);
12391        let r = _mm256_alignr_epi8::<IMM8>(a, b);
12392        transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32()))
12393    }
12394}
12395
12396/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12397///
12398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi8&expand=262)
12399#[inline]
12400#[target_feature(enable = "avx512bw,avx512vl")]
12401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12402#[rustc_legacy_const_generics(3)]
12403#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12404#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12405pub const fn _mm256_maskz_alignr_epi8<const IMM8: i32>(
12406    k: __mmask32,
12407    a: __m256i,
12408    b: __m256i,
12409) -> __m256i {
12410    unsafe {
12411        static_assert_uimm_bits!(IMM8, 8);
12412        let r = _mm256_alignr_epi8::<IMM8>(a, b);
12413        transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO))
12414    }
12415}
12416
12417/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12418///
12419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi8&expand=258)
12420#[inline]
12421#[target_feature(enable = "avx512bw,avx512vl")]
12422#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12423#[rustc_legacy_const_generics(4)]
12424#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12425#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12426pub const fn _mm_mask_alignr_epi8<const IMM8: i32>(
12427    src: __m128i,
12428    k: __mmask16,
12429    a: __m128i,
12430    b: __m128i,
12431) -> __m128i {
12432    unsafe {
12433        static_assert_uimm_bits!(IMM8, 8);
12434        let r = _mm_alignr_epi8::<IMM8>(a, b);
12435        transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
12436    }
12437}
12438
12439/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12440///
12441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi8&expand=259)
12442#[inline]
12443#[target_feature(enable = "avx512bw,avx512vl")]
12444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12445#[rustc_legacy_const_generics(3)]
12446#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12447#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12448pub const fn _mm_maskz_alignr_epi8<const IMM8: i32>(
12449    k: __mmask16,
12450    a: __m128i,
12451    b: __m128i,
12452) -> __m128i {
12453    unsafe {
12454        static_assert_uimm_bits!(IMM8, 8);
12455        let r = _mm_alignr_epi8::<IMM8>(a, b);
12456        transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO))
12457    }
12458}
12459
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    // Delegates directly to the LLVM masked pmovswb-to-memory intrinsic;
    // only the lanes selected by `k` are written.
    vpmovswbmem(mem_addr, a.as_i16x32(), k);
}
12470
12471/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12472///
12473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
12474#[inline]
12475#[target_feature(enable = "avx512bw,avx512vl")]
12476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12477#[cfg_attr(test, assert_instr(vpmovswb))]
12478pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
12479    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
12480
12481    let max = simd_splat(i16::from(i8::MAX));
12482    let min = simd_splat(i16::from(i8::MIN));
12483
12484    let v = simd_imax(simd_imin(a.as_i16x16(), max), min);
12485    let truncated: i8x16 = simd_cast(v);
12486    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, truncated);
12487}
12488
12489/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12490///
12491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
12492#[inline]
12493#[target_feature(enable = "avx512bw,avx512vl")]
12494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12495#[cfg_attr(test, assert_instr(vpmovswb))]
12496pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
12497    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
12498
12499    let max = simd_splat(i16::from(i8::MAX));
12500    let min = simd_splat(i16::from(i8::MIN));
12501
12502    let v = simd_imax(simd_imin(a.as_i16x8(), max), min);
12503    let truncated: i8x8 = simd_cast(v);
12504    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, truncated);
12505}
12506
12507/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12508///
12509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
12510#[inline]
12511#[target_feature(enable = "avx512bw")]
12512#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12513#[cfg_attr(test, assert_instr(vpmovwb))]
12514#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
12515pub const unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
12516    let result = _mm512_cvtepi16_epi8(a).as_i8x32();
12517    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
12518    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
12519}
12520
12521/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12522///
12523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
12524#[inline]
12525#[target_feature(enable = "avx512bw,avx512vl")]
12526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12527#[cfg_attr(test, assert_instr(vpmovwb))]
12528#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
12529pub const unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
12530    let result = _mm256_cvtepi16_epi8(a).as_i8x16();
12531    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
12532    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
12533}
12534
12535/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12536///
12537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
12538#[inline]
12539#[target_feature(enable = "avx512bw,avx512vl")]
12540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12541#[cfg_attr(test, assert_instr(vpmovwb))]
12542#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
12543pub const unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
12544    let result: i8x8 = simd_shuffle!(
12545        _mm_cvtepi16_epi8(a).as_i8x16(),
12546        i8x16::ZERO,
12547        [0, 1, 2, 3, 4, 5, 6, 7]
12548    );
12549    let mask = simd_select_bitmask(k, i8x8::splat(!0), i8x8::ZERO);
12550    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
12551}
12552
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    // Delegates directly to the LLVM masked pmovuswb-to-memory intrinsic;
    // only the lanes selected by `k` are written.
    vpmovuswbmem(mem_addr, a.as_i16x32(), k);
}
12563
12564/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12565///
12566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
12567#[inline]
12568#[target_feature(enable = "avx512bw,avx512vl")]
12569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12570#[cfg_attr(test, assert_instr(vpmovuswb))]
12571pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
12572    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
12573    let mem_addr = mem_addr.cast::<u8>();
12574    let max = simd_splat(u16::from(u8::MAX));
12575
12576    let truncated: u8x16 = simd_cast(simd_imin(a.as_u16x16(), max));
12577    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, truncated);
12578}
12579
12580/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12581///
12582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
12583#[inline]
12584#[target_feature(enable = "avx512bw,avx512vl")]
12585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12586#[cfg_attr(test, assert_instr(vpmovuswb))]
12587pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
12588    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
12589    let mem_addr = mem_addr.cast::<u8>();
12590    let max = simd_splat(u16::from(u8::MAX));
12591
12592    let v = a.as_u16x8();
12593    let v = simd_imin(v, max);
12594
12595    let truncated: u8x8 = simd_cast(v);
12596    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, truncated);
12597}
12598
// Raw LLVM intrinsic declarations backing the intrinsics in this module.
// Names follow the x86 instruction mnemonics they lower to.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Multiply-high with round and scale (vpmulhrsw).
    #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
    fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;

    // Multiply-and-horizontal-add families (vpmaddwd / vpmaddubsw).
    #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
    fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
    fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;

    // Pack-with-saturation families (signed / unsigned).
    #[link_name = "llvm.x86.avx512.packssdw.512"]
    fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
    #[link_name = "llvm.x86.avx512.packsswb.512"]
    fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
    #[link_name = "llvm.x86.avx512.packusdw.512"]
    fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
    #[link_name = "llvm.x86.avx512.packuswb.512"]
    fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;

    // 16-bit shifts by a 128-bit count operand: logical left/right,
    // arithmetic right.
    #[link_name = "llvm.x86.avx512.psll.w.512"]
    fn vpsllw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrl.w.512"]
    fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psra.w.512"]
    fn vpsraw(a: i16x32, count: i16x8) -> i16x32;

    // Two-source 16-bit permutes (vpermi2w), all vector widths.
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
    fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
    fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"]
    fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8;

    // Single-source 16-bit permutes (vpermw), all vector widths.
    #[link_name = "llvm.x86.avx512.permvar.hi.512"]
    fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.permvar.hi.256"]
    fn vpermw256(a: i16x16, idx: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.permvar.hi.128"]
    fn vpermw128(a: i16x8, idx: i16x8) -> i16x8;

    // Byte shuffle (vpshufb).
    #[link_name = "llvm.x86.avx512.pshuf.b.512"]
    fn vpshufb(a: i8x64, b: i8x64) -> i8x64;

    // Sum of absolute differences (vpsadbw).
    #[link_name = "llvm.x86.avx512.psad.bw.512"]
    fn vpsadbw(a: u8x64, b: u8x64) -> u64x8;

    // Double-block packed SAD (vdbpsadbw), all vector widths.
    #[link_name = "llvm.x86.avx512.dbpsadbw.512"]
    fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32;
    #[link_name = "llvm.x86.avx512.dbpsadbw.256"]
    fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
    fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;

    // Masked narrowing moves with saturation, register form (128-bit).
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
    fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
    fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;

    // Masked narrowing moves with saturation, memory form (512-bit).
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
    fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
    fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
}
12666
12667#[cfg(test)]
12668mod tests {
12669    use crate::core_arch::assert_eq_const as assert_eq;
12670
12671    use stdarch_test::simd_test;
12672
12673    use crate::core_arch::x86::*;
12674    use crate::hint::black_box;
12675    use crate::mem::{self};
12676
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_abs_epi16() {
        // |-1| == 1 in every 16-bit lane.
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_abs_epi16(a);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }
12684
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_abs_epi16() {
        // Zero mask: src is returned untouched; set bits select |a| = 1.
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_mask_abs_epi16(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
12696
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_abs_epi16() {
        // Zero mask: all lanes zeroed; set bits select |a| = 1, clear bits give 0.
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_maskz_abs_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
12708
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_abs_epi16() {
        // Zero mask: src is returned untouched; set bits select |a| = 1.
        let a = _mm256_set1_epi16(-1);
        let r = _mm256_mask_abs_epi16(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
12718
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_abs_epi16() {
        // Zero mask: all lanes zeroed; set bits select |a| = 1, clear bits give 0.
        let a = _mm256_set1_epi16(-1);
        let r = _mm256_maskz_abs_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
12728
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_abs_epi16() {
        // Zero mask: src is returned untouched; set bits select |a| = 1.
        let a = _mm_set1_epi16(-1);
        let r = _mm_mask_abs_epi16(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi16(a, 0b00001111, a);
        let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
12738
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_abs_epi16() {
        // Zero mask: all lanes zeroed; set bits select |a| = 1, clear bits give 0.
        let a = _mm_set1_epi16(-1);
        let r = _mm_maskz_abs_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
12748
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_abs_epi8() {
        // |-1| == 1 in every 8-bit lane.
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_abs_epi8(a);
        let e = _mm512_set1_epi8(1);
        assert_eq_m512i(r, e);
    }
12756
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_abs_epi8() {
        // Zero mask: src is returned untouched; set bits select |a| = 1.
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_mask_abs_epi8(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
12774
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_abs_epi8() {
        // Zero mask: all lanes zeroed; set bits select |a| = 1, clear bits give 0.
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_maskz_abs_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
12791
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_abs_epi8() {
        // Zero mask: src is returned untouched; set bits select |a| = 1.
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_mask_abs_epi8(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
12803
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_abs_epi8() {
        // Zero mask: all lanes zeroed; set bits select |a| = 1, clear bits give 0.
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_maskz_abs_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
12815
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_abs_epi8() {
        // Zero mask: src is returned untouched; set bits select |a| = 1.
        let a = _mm_set1_epi8(-1);
        let r = _mm_mask_abs_epi8(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
12825
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_abs_epi8() {
        // Zero mask: all lanes zeroed; set bits select |a| = 1, clear bits give 0.
        let a = _mm_set1_epi8(-1);
        let r = _mm_maskz_abs_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi8(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
12836
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_add_epi16() {
        // 1 + 2 == 3 in every 16-bit lane.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_add_epi16(a, b);
        let e = _mm512_set1_epi16(3);
        assert_eq_m512i(r, e);
    }
12845
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_add_epi16() {
        // Zero mask: src is returned untouched; set bits select a + b = 3.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_add_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }
12858
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_add_epi16() {
        // Zero mask: all lanes zeroed; set bits select a + b = 3, clear bits give 0.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_add_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }
12871
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_add_epi16() {
        // Zero mask: src is returned untouched; set bits select a + b = 3.
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_add_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }
12882
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_add_epi16() {
        // Zero mask: all lanes zeroed; set bits select a + b = 3, clear bits give 0.
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_add_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }
12893
12894    #[simd_test(enable = "avx512bw,avx512vl")]
12895    const fn test_mm_mask_add_epi16() {
12896        let a = _mm_set1_epi16(1);
12897        let b = _mm_set1_epi16(2);
12898        let r = _mm_mask_add_epi16(a, 0, a, b);
12899        assert_eq_m128i(r, a);
12900        let r = _mm_mask_add_epi16(a, 0b00001111, a, b);
12901        let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3);
12902        assert_eq_m128i(r, e);
12903    }
12904
12905    #[simd_test(enable = "avx512bw,avx512vl")]
12906    const fn test_mm_maskz_add_epi16() {
12907        let a = _mm_set1_epi16(1);
12908        let b = _mm_set1_epi16(2);
12909        let r = _mm_maskz_add_epi16(0, a, b);
12910        assert_eq_m128i(r, _mm_setzero_si128());
12911        let r = _mm_maskz_add_epi16(0b00001111, a, b);
12912        let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3);
12913        assert_eq_m128i(r, e);
12914    }
12915
12916    #[simd_test(enable = "avx512bw")]
12917    const fn test_mm512_add_epi8() {
12918        let a = _mm512_set1_epi8(1);
12919        let b = _mm512_set1_epi8(2);
12920        let r = _mm512_add_epi8(a, b);
12921        let e = _mm512_set1_epi8(3);
12922        assert_eq_m512i(r, e);
12923    }
12924
    // Writemask 8-bit add: mask bit clear keeps the `src` lane; the alternating
    // byte mask selects the low 8 lanes of each 16-lane quarter (1 + 2 = 3).
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_add_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        // Highest lane first in _mm512_set_epi8, so selected (3) lanes trail each group.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    // Zeromask 8-bit add: unselected lanes become zero; all-zero mask gives zero vector.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_add_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask 8-bit add (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_add_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_add_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask 8-bit add: low byte of the mask selects lanes 0-7.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_add_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_add_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }
13011
13012    #[simd_test(enable = "avx512bw")]
13013    const fn test_mm512_adds_epu16() {
13014        let a = _mm512_set1_epi16(1);
13015        let b = _mm512_set1_epi16(u16::MAX as i16);
13016        let r = _mm512_adds_epu16(a, b);
13017        let e = _mm512_set1_epi16(u16::MAX as i16);
13018        assert_eq_m512i(r, e);
13019    }
13020
    // Writemask unsigned saturating add: mask 0b...00001111 selects lanes 0-3,
    // which saturate at u16::MAX (1 + 0xFFFF); other lanes keep `src` (1).
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_adds_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        // Highest lane first: the four saturated lanes appear at the end of the list.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    // Zeromask unsigned saturating add: unselected lanes zeroed.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_adds_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask unsigned saturating add (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_mask_adds_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask unsigned saturating add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_maskz_adds_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask unsigned saturating add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_mask_adds_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu16(a, 0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask unsigned saturating add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_maskz_adds_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu16(0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }
13094
13095    #[simd_test(enable = "avx512bw")]
13096    const fn test_mm512_adds_epu8() {
13097        let a = _mm512_set1_epi8(1);
13098        let b = _mm512_set1_epi8(u8::MAX as i8);
13099        let r = _mm512_adds_epu8(a, b);
13100        let e = _mm512_set1_epi8(u8::MAX as i8);
13101        assert_eq_m512i(r, e);
13102    }
13103
    // Writemask unsigned saturating 8-bit add: only lanes 0-3 selected; those
    // saturate at u8::MAX (1 + 0xFF), the rest keep `src` (1).
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_mask_adds_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        // Highest lane listed first, so the four saturated lanes close the list.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    // Zeromask unsigned saturating 8-bit add: unselected lanes zeroed.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_maskz_adds_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask unsigned saturating 8-bit add (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_mask_adds_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask unsigned saturating 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_maskz_adds_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask unsigned saturating 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_mask_adds_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask unsigned saturating 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_maskz_adds_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
13192
13193    #[simd_test(enable = "avx512bw")]
13194    const fn test_mm512_adds_epi16() {
13195        let a = _mm512_set1_epi16(1);
13196        let b = _mm512_set1_epi16(i16::MAX);
13197        let r = _mm512_adds_epi16(a, b);
13198        let e = _mm512_set1_epi16(i16::MAX);
13199        assert_eq_m512i(r, e);
13200    }
13201
    // Writemask signed saturating add: lanes 0-3 selected; they saturate at
    // i16::MAX (1 + i16::MAX), the rest keep `src` (1).
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_adds_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        // Highest lane first: saturated lanes appear at the end of the list.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    // Zeromask signed saturating add: unselected lanes zeroed.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_adds_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask signed saturating add (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_adds_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask signed saturating add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_adds_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask signed saturating add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_adds_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask signed saturating add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_adds_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
13273
13274    #[simd_test(enable = "avx512bw")]
13275    const fn test_mm512_adds_epi8() {
13276        let a = _mm512_set1_epi8(1);
13277        let b = _mm512_set1_epi8(i8::MAX);
13278        let r = _mm512_adds_epi8(a, b);
13279        let e = _mm512_set1_epi8(i8::MAX);
13280        assert_eq_m512i(r, e);
13281    }
13282
    // Writemask signed saturating 8-bit add: lanes 0-3 selected; they saturate
    // at i8::MAX (1 + i8::MAX), the rest keep `src` (1).
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_mask_adds_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        // Highest lane first: the saturated lanes close the list.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    // Zeromask signed saturating 8-bit add: unselected lanes zeroed.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_maskz_adds_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask signed saturating 8-bit add (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_mask_adds_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask signed saturating 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_maskz_adds_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask signed saturating 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_mask_adds_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask signed saturating 8-bit add.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_maskz_adds_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
13371
13372    #[simd_test(enable = "avx512bw")]
13373    const fn test_mm512_sub_epi16() {
13374        let a = _mm512_set1_epi16(1);
13375        let b = _mm512_set1_epi16(2);
13376        let r = _mm512_sub_epi16(a, b);
13377        let e = _mm512_set1_epi16(-1);
13378        assert_eq_m512i(r, e);
13379    }
13380
    // Writemask 16-bit subtract: selected lanes (0-7 and 16-23) hold 1 - 2 = -1,
    // the rest keep `src` (1).
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_sub_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        // Highest lane first, so the -1 lanes trail each 16-lane half.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    // Zeromask 16-bit subtract: unselected lanes zeroed.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_sub_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask 16-bit subtract (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_sub_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask 16-bit subtract.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_sub_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask 16-bit subtract: mask 0b00001111 selects lanes 0-3.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_sub_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask 16-bit subtract.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_sub_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
13450
13451    #[simd_test(enable = "avx512bw")]
13452    const fn test_mm512_sub_epi8() {
13453        let a = _mm512_set1_epi8(1);
13454        let b = _mm512_set1_epi8(2);
13455        let r = _mm512_sub_epi8(a, b);
13456        let e = _mm512_set1_epi8(-1);
13457        assert_eq_m512i(r, e);
13458    }
13459
    // Writemask 8-bit subtract: the alternating byte mask selects the low 8
    // lanes of each 16-lane quarter, which hold 1 - 2 = -1; others keep `src`.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_sub_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        // Highest lane first: the -1 lanes trail each group of 16.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    // Zeromask 8-bit subtract: unselected lanes zeroed.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_sub_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask 8-bit subtract (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_sub_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask 8-bit subtract.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_sub_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask 8-bit subtract: low byte of the mask selects lanes 0-7.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_sub_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    // 128-bit zeromask 8-bit subtract.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_sub_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
13546
13547    #[simd_test(enable = "avx512bw")]
13548    const fn test_mm512_subs_epu16() {
13549        let a = _mm512_set1_epi16(1);
13550        let b = _mm512_set1_epi16(u16::MAX as i16);
13551        let r = _mm512_subs_epu16(a, b);
13552        let e = _mm512_set1_epi16(0);
13553        assert_eq_m512i(r, e);
13554    }
13555
    // Writemask unsigned saturating subtract: selected lanes 0-3 compute
    // 1 - u16::MAX, which saturates to 0; the rest keep `src` (1).
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_subs_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // Zeromask unsigned saturating subtract: selected lanes saturate to 0 and
    // unselected lanes are zeroed, so the whole result is zero.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_subs_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    // 256-bit writemask unsigned saturating subtract (AVX512VL).
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_subs_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_mask_subs_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 256-bit zeromask unsigned saturating subtract: result is all zero
    // (selected lanes saturate to 0, unselected lanes are zeroed).
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_subs_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_maskz_subs_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    // 128-bit writemask unsigned saturating subtract.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_subs_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_mask_subs_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
13614
13615    #[simd_test(enable = "avx512bw,avx512vl")]
13616    const fn test_mm_maskz_subs_epu16() {
13617        let a = _mm_set1_epi16(1);
13618        let b = _mm_set1_epi16(u16::MAX as i16);
13619        let r = _mm_maskz_subs_epu16(0, a, b);
13620        assert_eq_m128i(r, _mm_setzero_si128());
13621        let r = _mm_maskz_subs_epu16(0b00001111, a, b);
13622        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
13623        assert_eq_m128i(r, e);
13624    }
13625
13626    #[simd_test(enable = "avx512bw")]
13627    const fn test_mm512_subs_epu8() {
13628        let a = _mm512_set1_epi8(1);
13629        let b = _mm512_set1_epi8(u8::MAX as i8);
13630        let r = _mm512_subs_epu8(a, b);
13631        let e = _mm512_set1_epi8(0);
13632        assert_eq_m512i(r, e);
13633    }
13634
13635    #[simd_test(enable = "avx512bw")]
13636    const fn test_mm512_mask_subs_epu8() {
13637        let a = _mm512_set1_epi8(1);
13638        let b = _mm512_set1_epi8(u8::MAX as i8);
13639        let r = _mm512_mask_subs_epu8(a, 0, a, b);
13640        assert_eq_m512i(r, a);
13641        let r = _mm512_mask_subs_epu8(
13642            a,
13643            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13644            a,
13645            b,
13646        );
13647        #[rustfmt::skip]
13648        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13649                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13650                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13651                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13652        assert_eq_m512i(r, e);
13653    }
13654
13655    #[simd_test(enable = "avx512bw")]
13656    const fn test_mm512_maskz_subs_epu8() {
13657        let a = _mm512_set1_epi8(1);
13658        let b = _mm512_set1_epi8(u8::MAX as i8);
13659        let r = _mm512_maskz_subs_epu8(0, a, b);
13660        assert_eq_m512i(r, _mm512_setzero_si512());
13661        let r = _mm512_maskz_subs_epu8(
13662            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13663            a,
13664            b,
13665        );
13666        #[rustfmt::skip]
13667        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13668                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13669                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13670                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13671        assert_eq_m512i(r, e);
13672    }
13673
13674    #[simd_test(enable = "avx512bw,avx512vl")]
13675    const fn test_mm256_mask_subs_epu8() {
13676        let a = _mm256_set1_epi8(1);
13677        let b = _mm256_set1_epi8(u8::MAX as i8);
13678        let r = _mm256_mask_subs_epu8(a, 0, a, b);
13679        assert_eq_m256i(r, a);
13680        let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
13681        #[rustfmt::skip]
13682        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13683                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13684        assert_eq_m256i(r, e);
13685    }
13686
13687    #[simd_test(enable = "avx512bw,avx512vl")]
13688    const fn test_mm256_maskz_subs_epu8() {
13689        let a = _mm256_set1_epi8(1);
13690        let b = _mm256_set1_epi8(u8::MAX as i8);
13691        let r = _mm256_maskz_subs_epu8(0, a, b);
13692        assert_eq_m256i(r, _mm256_setzero_si256());
13693        let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b);
13694        #[rustfmt::skip]
13695        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13696                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13697        assert_eq_m256i(r, e);
13698    }
13699
13700    #[simd_test(enable = "avx512bw,avx512vl")]
13701    const fn test_mm_mask_subs_epu8() {
13702        let a = _mm_set1_epi8(1);
13703        let b = _mm_set1_epi8(u8::MAX as i8);
13704        let r = _mm_mask_subs_epu8(a, 0, a, b);
13705        assert_eq_m128i(r, a);
13706        let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b);
13707        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13708        assert_eq_m128i(r, e);
13709    }
13710
13711    #[simd_test(enable = "avx512bw,avx512vl")]
13712    const fn test_mm_maskz_subs_epu8() {
13713        let a = _mm_set1_epi8(1);
13714        let b = _mm_set1_epi8(u8::MAX as i8);
13715        let r = _mm_maskz_subs_epu8(0, a, b);
13716        assert_eq_m128i(r, _mm_setzero_si128());
13717        let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b);
13718        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13719        assert_eq_m128i(r, e);
13720    }
13721
13722    #[simd_test(enable = "avx512bw")]
13723    const fn test_mm512_subs_epi16() {
13724        let a = _mm512_set1_epi16(-1);
13725        let b = _mm512_set1_epi16(i16::MAX);
13726        let r = _mm512_subs_epi16(a, b);
13727        let e = _mm512_set1_epi16(i16::MIN);
13728        assert_eq_m512i(r, e);
13729    }
13730
13731    #[simd_test(enable = "avx512bw")]
13732    const fn test_mm512_mask_subs_epi16() {
13733        let a = _mm512_set1_epi16(-1);
13734        let b = _mm512_set1_epi16(i16::MAX);
13735        let r = _mm512_mask_subs_epi16(a, 0, a, b);
13736        assert_eq_m512i(r, a);
13737        let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
13738        #[rustfmt::skip]
13739        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13740                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13741        assert_eq_m512i(r, e);
13742    }
13743
13744    #[simd_test(enable = "avx512bw")]
13745    const fn test_mm512_maskz_subs_epi16() {
13746        let a = _mm512_set1_epi16(-1);
13747        let b = _mm512_set1_epi16(i16::MAX);
13748        let r = _mm512_maskz_subs_epi16(0, a, b);
13749        assert_eq_m512i(r, _mm512_setzero_si512());
13750        let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b);
13751        #[rustfmt::skip]
13752        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13753                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13754        assert_eq_m512i(r, e);
13755    }
13756
13757    #[simd_test(enable = "avx512bw,avx512vl")]
13758    const fn test_mm256_mask_subs_epi16() {
13759        let a = _mm256_set1_epi16(-1);
13760        let b = _mm256_set1_epi16(i16::MAX);
13761        let r = _mm256_mask_subs_epi16(a, 0, a, b);
13762        assert_eq_m256i(r, a);
13763        let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b);
13764        #[rustfmt::skip]
13765        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13766        assert_eq_m256i(r, e);
13767    }
13768
13769    #[simd_test(enable = "avx512bw,avx512vl")]
13770    const fn test_mm256_maskz_subs_epi16() {
13771        let a = _mm256_set1_epi16(-1);
13772        let b = _mm256_set1_epi16(i16::MAX);
13773        let r = _mm256_maskz_subs_epi16(0, a, b);
13774        assert_eq_m256i(r, _mm256_setzero_si256());
13775        let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b);
13776        #[rustfmt::skip]
13777        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13778        assert_eq_m256i(r, e);
13779    }
13780
13781    #[simd_test(enable = "avx512bw,avx512vl")]
13782    const fn test_mm_mask_subs_epi16() {
13783        let a = _mm_set1_epi16(-1);
13784        let b = _mm_set1_epi16(i16::MAX);
13785        let r = _mm_mask_subs_epi16(a, 0, a, b);
13786        assert_eq_m128i(r, a);
13787        let r = _mm_mask_subs_epi16(a, 0b00001111, a, b);
13788        let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13789        assert_eq_m128i(r, e);
13790    }
13791
13792    #[simd_test(enable = "avx512bw,avx512vl")]
13793    const fn test_mm_maskz_subs_epi16() {
13794        let a = _mm_set1_epi16(-1);
13795        let b = _mm_set1_epi16(i16::MAX);
13796        let r = _mm_maskz_subs_epi16(0, a, b);
13797        assert_eq_m128i(r, _mm_setzero_si128());
13798        let r = _mm_maskz_subs_epi16(0b00001111, a, b);
13799        let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13800        assert_eq_m128i(r, e);
13801    }
13802
13803    #[simd_test(enable = "avx512bw")]
13804    const fn test_mm512_subs_epi8() {
13805        let a = _mm512_set1_epi8(-1);
13806        let b = _mm512_set1_epi8(i8::MAX);
13807        let r = _mm512_subs_epi8(a, b);
13808        let e = _mm512_set1_epi8(i8::MIN);
13809        assert_eq_m512i(r, e);
13810    }
13811
13812    #[simd_test(enable = "avx512bw")]
13813    const fn test_mm512_mask_subs_epi8() {
13814        let a = _mm512_set1_epi8(-1);
13815        let b = _mm512_set1_epi8(i8::MAX);
13816        let r = _mm512_mask_subs_epi8(a, 0, a, b);
13817        assert_eq_m512i(r, a);
13818        let r = _mm512_mask_subs_epi8(
13819            a,
13820            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13821            a,
13822            b,
13823        );
13824        #[rustfmt::skip]
13825        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13826                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13827                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13828                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13829        assert_eq_m512i(r, e);
13830    }
13831
13832    #[simd_test(enable = "avx512bw")]
13833    const fn test_mm512_maskz_subs_epi8() {
13834        let a = _mm512_set1_epi8(-1);
13835        let b = _mm512_set1_epi8(i8::MAX);
13836        let r = _mm512_maskz_subs_epi8(0, a, b);
13837        assert_eq_m512i(r, _mm512_setzero_si512());
13838        let r = _mm512_maskz_subs_epi8(
13839            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13840            a,
13841            b,
13842        );
13843        #[rustfmt::skip]
13844        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13845                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13846                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13847                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13848        assert_eq_m512i(r, e);
13849    }
13850
13851    #[simd_test(enable = "avx512bw,avx512vl")]
13852    const fn test_mm256_mask_subs_epi8() {
13853        let a = _mm256_set1_epi8(-1);
13854        let b = _mm256_set1_epi8(i8::MAX);
13855        let r = _mm256_mask_subs_epi8(a, 0, a, b);
13856        assert_eq_m256i(r, a);
13857        let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
13858        #[rustfmt::skip]
13859        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
13860                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13861        assert_eq_m256i(r, e);
13862    }
13863
13864    #[simd_test(enable = "avx512bw,avx512vl")]
13865    const fn test_mm256_maskz_subs_epi8() {
13866        let a = _mm256_set1_epi8(-1);
13867        let b = _mm256_set1_epi8(i8::MAX);
13868        let r = _mm256_maskz_subs_epi8(0, a, b);
13869        assert_eq_m256i(r, _mm256_setzero_si256());
13870        let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b);
13871        #[rustfmt::skip]
13872        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13873                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13874        assert_eq_m256i(r, e);
13875    }
13876
13877    #[simd_test(enable = "avx512bw,avx512vl")]
13878    const fn test_mm_mask_subs_epi8() {
13879        let a = _mm_set1_epi8(-1);
13880        let b = _mm_set1_epi8(i8::MAX);
13881        let r = _mm_mask_subs_epi8(a, 0, a, b);
13882        assert_eq_m128i(r, a);
13883        let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b);
13884        #[rustfmt::skip]
13885        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13886        assert_eq_m128i(r, e);
13887    }
13888
13889    #[simd_test(enable = "avx512bw,avx512vl")]
13890    const fn test_mm_maskz_subs_epi8() {
13891        let a = _mm_set1_epi8(-1);
13892        let b = _mm_set1_epi8(i8::MAX);
13893        let r = _mm_maskz_subs_epi8(0, a, b);
13894        assert_eq_m128i(r, _mm_setzero_si128());
13895        let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b);
13896        #[rustfmt::skip]
13897        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13898        assert_eq_m128i(r, e);
13899    }
13900
13901    #[simd_test(enable = "avx512bw")]
13902    const fn test_mm512_mulhi_epu16() {
13903        let a = _mm512_set1_epi16(1);
13904        let b = _mm512_set1_epi16(1);
13905        let r = _mm512_mulhi_epu16(a, b);
13906        let e = _mm512_set1_epi16(0);
13907        assert_eq_m512i(r, e);
13908    }
13909
13910    #[simd_test(enable = "avx512bw")]
13911    const fn test_mm512_mask_mulhi_epu16() {
13912        let a = _mm512_set1_epi16(1);
13913        let b = _mm512_set1_epi16(1);
13914        let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
13915        assert_eq_m512i(r, a);
13916        let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
13917        #[rustfmt::skip]
13918        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13919                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13920        assert_eq_m512i(r, e);
13921    }
13922
13923    #[simd_test(enable = "avx512bw")]
13924    const fn test_mm512_maskz_mulhi_epu16() {
13925        let a = _mm512_set1_epi16(1);
13926        let b = _mm512_set1_epi16(1);
13927        let r = _mm512_maskz_mulhi_epu16(0, a, b);
13928        assert_eq_m512i(r, _mm512_setzero_si512());
13929        let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b);
13930        #[rustfmt::skip]
13931        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13932                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13933        assert_eq_m512i(r, e);
13934    }
13935
13936    #[simd_test(enable = "avx512bw,avx512vl")]
13937    const fn test_mm256_mask_mulhi_epu16() {
13938        let a = _mm256_set1_epi16(1);
13939        let b = _mm256_set1_epi16(1);
13940        let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
13941        assert_eq_m256i(r, a);
13942        let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b);
13943        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13944        assert_eq_m256i(r, e);
13945    }
13946
13947    #[simd_test(enable = "avx512bw,avx512vl")]
13948    const fn test_mm256_maskz_mulhi_epu16() {
13949        let a = _mm256_set1_epi16(1);
13950        let b = _mm256_set1_epi16(1);
13951        let r = _mm256_maskz_mulhi_epu16(0, a, b);
13952        assert_eq_m256i(r, _mm256_setzero_si256());
13953        let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b);
13954        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13955        assert_eq_m256i(r, e);
13956    }
13957
13958    #[simd_test(enable = "avx512bw,avx512vl")]
13959    const fn test_mm_mask_mulhi_epu16() {
13960        let a = _mm_set1_epi16(1);
13961        let b = _mm_set1_epi16(1);
13962        let r = _mm_mask_mulhi_epu16(a, 0, a, b);
13963        assert_eq_m128i(r, a);
13964        let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b);
13965        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
13966        assert_eq_m128i(r, e);
13967    }
13968
13969    #[simd_test(enable = "avx512bw,avx512vl")]
13970    const fn test_mm_maskz_mulhi_epu16() {
13971        let a = _mm_set1_epi16(1);
13972        let b = _mm_set1_epi16(1);
13973        let r = _mm_maskz_mulhi_epu16(0, a, b);
13974        assert_eq_m128i(r, _mm_setzero_si128());
13975        let r = _mm_maskz_mulhi_epu16(0b00001111, a, b);
13976        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
13977        assert_eq_m128i(r, e);
13978    }
13979
13980    #[simd_test(enable = "avx512bw")]
13981    const fn test_mm512_mulhi_epi16() {
13982        let a = _mm512_set1_epi16(1);
13983        let b = _mm512_set1_epi16(1);
13984        let r = _mm512_mulhi_epi16(a, b);
13985        let e = _mm512_set1_epi16(0);
13986        assert_eq_m512i(r, e);
13987    }
13988
13989    #[simd_test(enable = "avx512bw")]
13990    const fn test_mm512_mask_mulhi_epi16() {
13991        let a = _mm512_set1_epi16(1);
13992        let b = _mm512_set1_epi16(1);
13993        let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
13994        assert_eq_m512i(r, a);
13995        let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
13996        #[rustfmt::skip]
13997        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13998                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13999        assert_eq_m512i(r, e);
14000    }
14001
14002    #[simd_test(enable = "avx512bw")]
14003    const fn test_mm512_maskz_mulhi_epi16() {
14004        let a = _mm512_set1_epi16(1);
14005        let b = _mm512_set1_epi16(1);
14006        let r = _mm512_maskz_mulhi_epi16(0, a, b);
14007        assert_eq_m512i(r, _mm512_setzero_si512());
14008        let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b);
14009        #[rustfmt::skip]
14010        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14011                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14012        assert_eq_m512i(r, e);
14013    }
14014
14015    #[simd_test(enable = "avx512bw,avx512vl")]
14016    const fn test_mm256_mask_mulhi_epi16() {
14017        let a = _mm256_set1_epi16(1);
14018        let b = _mm256_set1_epi16(1);
14019        let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
14020        assert_eq_m256i(r, a);
14021        let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b);
14022        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14023        assert_eq_m256i(r, e);
14024    }
14025
14026    #[simd_test(enable = "avx512bw,avx512vl")]
14027    const fn test_mm256_maskz_mulhi_epi16() {
14028        let a = _mm256_set1_epi16(1);
14029        let b = _mm256_set1_epi16(1);
14030        let r = _mm256_maskz_mulhi_epi16(0, a, b);
14031        assert_eq_m256i(r, _mm256_setzero_si256());
14032        let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b);
14033        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14034        assert_eq_m256i(r, e);
14035    }
14036
14037    #[simd_test(enable = "avx512bw,avx512vl")]
14038    const fn test_mm_mask_mulhi_epi16() {
14039        let a = _mm_set1_epi16(1);
14040        let b = _mm_set1_epi16(1);
14041        let r = _mm_mask_mulhi_epi16(a, 0, a, b);
14042        assert_eq_m128i(r, a);
14043        let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b);
14044        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
14045        assert_eq_m128i(r, e);
14046    }
14047
14048    #[simd_test(enable = "avx512bw,avx512vl")]
14049    const fn test_mm_maskz_mulhi_epi16() {
14050        let a = _mm_set1_epi16(1);
14051        let b = _mm_set1_epi16(1);
14052        let r = _mm_maskz_mulhi_epi16(0, a, b);
14053        assert_eq_m128i(r, _mm_setzero_si128());
14054        let r = _mm_maskz_mulhi_epi16(0b00001111, a, b);
14055        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
14056        assert_eq_m128i(r, e);
14057    }
14058
14059    #[simd_test(enable = "avx512bw")]
14060    fn test_mm512_mulhrs_epi16() {
14061        let a = _mm512_set1_epi16(1);
14062        let b = _mm512_set1_epi16(1);
14063        let r = _mm512_mulhrs_epi16(a, b);
14064        let e = _mm512_set1_epi16(0);
14065        assert_eq_m512i(r, e);
14066    }
14067
14068    #[simd_test(enable = "avx512bw")]
14069    fn test_mm512_mask_mulhrs_epi16() {
14070        let a = _mm512_set1_epi16(1);
14071        let b = _mm512_set1_epi16(1);
14072        let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
14073        assert_eq_m512i(r, a);
14074        let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
14075        #[rustfmt::skip]
14076        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14077                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14078        assert_eq_m512i(r, e);
14079    }
14080
14081    #[simd_test(enable = "avx512bw")]
14082    fn test_mm512_maskz_mulhrs_epi16() {
14083        let a = _mm512_set1_epi16(1);
14084        let b = _mm512_set1_epi16(1);
14085        let r = _mm512_maskz_mulhrs_epi16(0, a, b);
14086        assert_eq_m512i(r, _mm512_setzero_si512());
14087        let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b);
14088        #[rustfmt::skip]
14089        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14090                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14091        assert_eq_m512i(r, e);
14092    }
14093
14094    #[simd_test(enable = "avx512bw,avx512vl")]
14095    fn test_mm256_mask_mulhrs_epi16() {
14096        let a = _mm256_set1_epi16(1);
14097        let b = _mm256_set1_epi16(1);
14098        let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
14099        assert_eq_m256i(r, a);
14100        let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b);
14101        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14102        assert_eq_m256i(r, e);
14103    }
14104
14105    #[simd_test(enable = "avx512bw,avx512vl")]
14106    fn test_mm256_maskz_mulhrs_epi16() {
14107        let a = _mm256_set1_epi16(1);
14108        let b = _mm256_set1_epi16(1);
14109        let r = _mm256_maskz_mulhrs_epi16(0, a, b);
14110        assert_eq_m256i(r, _mm256_setzero_si256());
14111        let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b);
14112        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14113        assert_eq_m256i(r, e);
14114    }
14115
14116    #[simd_test(enable = "avx512bw,avx512vl")]
14117    fn test_mm_mask_mulhrs_epi16() {
14118        let a = _mm_set1_epi16(1);
14119        let b = _mm_set1_epi16(1);
14120        let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
14121        assert_eq_m128i(r, a);
14122        let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b);
14123        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
14124        assert_eq_m128i(r, e);
14125    }
14126
14127    #[simd_test(enable = "avx512bw,avx512vl")]
14128    fn test_mm_maskz_mulhrs_epi16() {
14129        let a = _mm_set1_epi16(1);
14130        let b = _mm_set1_epi16(1);
14131        let r = _mm_maskz_mulhrs_epi16(0, a, b);
14132        assert_eq_m128i(r, _mm_setzero_si128());
14133        let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b);
14134        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
14135        assert_eq_m128i(r, e);
14136    }
14137
14138    #[simd_test(enable = "avx512bw")]
14139    const fn test_mm512_mullo_epi16() {
14140        let a = _mm512_set1_epi16(1);
14141        let b = _mm512_set1_epi16(1);
14142        let r = _mm512_mullo_epi16(a, b);
14143        let e = _mm512_set1_epi16(1);
14144        assert_eq_m512i(r, e);
14145    }
14146
14147    #[simd_test(enable = "avx512bw")]
14148    const fn test_mm512_mask_mullo_epi16() {
14149        let a = _mm512_set1_epi16(1);
14150        let b = _mm512_set1_epi16(1);
14151        let r = _mm512_mask_mullo_epi16(a, 0, a, b);
14152        assert_eq_m512i(r, a);
14153        let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
14154        #[rustfmt::skip]
14155        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14156                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
14157        assert_eq_m512i(r, e);
14158    }
14159
14160    #[simd_test(enable = "avx512bw")]
14161    const fn test_mm512_maskz_mullo_epi16() {
14162        let a = _mm512_set1_epi16(1);
14163        let b = _mm512_set1_epi16(1);
14164        let r = _mm512_maskz_mullo_epi16(0, a, b);
14165        assert_eq_m512i(r, _mm512_setzero_si512());
14166        let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b);
14167        #[rustfmt::skip]
14168        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14169                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
14170        assert_eq_m512i(r, e);
14171    }
14172
14173    #[simd_test(enable = "avx512bw,avx512vl")]
14174    const fn test_mm256_mask_mullo_epi16() {
14175        let a = _mm256_set1_epi16(1);
14176        let b = _mm256_set1_epi16(1);
14177        let r = _mm256_mask_mullo_epi16(a, 0, a, b);
14178        assert_eq_m256i(r, a);
14179        let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b);
14180        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
14181        assert_eq_m256i(r, e);
14182    }
14183
14184    #[simd_test(enable = "avx512bw,avx512vl")]
14185    const fn test_mm256_maskz_mullo_epi16() {
14186        let a = _mm256_set1_epi16(1);
14187        let b = _mm256_set1_epi16(1);
14188        let r = _mm256_maskz_mullo_epi16(0, a, b);
14189        assert_eq_m256i(r, _mm256_setzero_si256());
14190        let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b);
14191        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
14192        assert_eq_m256i(r, e);
14193    }
14194
14195    #[simd_test(enable = "avx512bw,avx512vl")]
14196    const fn test_mm_mask_mullo_epi16() {
14197        let a = _mm_set1_epi16(1);
14198        let b = _mm_set1_epi16(1);
14199        let r = _mm_mask_mullo_epi16(a, 0, a, b);
14200        assert_eq_m128i(r, a);
14201        let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b);
14202        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
14203        assert_eq_m128i(r, e);
14204    }
14205
14206    #[simd_test(enable = "avx512bw,avx512vl")]
14207    const fn test_mm_maskz_mullo_epi16() {
14208        let a = _mm_set1_epi16(1);
14209        let b = _mm_set1_epi16(1);
14210        let r = _mm_maskz_mullo_epi16(0, a, b);
14211        assert_eq_m128i(r, _mm_setzero_si128());
14212        let r = _mm_maskz_mullo_epi16(0b00001111, a, b);
14213        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
14214        assert_eq_m128i(r, e);
14215    }
14216
14217    #[simd_test(enable = "avx512bw")]
14218    const fn test_mm512_max_epu16() {
14219        #[rustfmt::skip]
14220        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14221                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14222        #[rustfmt::skip]
14223        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14224                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14225        let r = _mm512_max_epu16(a, b);
14226        #[rustfmt::skip]
14227        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14228                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
14229        assert_eq_m512i(r, e);
14230    }
14231
14232    #[simd_test(enable = "avx512bw")]
14233    const fn test_mm512_mask_max_epu16() {
14234        #[rustfmt::skip]
14235        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14236                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14237        #[rustfmt::skip]
14238        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14239                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14240        let r = _mm512_mask_max_epu16(a, 0, a, b);
14241        assert_eq_m512i(r, a);
14242        let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
14243        #[rustfmt::skip]
14244        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14245                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14246        assert_eq_m512i(r, e);
14247    }
14248
14249    #[simd_test(enable = "avx512bw")]
14250    const fn test_mm512_maskz_max_epu16() {
14251        #[rustfmt::skip]
14252        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14253                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14254        #[rustfmt::skip]
14255        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14256                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14257        let r = _mm512_maskz_max_epu16(0, a, b);
14258        assert_eq_m512i(r, _mm512_setzero_si512());
14259        let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b);
14260        #[rustfmt::skip]
14261        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14262                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14263        assert_eq_m512i(r, e);
14264    }
14265
14266    #[simd_test(enable = "avx512bw,avx512vl")]
14267    const fn test_mm256_mask_max_epu16() {
14268        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14269        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14270        let r = _mm256_mask_max_epu16(a, 0, a, b);
14271        assert_eq_m256i(r, a);
14272        let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b);
14273        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14274        assert_eq_m256i(r, e);
14275    }
14276
14277    #[simd_test(enable = "avx512bw,avx512vl")]
14278    const fn test_mm256_maskz_max_epu16() {
14279        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14280        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14281        let r = _mm256_maskz_max_epu16(0, a, b);
14282        assert_eq_m256i(r, _mm256_setzero_si256());
14283        let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b);
14284        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14285        assert_eq_m256i(r, e);
14286    }
14287
14288    #[simd_test(enable = "avx512bw,avx512vl")]
14289    const fn test_mm_mask_max_epu16() {
14290        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14291        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14292        let r = _mm_mask_max_epu16(a, 0, a, b);
14293        assert_eq_m128i(r, a);
14294        let r = _mm_mask_max_epu16(a, 0b00001111, a, b);
14295        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14296        assert_eq_m128i(r, e);
14297    }
14298
14299    #[simd_test(enable = "avx512bw,avx512vl")]
14300    const fn test_mm_maskz_max_epu16() {
14301        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14302        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14303        let r = _mm_maskz_max_epu16(0, a, b);
14304        assert_eq_m128i(r, _mm_setzero_si128());
14305        let r = _mm_maskz_max_epu16(0b00001111, a, b);
14306        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
14307        assert_eq_m128i(r, e);
14308    }
14309
14310    #[simd_test(enable = "avx512bw")]
14311    const fn test_mm512_max_epu8() {
14312        #[rustfmt::skip]
14313        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14314                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14315                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14316                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14317        #[rustfmt::skip]
14318        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14319                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14320                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14321                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14322        let r = _mm512_max_epu8(a, b);
14323        #[rustfmt::skip]
14324        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14325                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14326                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14327                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
14328        assert_eq_m512i(r, e);
14329    }
14330
14331    #[simd_test(enable = "avx512bw")]
14332    const fn test_mm512_mask_max_epu8() {
14333        #[rustfmt::skip]
14334        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14335                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14336                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14337                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14338        #[rustfmt::skip]
14339        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14340                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14341                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14342                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14343        let r = _mm512_mask_max_epu8(a, 0, a, b);
14344        assert_eq_m512i(r, a);
14345        let r = _mm512_mask_max_epu8(
14346            a,
14347            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14348            a,
14349            b,
14350        );
14351        #[rustfmt::skip]
14352        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14353                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14354                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14355                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14356        assert_eq_m512i(r, e);
14357    }
14358
14359    #[simd_test(enable = "avx512bw")]
14360    const fn test_mm512_maskz_max_epu8() {
14361        #[rustfmt::skip]
14362        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14363                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14364                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14365                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14366        #[rustfmt::skip]
14367        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14368                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14369                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14370                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14371        let r = _mm512_maskz_max_epu8(0, a, b);
14372        assert_eq_m512i(r, _mm512_setzero_si512());
14373        let r = _mm512_maskz_max_epu8(
14374            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14375            a,
14376            b,
14377        );
14378        #[rustfmt::skip]
14379        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14380                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14381                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14382                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14383        assert_eq_m512i(r, e);
14384    }
14385
14386    #[simd_test(enable = "avx512bw,avx512vl")]
14387    const fn test_mm256_mask_max_epu8() {
14388        #[rustfmt::skip]
14389        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14390                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14391        #[rustfmt::skip]
14392        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14393                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14394        let r = _mm256_mask_max_epu8(a, 0, a, b);
14395        assert_eq_m256i(r, a);
14396        let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
14397        #[rustfmt::skip]
14398        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14399                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14400        assert_eq_m256i(r, e);
14401    }
14402
14403    #[simd_test(enable = "avx512bw,avx512vl")]
14404    const fn test_mm256_maskz_max_epu8() {
14405        #[rustfmt::skip]
14406        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14407                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14408        #[rustfmt::skip]
14409        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14410                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14411        let r = _mm256_maskz_max_epu8(0, a, b);
14412        assert_eq_m256i(r, _mm256_setzero_si256());
14413        let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b);
14414        #[rustfmt::skip]
14415        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14416                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14417        assert_eq_m256i(r, e);
14418    }
14419
14420    #[simd_test(enable = "avx512bw,avx512vl")]
14421    const fn test_mm_mask_max_epu8() {
14422        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14423        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14424        let r = _mm_mask_max_epu8(a, 0, a, b);
14425        assert_eq_m128i(r, a);
14426        let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b);
14427        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14428        assert_eq_m128i(r, e);
14429    }
14430
14431    #[simd_test(enable = "avx512bw,avx512vl")]
14432    const fn test_mm_maskz_max_epu8() {
14433        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14434        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14435        let r = _mm_maskz_max_epu8(0, a, b);
14436        assert_eq_m128i(r, _mm_setzero_si128());
14437        let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b);
14438        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14439        assert_eq_m128i(r, e);
14440    }
14441
14442    #[simd_test(enable = "avx512bw")]
14443    const fn test_mm512_max_epi16() {
14444        #[rustfmt::skip]
14445        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14446                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14447        #[rustfmt::skip]
14448        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14449                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14450        let r = _mm512_max_epi16(a, b);
14451        #[rustfmt::skip]
14452        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14453                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
14454        assert_eq_m512i(r, e);
14455    }
14456
14457    #[simd_test(enable = "avx512bw")]
14458    const fn test_mm512_mask_max_epi16() {
14459        #[rustfmt::skip]
14460        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14461                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14462        #[rustfmt::skip]
14463        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14464                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14465        let r = _mm512_mask_max_epi16(a, 0, a, b);
14466        assert_eq_m512i(r, a);
14467        let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
14468        #[rustfmt::skip]
14469        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14470                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14471        assert_eq_m512i(r, e);
14472    }
14473
14474    #[simd_test(enable = "avx512bw")]
14475    const fn test_mm512_maskz_max_epi16() {
14476        #[rustfmt::skip]
14477        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14478                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14479        #[rustfmt::skip]
14480        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14481                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14482        let r = _mm512_maskz_max_epi16(0, a, b);
14483        assert_eq_m512i(r, _mm512_setzero_si512());
14484        let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b);
14485        #[rustfmt::skip]
14486        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14487                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14488        assert_eq_m512i(r, e);
14489    }
14490
14491    #[simd_test(enable = "avx512bw,avx512vl")]
14492    const fn test_mm256_mask_max_epi16() {
14493        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14494        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14495        let r = _mm256_mask_max_epi16(a, 0, a, b);
14496        assert_eq_m256i(r, a);
14497        let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b);
14498        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14499        assert_eq_m256i(r, e);
14500    }
14501
14502    #[simd_test(enable = "avx512bw,avx512vl")]
14503    const fn test_mm256_maskz_max_epi16() {
14504        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14505        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14506        let r = _mm256_maskz_max_epi16(0, a, b);
14507        assert_eq_m256i(r, _mm256_setzero_si256());
14508        let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b);
14509        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14510        assert_eq_m256i(r, e);
14511    }
14512
14513    #[simd_test(enable = "avx512bw,avx512vl")]
14514    const fn test_mm_mask_max_epi16() {
14515        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14516        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14517        let r = _mm_mask_max_epi16(a, 0, a, b);
14518        assert_eq_m128i(r, a);
14519        let r = _mm_mask_max_epi16(a, 0b00001111, a, b);
14520        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14521        assert_eq_m128i(r, e);
14522    }
14523
14524    #[simd_test(enable = "avx512bw,avx512vl")]
14525    const fn test_mm_maskz_max_epi16() {
14526        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14527        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14528        let r = _mm_maskz_max_epi16(0, a, b);
14529        assert_eq_m128i(r, _mm_setzero_si128());
14530        let r = _mm_maskz_max_epi16(0b00001111, a, b);
14531        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
14532        assert_eq_m128i(r, e);
14533    }
14534
14535    #[simd_test(enable = "avx512bw")]
14536    const fn test_mm512_max_epi8() {
14537        #[rustfmt::skip]
14538        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14539                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14540                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14541                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14542        #[rustfmt::skip]
14543        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14544                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14545                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14546                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14547        let r = _mm512_max_epi8(a, b);
14548        #[rustfmt::skip]
14549        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14550                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14551                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
14552                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
14553        assert_eq_m512i(r, e);
14554    }
14555
14556    #[simd_test(enable = "avx512bw")]
14557    const fn test_mm512_mask_max_epi8() {
14558        #[rustfmt::skip]
14559        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14560                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14561                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14562                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14563        #[rustfmt::skip]
14564        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14565                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14566                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14567                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14568        let r = _mm512_mask_max_epi8(a, 0, a, b);
14569        assert_eq_m512i(r, a);
14570        let r = _mm512_mask_max_epi8(
14571            a,
14572            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14573            a,
14574            b,
14575        );
14576        #[rustfmt::skip]
14577        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14578                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14579                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14580                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14581        assert_eq_m512i(r, e);
14582    }
14583
14584    #[simd_test(enable = "avx512bw")]
14585    const fn test_mm512_maskz_max_epi8() {
14586        #[rustfmt::skip]
14587        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14588                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14589                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14590                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14591        #[rustfmt::skip]
14592        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14593                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14594                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14595                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14596        let r = _mm512_maskz_max_epi8(0, a, b);
14597        assert_eq_m512i(r, _mm512_setzero_si512());
14598        let r = _mm512_maskz_max_epi8(
14599            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14600            a,
14601            b,
14602        );
14603        #[rustfmt::skip]
14604        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14605                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14606                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14607                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14608        assert_eq_m512i(r, e);
14609    }
14610
14611    #[simd_test(enable = "avx512bw,avx512vl")]
14612    const fn test_mm256_mask_max_epi8() {
14613        #[rustfmt::skip]
14614        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14615                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14616        #[rustfmt::skip]
14617        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14618                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14619        let r = _mm256_mask_max_epi8(a, 0, a, b);
14620        assert_eq_m256i(r, a);
14621        let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
14622        #[rustfmt::skip]
14623        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14624                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14625        assert_eq_m256i(r, e);
14626    }
14627
14628    #[simd_test(enable = "avx512bw,avx512vl")]
14629    const fn test_mm256_maskz_max_epi8() {
14630        #[rustfmt::skip]
14631        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14632                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14633        #[rustfmt::skip]
14634        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14635                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14636        let r = _mm256_maskz_max_epi8(0, a, b);
14637        assert_eq_m256i(r, _mm256_setzero_si256());
14638        let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b);
14639        #[rustfmt::skip]
14640        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
14641                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14642        assert_eq_m256i(r, e);
14643    }
14644
14645    #[simd_test(enable = "avx512bw,avx512vl")]
14646    const fn test_mm_mask_max_epi8() {
14647        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14648        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14649        let r = _mm_mask_max_epi8(a, 0, a, b);
14650        assert_eq_m128i(r, a);
14651        let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b);
14652        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14653        assert_eq_m128i(r, e);
14654    }
14655
14656    #[simd_test(enable = "avx512bw,avx512vl")]
14657    const fn test_mm_maskz_max_epi8() {
14658        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14659        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14660        let r = _mm_maskz_max_epi8(0, a, b);
14661        assert_eq_m128i(r, _mm_setzero_si128());
14662        let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b);
14663        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14664        assert_eq_m128i(r, e);
14665    }
14666
14667    #[simd_test(enable = "avx512bw")]
14668    const fn test_mm512_min_epu16() {
14669        #[rustfmt::skip]
14670        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14671                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14672        #[rustfmt::skip]
14673        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14674                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14675        let r = _mm512_min_epu16(a, b);
14676        #[rustfmt::skip]
14677        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14678                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14679        assert_eq_m512i(r, e);
14680    }
14681
14682    #[simd_test(enable = "avx512bw")]
14683    const fn test_mm512_mask_min_epu16() {
14684        #[rustfmt::skip]
14685        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14686                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14687        #[rustfmt::skip]
14688        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14689                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14690        let r = _mm512_mask_min_epu16(a, 0, a, b);
14691        assert_eq_m512i(r, a);
14692        let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
14693        #[rustfmt::skip]
14694        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14695                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14696        assert_eq_m512i(r, e);
14697    }
14698
14699    #[simd_test(enable = "avx512bw")]
14700    const fn test_mm512_maskz_min_epu16() {
14701        #[rustfmt::skip]
14702        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14703                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14704        #[rustfmt::skip]
14705        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14706                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14707        let r = _mm512_maskz_min_epu16(0, a, b);
14708        assert_eq_m512i(r, _mm512_setzero_si512());
14709        let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b);
14710        #[rustfmt::skip]
14711        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
14712                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14713        assert_eq_m512i(r, e);
14714    }
14715
14716    #[simd_test(enable = "avx512bw,avx512vl")]
14717    const fn test_mm256_mask_min_epu16() {
14718        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14719        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14720        let r = _mm256_mask_min_epu16(a, 0, a, b);
14721        assert_eq_m256i(r, a);
14722        let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b);
14723        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14724        assert_eq_m256i(r, e);
14725    }
14726
14727    #[simd_test(enable = "avx512bw,avx512vl")]
14728    const fn test_mm256_maskz_min_epu16() {
14729        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14730        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14731        let r = _mm256_maskz_min_epu16(0, a, b);
14732        assert_eq_m256i(r, _mm256_setzero_si256());
14733        let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b);
14734        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14735        assert_eq_m256i(r, e);
14736    }
14737
14738    #[simd_test(enable = "avx512bw,avx512vl")]
14739    const fn test_mm_mask_min_epu16() {
14740        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14741        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14742        let r = _mm_mask_min_epu16(a, 0, a, b);
14743        assert_eq_m128i(r, a);
14744        let r = _mm_mask_min_epu16(a, 0b00001111, a, b);
14745        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
14746        assert_eq_m128i(r, e);
14747    }
14748
14749    #[simd_test(enable = "avx512bw,avx512vl")]
14750    const fn test_mm_maskz_min_epu16() {
14751        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14752        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14753        let r = _mm_maskz_min_epu16(0, a, b);
14754        assert_eq_m128i(r, _mm_setzero_si128());
14755        let r = _mm_maskz_min_epu16(0b00001111, a, b);
14756        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
14757        assert_eq_m128i(r, e);
14758    }
14759
14760    #[simd_test(enable = "avx512bw")]
14761    const fn test_mm512_min_epu8() {
14762        #[rustfmt::skip]
14763        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14764                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14765                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14766                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14767        #[rustfmt::skip]
14768        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14769                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14770                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14771                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14772        let r = _mm512_min_epu8(a, b);
14773        #[rustfmt::skip]
14774        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14775                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14776                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14777                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14778        assert_eq_m512i(r, e);
14779    }
14780
14781    #[simd_test(enable = "avx512bw")]
14782    const fn test_mm512_mask_min_epu8() {
14783        #[rustfmt::skip]
14784        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14785                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14786                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
14787                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14788        #[rustfmt::skip]
14789        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14790                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14791                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
14792                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14793        let r = _mm512_mask_min_epu8(a, 0, a, b);
14794        assert_eq_m512i(r, a);
14795        let r = _mm512_mask_min_epu8(
14796            a,
14797            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
14798            a,
14799            b,
14800        );
14801        #[rustfmt::skip]
14802        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14803                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14804                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
14805                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14806        assert_eq_m512i(r, e);
14807    }
14808
    // Zeroing-masked unsigned byte minimum: a zero mask yields an all-zero
    // vector; a set bit selects min(a, b), a clear bit forces the lane to 0.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Mask bit i governs element i (the *last* argument of _mm512_set_epi8
        // is element 0), so each low byte group keeps min(a, b) and each high
        // group is zeroed.
        let r = _mm512_maskz_min_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14835
    // Write-masked unsigned byte minimum on 256-bit vectors: clear mask bits
    // copy the corresponding lane from `src` (here `a`).
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_min_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask returns `src` unchanged.
        let r = _mm256_mask_min_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
14852
    // Zeroing-masked unsigned byte minimum on 256-bit vectors: clear mask
    // bits zero the corresponding lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_min_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask yields an all-zero vector.
        let r = _mm256_maskz_min_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
14869
14870    #[simd_test(enable = "avx512bw,avx512vl")]
14871    const fn test_mm_mask_min_epu8() {
14872        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14873        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14874        let r = _mm_mask_min_epu8(a, 0, a, b);
14875        assert_eq_m128i(r, a);
14876        let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b);
14877        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14878        assert_eq_m128i(r, e);
14879    }
14880
14881    #[simd_test(enable = "avx512bw,avx512vl")]
14882    const fn test_mm_maskz_min_epu8() {
14883        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14884        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14885        let r = _mm_maskz_min_epu8(0, a, b);
14886        assert_eq_m128i(r, _mm_setzero_si128());
14887        let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b);
14888        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14889        assert_eq_m128i(r, e);
14890    }
14891
    // Signed 16-bit minimum across all 32 lanes of two 512-bit vectors.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi16(a, b);
        // Each lane holds the smaller of the two ascending/descending ramps.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14906
    // Write-masked signed 16-bit minimum: clear mask bits copy from `src`.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask returns `src` (here `a`) unchanged.
        let r = _mm512_mask_min_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14923
    // Zeroing-masked signed 16-bit minimum: clear mask bits zero the lane.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask yields an all-zero vector.
        let r = _mm512_maskz_min_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14940
14941    #[simd_test(enable = "avx512bw,avx512vl")]
14942    const fn test_mm256_mask_min_epi16() {
14943        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14944        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14945        let r = _mm256_mask_min_epi16(a, 0, a, b);
14946        assert_eq_m256i(r, a);
14947        let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b);
14948        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14949        assert_eq_m256i(r, e);
14950    }
14951
14952    #[simd_test(enable = "avx512bw,avx512vl")]
14953    const fn test_mm256_maskz_min_epi16() {
14954        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14955        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14956        let r = _mm256_maskz_min_epi16(0, a, b);
14957        assert_eq_m256i(r, _mm256_setzero_si256());
14958        let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b);
14959        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14960        assert_eq_m256i(r, e);
14961    }
14962
14963    #[simd_test(enable = "avx512bw,avx512vl")]
14964    const fn test_mm_mask_min_epi16() {
14965        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14966        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14967        let r = _mm_mask_min_epi16(a, 0, a, b);
14968        assert_eq_m128i(r, a);
14969        let r = _mm_mask_min_epi16(a, 0b00001111, a, b);
14970        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
14971        assert_eq_m128i(r, e);
14972    }
14973
14974    #[simd_test(enable = "avx512bw,avx512vl")]
14975    const fn test_mm_maskz_min_epi16() {
14976        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14977        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14978        let r = _mm_maskz_min_epi16(0, a, b);
14979        assert_eq_m128i(r, _mm_setzero_si128());
14980        let r = _mm_maskz_min_epi16(0b00001111, a, b);
14981        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
14982        assert_eq_m128i(r, e);
14983    }
14984
    // Signed byte minimum across all 64 lanes of two 512-bit vectors.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi8(a, b);
        // Each lane holds the smaller of the two ascending/descending ramps.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
15005
    // Write-masked signed byte minimum: clear mask bits copy from `src`.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask returns `src` (here `a`) unchanged.
        let r = _mm512_mask_min_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
15033
    // Zeroing-masked signed byte minimum: clear mask bits zero the lane.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask yields an all-zero vector.
        let r = _mm512_maskz_min_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
15060
    // Write-masked signed byte minimum on 256-bit vectors.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_min_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask returns `src` (here `a`) unchanged.
        let r = _mm256_mask_min_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
15077
    // Zeroing-masked signed byte minimum on 256-bit vectors.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_min_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // A zero mask yields an all-zero vector.
        let r = _mm256_maskz_min_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
15094
15095    #[simd_test(enable = "avx512bw,avx512vl")]
15096    const fn test_mm_mask_min_epi8() {
15097        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15098        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
15099        let r = _mm_mask_min_epi8(a, 0, a, b);
15100        assert_eq_m128i(r, a);
15101        let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b);
15102        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
15103        assert_eq_m128i(r, e);
15104    }
15105
15106    #[simd_test(enable = "avx512bw,avx512vl")]
15107    const fn test_mm_maskz_min_epi8() {
15108        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15109        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
15110        let r = _mm_maskz_min_epi8(0, a, b);
15111        assert_eq_m128i(r, _mm_setzero_si128());
15112        let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b);
15113        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
15114        assert_eq_m128i(r, e);
15115    }
15116
15117    #[simd_test(enable = "avx512bw")]
15118    const fn test_mm512_cmplt_epu16_mask() {
15119        let a = _mm512_set1_epi16(-2);
15120        let b = _mm512_set1_epi16(-1);
15121        let m = _mm512_cmplt_epu16_mask(a, b);
15122        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15123    }
15124
15125    #[simd_test(enable = "avx512bw")]
15126    const fn test_mm512_mask_cmplt_epu16_mask() {
15127        let a = _mm512_set1_epi16(-2);
15128        let b = _mm512_set1_epi16(-1);
15129        let mask = 0b01010101_01010101_01010101_01010101;
15130        let r = _mm512_mask_cmplt_epu16_mask(mask, a, b);
15131        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15132    }
15133
15134    #[simd_test(enable = "avx512bw,avx512vl")]
15135    const fn test_mm256_cmplt_epu16_mask() {
15136        let a = _mm256_set1_epi16(-2);
15137        let b = _mm256_set1_epi16(-1);
15138        let m = _mm256_cmplt_epu16_mask(a, b);
15139        assert_eq!(m, 0b11111111_11111111);
15140    }
15141
15142    #[simd_test(enable = "avx512bw,avx512vl")]
15143    const fn test_mm256_mask_cmplt_epu16_mask() {
15144        let a = _mm256_set1_epi16(-2);
15145        let b = _mm256_set1_epi16(-1);
15146        let mask = 0b01010101_01010101;
15147        let r = _mm256_mask_cmplt_epu16_mask(mask, a, b);
15148        assert_eq!(r, 0b01010101_01010101);
15149    }
15150
15151    #[simd_test(enable = "avx512bw,avx512vl")]
15152    const fn test_mm_cmplt_epu16_mask() {
15153        let a = _mm_set1_epi16(-2);
15154        let b = _mm_set1_epi16(-1);
15155        let m = _mm_cmplt_epu16_mask(a, b);
15156        assert_eq!(m, 0b11111111);
15157    }
15158
15159    #[simd_test(enable = "avx512bw,avx512vl")]
15160    const fn test_mm_mask_cmplt_epu16_mask() {
15161        let a = _mm_set1_epi16(-2);
15162        let b = _mm_set1_epi16(-1);
15163        let mask = 0b01010101;
15164        let r = _mm_mask_cmplt_epu16_mask(mask, a, b);
15165        assert_eq!(r, 0b01010101);
15166    }
15167
15168    #[simd_test(enable = "avx512bw")]
15169    const fn test_mm512_cmplt_epu8_mask() {
15170        let a = _mm512_set1_epi8(-2);
15171        let b = _mm512_set1_epi8(-1);
15172        let m = _mm512_cmplt_epu8_mask(a, b);
15173        assert_eq!(
15174            m,
15175            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15176        );
15177    }
15178
15179    #[simd_test(enable = "avx512bw")]
15180    const fn test_mm512_mask_cmplt_epu8_mask() {
15181        let a = _mm512_set1_epi8(-2);
15182        let b = _mm512_set1_epi8(-1);
15183        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15184        let r = _mm512_mask_cmplt_epu8_mask(mask, a, b);
15185        assert_eq!(
15186            r,
15187            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15188        );
15189    }
15190
15191    #[simd_test(enable = "avx512bw,avx512vl")]
15192    const fn test_mm256_cmplt_epu8_mask() {
15193        let a = _mm256_set1_epi8(-2);
15194        let b = _mm256_set1_epi8(-1);
15195        let m = _mm256_cmplt_epu8_mask(a, b);
15196        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15197    }
15198
15199    #[simd_test(enable = "avx512bw,avx512vl")]
15200    const fn test_mm256_mask_cmplt_epu8_mask() {
15201        let a = _mm256_set1_epi8(-2);
15202        let b = _mm256_set1_epi8(-1);
15203        let mask = 0b01010101_01010101_01010101_01010101;
15204        let r = _mm256_mask_cmplt_epu8_mask(mask, a, b);
15205        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15206    }
15207
15208    #[simd_test(enable = "avx512bw,avx512vl")]
15209    const fn test_mm_cmplt_epu8_mask() {
15210        let a = _mm_set1_epi8(-2);
15211        let b = _mm_set1_epi8(-1);
15212        let m = _mm_cmplt_epu8_mask(a, b);
15213        assert_eq!(m, 0b11111111_11111111);
15214    }
15215
15216    #[simd_test(enable = "avx512bw,avx512vl")]
15217    const fn test_mm_mask_cmplt_epu8_mask() {
15218        let a = _mm_set1_epi8(-2);
15219        let b = _mm_set1_epi8(-1);
15220        let mask = 0b01010101_01010101;
15221        let r = _mm_mask_cmplt_epu8_mask(mask, a, b);
15222        assert_eq!(r, 0b01010101_01010101);
15223    }
15224
15225    #[simd_test(enable = "avx512bw")]
15226    const fn test_mm512_cmplt_epi16_mask() {
15227        let a = _mm512_set1_epi16(-2);
15228        let b = _mm512_set1_epi16(-1);
15229        let m = _mm512_cmplt_epi16_mask(a, b);
15230        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15231    }
15232
15233    #[simd_test(enable = "avx512bw")]
15234    const fn test_mm512_mask_cmplt_epi16_mask() {
15235        let a = _mm512_set1_epi16(-2);
15236        let b = _mm512_set1_epi16(-1);
15237        let mask = 0b01010101_01010101_01010101_01010101;
15238        let r = _mm512_mask_cmplt_epi16_mask(mask, a, b);
15239        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15240    }
15241
15242    #[simd_test(enable = "avx512bw,avx512vl")]
15243    const fn test_mm256_cmplt_epi16_mask() {
15244        let a = _mm256_set1_epi16(-2);
15245        let b = _mm256_set1_epi16(-1);
15246        let m = _mm256_cmplt_epi16_mask(a, b);
15247        assert_eq!(m, 0b11111111_11111111);
15248    }
15249
15250    #[simd_test(enable = "avx512bw,avx512vl")]
15251    const fn test_mm256_mask_cmplt_epi16_mask() {
15252        let a = _mm256_set1_epi16(-2);
15253        let b = _mm256_set1_epi16(-1);
15254        let mask = 0b01010101_01010101;
15255        let r = _mm256_mask_cmplt_epi16_mask(mask, a, b);
15256        assert_eq!(r, 0b01010101_01010101);
15257    }
15258
15259    #[simd_test(enable = "avx512bw,avx512vl")]
15260    const fn test_mm_cmplt_epi16_mask() {
15261        let a = _mm_set1_epi16(-2);
15262        let b = _mm_set1_epi16(-1);
15263        let m = _mm_cmplt_epi16_mask(a, b);
15264        assert_eq!(m, 0b11111111);
15265    }
15266
15267    #[simd_test(enable = "avx512bw,avx512vl")]
15268    const fn test_mm_mask_cmplt_epi16_mask() {
15269        let a = _mm_set1_epi16(-2);
15270        let b = _mm_set1_epi16(-1);
15271        let mask = 0b01010101;
15272        let r = _mm_mask_cmplt_epi16_mask(mask, a, b);
15273        assert_eq!(r, 0b01010101);
15274    }
15275
15276    #[simd_test(enable = "avx512bw")]
15277    const fn test_mm512_cmplt_epi8_mask() {
15278        let a = _mm512_set1_epi8(-2);
15279        let b = _mm512_set1_epi8(-1);
15280        let m = _mm512_cmplt_epi8_mask(a, b);
15281        assert_eq!(
15282            m,
15283            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15284        );
15285    }
15286
15287    #[simd_test(enable = "avx512bw")]
15288    const fn test_mm512_mask_cmplt_epi8_mask() {
15289        let a = _mm512_set1_epi8(-2);
15290        let b = _mm512_set1_epi8(-1);
15291        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15292        let r = _mm512_mask_cmplt_epi8_mask(mask, a, b);
15293        assert_eq!(
15294            r,
15295            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15296        );
15297    }
15298
15299    #[simd_test(enable = "avx512bw,avx512vl")]
15300    const fn test_mm256_cmplt_epi8_mask() {
15301        let a = _mm256_set1_epi8(-2);
15302        let b = _mm256_set1_epi8(-1);
15303        let m = _mm256_cmplt_epi8_mask(a, b);
15304        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15305    }
15306
15307    #[simd_test(enable = "avx512bw,avx512vl")]
15308    const fn test_mm256_mask_cmplt_epi8_mask() {
15309        let a = _mm256_set1_epi8(-2);
15310        let b = _mm256_set1_epi8(-1);
15311        let mask = 0b01010101_01010101_01010101_01010101;
15312        let r = _mm256_mask_cmplt_epi8_mask(mask, a, b);
15313        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15314    }
15315
15316    #[simd_test(enable = "avx512bw,avx512vl")]
15317    const fn test_mm_cmplt_epi8_mask() {
15318        let a = _mm_set1_epi8(-2);
15319        let b = _mm_set1_epi8(-1);
15320        let m = _mm_cmplt_epi8_mask(a, b);
15321        assert_eq!(m, 0b11111111_11111111);
15322    }
15323
15324    #[simd_test(enable = "avx512bw,avx512vl")]
15325    const fn test_mm_mask_cmplt_epi8_mask() {
15326        let a = _mm_set1_epi8(-2);
15327        let b = _mm_set1_epi8(-1);
15328        let mask = 0b01010101_01010101;
15329        let r = _mm_mask_cmplt_epi8_mask(mask, a, b);
15330        assert_eq!(r, 0b01010101_01010101);
15331    }
15332
15333    #[simd_test(enable = "avx512bw")]
15334    const fn test_mm512_cmpgt_epu16_mask() {
15335        let a = _mm512_set1_epi16(2);
15336        let b = _mm512_set1_epi16(1);
15337        let m = _mm512_cmpgt_epu16_mask(a, b);
15338        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15339    }
15340
15341    #[simd_test(enable = "avx512bw")]
15342    const fn test_mm512_mask_cmpgt_epu16_mask() {
15343        let a = _mm512_set1_epi16(2);
15344        let b = _mm512_set1_epi16(1);
15345        let mask = 0b01010101_01010101_01010101_01010101;
15346        let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b);
15347        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15348    }
15349
15350    #[simd_test(enable = "avx512bw,avx512vl")]
15351    const fn test_mm256_cmpgt_epu16_mask() {
15352        let a = _mm256_set1_epi16(2);
15353        let b = _mm256_set1_epi16(1);
15354        let m = _mm256_cmpgt_epu16_mask(a, b);
15355        assert_eq!(m, 0b11111111_11111111);
15356    }
15357
15358    #[simd_test(enable = "avx512bw,avx512vl")]
15359    const fn test_mm256_mask_cmpgt_epu16_mask() {
15360        let a = _mm256_set1_epi16(2);
15361        let b = _mm256_set1_epi16(1);
15362        let mask = 0b01010101_01010101;
15363        let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b);
15364        assert_eq!(r, 0b01010101_01010101);
15365    }
15366
15367    #[simd_test(enable = "avx512bw,avx512vl")]
15368    const fn test_mm_cmpgt_epu16_mask() {
15369        let a = _mm_set1_epi16(2);
15370        let b = _mm_set1_epi16(1);
15371        let m = _mm_cmpgt_epu16_mask(a, b);
15372        assert_eq!(m, 0b11111111);
15373    }
15374
15375    #[simd_test(enable = "avx512bw,avx512vl")]
15376    const fn test_mm_mask_cmpgt_epu16_mask() {
15377        let a = _mm_set1_epi16(2);
15378        let b = _mm_set1_epi16(1);
15379        let mask = 0b01010101;
15380        let r = _mm_mask_cmpgt_epu16_mask(mask, a, b);
15381        assert_eq!(r, 0b01010101);
15382    }
15383
15384    #[simd_test(enable = "avx512bw")]
15385    const fn test_mm512_cmpgt_epu8_mask() {
15386        let a = _mm512_set1_epi8(2);
15387        let b = _mm512_set1_epi8(1);
15388        let m = _mm512_cmpgt_epu8_mask(a, b);
15389        assert_eq!(
15390            m,
15391            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15392        );
15393    }
15394
15395    #[simd_test(enable = "avx512bw")]
15396    const fn test_mm512_mask_cmpgt_epu8_mask() {
15397        let a = _mm512_set1_epi8(2);
15398        let b = _mm512_set1_epi8(1);
15399        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15400        let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b);
15401        assert_eq!(
15402            r,
15403            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15404        );
15405    }
15406
15407    #[simd_test(enable = "avx512bw,avx512vl")]
15408    const fn test_mm256_cmpgt_epu8_mask() {
15409        let a = _mm256_set1_epi8(2);
15410        let b = _mm256_set1_epi8(1);
15411        let m = _mm256_cmpgt_epu8_mask(a, b);
15412        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15413    }
15414
15415    #[simd_test(enable = "avx512bw,avx512vl")]
15416    const fn test_mm256_mask_cmpgt_epu8_mask() {
15417        let a = _mm256_set1_epi8(2);
15418        let b = _mm256_set1_epi8(1);
15419        let mask = 0b01010101_01010101_01010101_01010101;
15420        let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b);
15421        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15422    }
15423
15424    #[simd_test(enable = "avx512bw,avx512vl")]
15425    const fn test_mm_cmpgt_epu8_mask() {
15426        let a = _mm_set1_epi8(2);
15427        let b = _mm_set1_epi8(1);
15428        let m = _mm_cmpgt_epu8_mask(a, b);
15429        assert_eq!(m, 0b11111111_11111111);
15430    }
15431
15432    #[simd_test(enable = "avx512bw,avx512vl")]
15433    const fn test_mm_mask_cmpgt_epu8_mask() {
15434        let a = _mm_set1_epi8(2);
15435        let b = _mm_set1_epi8(1);
15436        let mask = 0b01010101_01010101;
15437        let r = _mm_mask_cmpgt_epu8_mask(mask, a, b);
15438        assert_eq!(r, 0b01010101_01010101);
15439    }
15440
15441    #[simd_test(enable = "avx512bw")]
15442    const fn test_mm512_cmpgt_epi16_mask() {
15443        let a = _mm512_set1_epi16(2);
15444        let b = _mm512_set1_epi16(-1);
15445        let m = _mm512_cmpgt_epi16_mask(a, b);
15446        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15447    }
15448
15449    #[simd_test(enable = "avx512bw")]
15450    const fn test_mm512_mask_cmpgt_epi16_mask() {
15451        let a = _mm512_set1_epi16(2);
15452        let b = _mm512_set1_epi16(-1);
15453        let mask = 0b01010101_01010101_01010101_01010101;
15454        let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b);
15455        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15456    }
15457
15458    #[simd_test(enable = "avx512bw,avx512vl")]
15459    const fn test_mm256_cmpgt_epi16_mask() {
15460        let a = _mm256_set1_epi16(2);
15461        let b = _mm256_set1_epi16(-1);
15462        let m = _mm256_cmpgt_epi16_mask(a, b);
15463        assert_eq!(m, 0b11111111_11111111);
15464    }
15465
15466    #[simd_test(enable = "avx512bw,avx512vl")]
15467    const fn test_mm256_mask_cmpgt_epi16_mask() {
15468        let a = _mm256_set1_epi16(2);
15469        let b = _mm256_set1_epi16(-1);
15470        let mask = 0b001010101_01010101;
15471        let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b);
15472        assert_eq!(r, 0b01010101_01010101);
15473    }
15474
    // Signed greater-than on i16 lanes, 128-bit vectors: 8 lanes -> 8 mask bits.
    // Every lane of `a` (2) is greater than every lane of `b` (-1), so the plain
    // form sets all bits and the write-masked form can only pass through `mask`.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpgt_epi16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpgt_epi16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    // Signed greater-than on i8 lanes: 64 lanes at 512 bits, 32 at 256, 16 at
    // 128. With `a` = 2 and `b` = -1 the comparison holds in every lane, so the
    // unmasked result is all ones and the masked result equals the input mask.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpgt_epi8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpgt_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpgt_epi8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpgt_epi8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpgt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpgt_epi8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpgt_epi8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpgt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpgt_epi8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
15548
    // Unsigned <= on u16 lanes: set1(-1) wraps to 0xFFFF on both sides, so
    // a <= b holds in every lane. Mask widths: 32 lanes (512-bit), 16 (256-bit),
    // 8 (128-bit). The `mask_` variants AND the comparison result with `mask`.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmple_epu16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmple_epu16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmple_epu16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmple_epu16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmple_epu16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmple_epu16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    // Unsigned <= on u8 lanes: set1(-1) wraps to 0xFF; equal operands satisfy
    // <= in all lanes (64/32/16 mask bits for 512/256/128-bit vectors).
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmple_epu8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmple_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmple_epu8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmple_epu8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmple_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmple_epu8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmple_epu8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmple_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmple_epu8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    // Signed <= on i16 lanes: both operands are -1, so the comparison holds in
    // every lane at every vector width.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmple_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmple_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmple_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmple_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmple_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmple_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    // Signed <= on i8 lanes: equal operands (-1) again satisfy <= everywhere.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmple_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmple_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmple_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmple_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmple_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmple_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmple_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmple_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmple_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
15764
    // Unsigned >= on u16 lanes: equal operands (1), so >= holds in every lane.
    // Mask widths: 32/16/8 bits for 512/256/128-bit vectors; `mask_` variants
    // filter the comparison result through the supplied mask.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpge_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpge_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpge_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpge_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpge_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpge_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    // Unsigned >= on u8 lanes: equal operands (1); 64/32/16 mask bits.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpge_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpge_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpge_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpge_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpge_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpge_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpge_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpge_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpge_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    // Signed >= on i16 lanes: equal operands (-1), so >= holds everywhere.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpge_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpge_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpge_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpge_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpge_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpge_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    // Signed >= on i8 lanes: equal operands (-1).
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpge_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpge_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpge_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpge_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpge_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpge_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpge_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpge_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpge_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
15980
    // Equality on u16 lanes: identical operands (1), so every lane compares
    // equal and the full mask is set (32/16/8 bits at 512/256/128-bit widths);
    // the `mask_` variants return the comparison ANDed with `mask`.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpeq_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpeq_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpeq_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpeq_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpeq_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpeq_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    // Equality on u8 lanes: identical operands (1); 64/32/16 mask bits.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpeq_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpeq_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpeq_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpeq_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpeq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpeq_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpeq_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpeq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpeq_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    // Equality on i16 lanes: identical operands (-1).
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpeq_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpeq_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpeq_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpeq_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpeq_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpeq_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    // Equality on i8 lanes: identical operands (-1).
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpeq_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpeq_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpeq_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpeq_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpeq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpeq_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpeq_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpeq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpeq_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
16196
16197    #[simd_test(enable = "avx512bw")]
16198    const fn test_mm512_cmpneq_epu16_mask() {
16199        let a = _mm512_set1_epi16(2);
16200        let b = _mm512_set1_epi16(1);
16201        let m = _mm512_cmpneq_epu16_mask(a, b);
16202        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16203    }
16204
16205    #[simd_test(enable = "avx512bw")]
16206    const fn test_mm512_mask_cmpneq_epu16_mask() {
16207        let a = _mm512_set1_epi16(2);
16208        let b = _mm512_set1_epi16(1);
16209        let mask = 0b01010101_01010101_01010101_01010101;
16210        let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
16211        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16212    }
16213
16214    #[simd_test(enable = "avx512bw,avx512vl")]
16215    const fn test_mm256_cmpneq_epu16_mask() {
16216        let a = _mm256_set1_epi16(2);
16217        let b = _mm256_set1_epi16(1);
16218        let m = _mm256_cmpneq_epu16_mask(a, b);
16219        assert_eq!(m, 0b11111111_11111111);
16220    }
16221
16222    #[simd_test(enable = "avx512bw,avx512vl")]
16223    const fn test_mm256_mask_cmpneq_epu16_mask() {
16224        let a = _mm256_set1_epi16(2);
16225        let b = _mm256_set1_epi16(1);
16226        let mask = 0b01010101_01010101;
16227        let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
16228        assert_eq!(r, 0b01010101_01010101);
16229    }
16230
16231    #[simd_test(enable = "avx512bw,avx512vl")]
16232    const fn test_mm_cmpneq_epu16_mask() {
16233        let a = _mm_set1_epi16(2);
16234        let b = _mm_set1_epi16(1);
16235        let m = _mm_cmpneq_epu16_mask(a, b);
16236        assert_eq!(m, 0b11111111);
16237    }
16238
16239    #[simd_test(enable = "avx512bw,avx512vl")]
16240    const fn test_mm_mask_cmpneq_epu16_mask() {
16241        let a = _mm_set1_epi16(2);
16242        let b = _mm_set1_epi16(1);
16243        let mask = 0b01010101;
16244        let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
16245        assert_eq!(r, 0b01010101);
16246    }
16247
16248    #[simd_test(enable = "avx512bw")]
16249    const fn test_mm512_cmpneq_epu8_mask() {
16250        let a = _mm512_set1_epi8(2);
16251        let b = _mm512_set1_epi8(1);
16252        let m = _mm512_cmpneq_epu8_mask(a, b);
16253        assert_eq!(
16254            m,
16255            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16256        );
16257    }
16258
16259    #[simd_test(enable = "avx512bw")]
16260    const fn test_mm512_mask_cmpneq_epu8_mask() {
16261        let a = _mm512_set1_epi8(2);
16262        let b = _mm512_set1_epi8(1);
16263        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16264        let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
16265        assert_eq!(
16266            r,
16267            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16268        );
16269    }
16270
16271    #[simd_test(enable = "avx512bw,avx512vl")]
16272    const fn test_mm256_cmpneq_epu8_mask() {
16273        let a = _mm256_set1_epi8(2);
16274        let b = _mm256_set1_epi8(1);
16275        let m = _mm256_cmpneq_epu8_mask(a, b);
16276        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16277    }
16278
16279    #[simd_test(enable = "avx512bw,avx512vl")]
16280    const fn test_mm256_mask_cmpneq_epu8_mask() {
16281        let a = _mm256_set1_epi8(2);
16282        let b = _mm256_set1_epi8(1);
16283        let mask = 0b01010101_01010101_01010101_01010101;
16284        let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
16285        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16286    }
16287
16288    #[simd_test(enable = "avx512bw,avx512vl")]
16289    const fn test_mm_cmpneq_epu8_mask() {
16290        let a = _mm_set1_epi8(2);
16291        let b = _mm_set1_epi8(1);
16292        let m = _mm_cmpneq_epu8_mask(a, b);
16293        assert_eq!(m, 0b11111111_11111111);
16294    }
16295
16296    #[simd_test(enable = "avx512bw,avx512vl")]
16297    const fn test_mm_mask_cmpneq_epu8_mask() {
16298        let a = _mm_set1_epi8(2);
16299        let b = _mm_set1_epi8(1);
16300        let mask = 0b01010101_01010101;
16301        let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
16302        assert_eq!(r, 0b01010101_01010101);
16303    }
16304
    // Signed 16-bit not-equal compares at 512/256/128-bit widths. Every lane
    // compares 1 != -1, so the plain variants return an all-ones mask; the
    // `mask_` variants zero the result bits whose writemask bit is clear,
    // leaving exactly the supplied alternating pattern.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpneq_epi16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpneq_epi16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpneq_epi16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpneq_epi16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpneq_epi16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpneq_epi16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }
16355
    // Signed 8-bit not-equal compares at 512/256/128-bit widths: 1 != -1 in
    // every lane, so the plain variants yield all-ones masks (64/32/16 bits)
    // and the `mask_` variants pass through exactly the writemask pattern.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmpneq_epi8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpneq_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmpneq_epi8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmpneq_epi8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpneq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmpneq_epi8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmpneq_epi8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpneq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmpneq_epi8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
16412
    // Generic unsigned 16-bit compare with the `_MM_CMPINT_LT` predicate:
    // 0 < 1 holds in every lane, so the plain variants set all mask bits and
    // the `mask_` variants return exactly the supplied writemask.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmp_epu16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmp_epu16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmp_epu16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmp_epu16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmp_epu16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmp_epu16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101);
    }
16463
    // Generic unsigned 8-bit compare with `_MM_CMPINT_LT`: 0 < 1 in every
    // lane, so all 64/32/16 mask bits are set; the `mask_` variants clear the
    // bits whose writemask bit is 0.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmp_epu8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmp_epu8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmp_epu8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmp_epu8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmp_epu8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmp_epu8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
16520
    // Generic signed 16-bit compare with `_MM_CMPINT_LT`: 0 < 1 in every
    // lane, so the plain variants set all mask bits and the `mask_` variants
    // reproduce the writemask pattern.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmp_epi16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmp_epi16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmp_epi16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmp_epi16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmp_epi16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmp_epi16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101);
    }
16571
    // Generic signed 8-bit compare with `_MM_CMPINT_LT`: 0 < 1 in every lane,
    // so the plain variants yield all-ones masks and the `mask_` variants
    // yield exactly the supplied writemask.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_cmp_epi8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cmp_epi8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_cmp_epi8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cmp_epi8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cmp_epi8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cmp_epi8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
16628
    // Horizontal add reductions over all-ones vectors: the result equals the
    // number of lanes summed — all lanes for the plain variants, or only the
    // lanes whose mask bit is set for the `mask_` variants.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let e = _mm256_reduce_add_epi16(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_add_epi16() {
        let a = _mm256_set1_epi16(1);
        // Mask selects 8 of the 16 lanes, so the sum is 8.
        let e = _mm256_mask_reduce_add_epi16(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_add_epi16() {
        let a = _mm_set1_epi16(1);
        let e = _mm_reduce_add_epi16(a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_add_epi16() {
        let a = _mm_set1_epi16(1);
        let e = _mm_mask_reduce_add_epi16(0b11110000, a);
        assert_eq!(4, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let e = _mm256_reduce_add_epi8(a);
        assert_eq!(32, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_add_epi8() {
        let a = _mm256_set1_epi8(1);
        // 16 of the 32 mask bits are set, so the sum is 16.
        let e = _mm256_mask_reduce_add_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_add_epi8() {
        let a = _mm_set1_epi8(1);
        let e = _mm_reduce_add_epi8(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_add_epi8() {
        let a = _mm_set1_epi8(1);
        let e = _mm_mask_reduce_add_epi8(0b11111111_00000000, a);
        assert_eq!(8, e);
    }
16684
    // Horizontal AND reductions. The inputs mix 1s and 2s (1 & 2 == 0), so a
    // reduction over all lanes is 0. The masked variants select only the
    // high-index lanes — which hold 1, because `_mm*_set_epi*` takes its
    // arguments from the highest element down — so the masked result is 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_and_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_and_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_and_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        // Bits 8..16 select elements 8..16 — the all-1 half.
        let e = _mm256_mask_reduce_and_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_and_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_and_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_and_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_and_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_and_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_and_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_and_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_and_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_and_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_and_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_and_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_and_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }
16746
    // Horizontal multiply reductions. The expected values are powers of two:
    // each 2 in a reduced lane doubles the product (2^8 = 256, 2^4 = 16,
    // 2^6 = 64, 2^3 = 8). The masked variants select only lanes holding 1,
    // so their product is 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_mul_epi16() {
        let a = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        let e = _mm256_reduce_mul_epi16(a);
        assert_eq!(256, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_mul_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        // Mask selects elements 8..16, which all hold 1.
        let e = _mm256_mask_reduce_mul_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_mul_epi16() {
        let a = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
        let e = _mm_reduce_mul_epi16(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_mul_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_mul_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_mul_epi8() {
        // Six lanes hold 2 (three per 128-bit half), giving 2^6 = 64.
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            2, 2, 2,
        );
        let e = _mm256_reduce_mul_epi8(a);
        assert_eq!(64, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_mul_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_mul_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_mul_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
        let e = _mm_reduce_mul_epi8(a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_mul_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
        let e = _mm_mask_reduce_mul_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }
16808
    // Signed max reductions over the values 0..N-1. `_mm*_set_epi*` lists the
    // highest element first, so element 0 holds N-1 and the full reduction is
    // N-1; the masked variants select the high-index elements, which hold the
    // smaller values, so the masked max is N/2 - 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_reduce_max_epi16(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        // Elements 8..16 hold 7 down to 0; their max is 7.
        let e: i16 = _mm256_mask_reduce_max_epi16(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_reduce_max_epi16(a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_mask_reduce_max_epi16(0b11110000, a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_max_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_reduce_max_epi8(a);
        assert_eq!(31, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_max_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_mask_reduce_max_epi8(0b1111111111111111_0000000000000000, a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_reduce_max_epi8(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_mask_reduce_max_epi8(0b11111111_00000000, a);
        assert_eq!(7, e);
    }
16870
    // Unsigned max reductions — same inputs and expected values as the signed
    // tests above, since all test values are non-negative: full reduction
    // yields N-1; the masks select the high-index (smaller-valued) elements,
    // yielding N/2 - 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_reduce_max_epu16(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_mask_reduce_max_epu16(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_reduce_max_epu16(a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_mask_reduce_max_epu16(0b11110000, a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_max_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_max_epu8(a);
        assert_eq!(31, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_max_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_max_epu8(0b1111111111111111_0000000000000000, a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_max_epu8(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_max_epu8(0b11111111_00000000, a);
        assert_eq!(7, e);
    }
16932
    // Signed min reductions over the values 0..N-1. The minimum value 0 sits
    // in the highest-index element (first `set_epi*` argument), which every
    // tested mask includes, so all variants — full and masked — return 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_reduce_min_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_mask_reduce_min_epi16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_reduce_min_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_mask_reduce_min_epi16(0b11110000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_min_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_reduce_min_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_min_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_mask_reduce_min_epi8(0b1111111111111111_0000000000000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_reduce_min_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_mask_reduce_min_epi8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }
16994
    // Unsigned min reductions — same inputs as the signed tests above; all
    // values are non-negative and the minimum 0 lies inside every tested
    // mask, so every variant returns 0.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_mask_reduce_min_epu16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_mask_reduce_min_epu16(0b11110000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_min_epu8(0b1111111111111111_0000000000000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_min_epu8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }
17056
    // Horizontal OR reductions. The inputs mix 1s and 2s, so the full
    // reduction is 1 | 2 == 3; the masked variants select only the high-index
    // lanes (first `set_epi*` arguments), which all hold 1, giving 1.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_or_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_or_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }
17118
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_loadu_epi16() {
        // Unaligned load from a slice; `_mm512_set_epi16` lists lanes
        // high-to-low, so the expected vector mirrors the array order.
        #[rustfmt::skip]
        let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = unsafe { _mm512_loadu_epi16(&a[0]) };
        #[rustfmt::skip]
        let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_loadu_epi16() {
        // Unaligned 256-bit load; set_epi16 lists lanes high-to-low.
        let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = unsafe { _mm256_loadu_epi16(&a[0]) };
        let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_loadu_epi16() {
        // Unaligned 128-bit load; set_epi16 lists lanes high-to-low.
        let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
        let r = unsafe { _mm_loadu_epi16(&a[0]) };
        let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_loadu_epi8() {
        // Unaligned 512-bit byte load; set_epi8 lists lanes high-to-low.
        #[rustfmt::skip]
        let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                           1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = unsafe { _mm512_loadu_epi8(&a[0]) };
        #[rustfmt::skip]
        let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_loadu_epi8() {
        // Unaligned 256-bit byte load; set_epi8 lists lanes high-to-low.
        #[rustfmt::skip]
        let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = unsafe { _mm256_loadu_epi8(&a[0]) };
        #[rustfmt::skip]
        let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_loadu_epi8() {
        // Unaligned 128-bit byte load; set_epi8 lists lanes high-to-low.
        let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = unsafe { _mm_loadu_epi8(&a[0]) };
        let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }
17174
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_storeu_epi16() {
        // Store then compare: the destination's initial (undefined) contents
        // are fully overwritten, so `r` must equal `a` afterwards.
        let a = _mm512_set1_epi16(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        }
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_storeu_epi16() {
        // Unaligned store fully overwrites the zeroed destination.
        let a = _mm256_set1_epi16(9);
        let mut r = _mm256_set1_epi32(0);
        unsafe {
            _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        }
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_storeu_epi16() {
        // Unaligned store fully overwrites the zeroed destination.
        let a = _mm_set1_epi16(9);
        let mut r = _mm_set1_epi32(0);
        unsafe {
            _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        }
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_storeu_epi8() {
        // Unaligned byte store fully overwrites the destination.
        let a = _mm512_set1_epi8(9);
        let mut r = _mm512_undefined_epi32();
        unsafe {
            _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        }
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_storeu_epi8() {
        // Unaligned byte store fully overwrites the zeroed destination.
        let a = _mm256_set1_epi8(9);
        let mut r = _mm256_set1_epi32(0);
        unsafe {
            _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        }
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_storeu_epi8() {
        // Unaligned byte store fully overwrites the zeroed destination.
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi32(0);
        unsafe {
            _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        }
        assert_eq_m128i(r, a);
    }
17234
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_loadu_epi16() {
        // Masked load: mask bit i controls element i; 0 keeps `src` (42),
        // 1 loads from memory. `black_box` keeps the pointer opaque to LLVM.
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = unsafe { _mm512_mask_loadu_epi16(src, m, black_box(p)) };
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_loadu_epi16() {
        // Zero-masked load: mask bit 0 zeroes the element instead of
        // keeping a source value.
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = unsafe { _mm512_maskz_loadu_epi16(m, black_box(p)) };
        let e = &[
            0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_storeu_epi16() {
        // Masked store: only elements whose mask bit is 1 are written;
        // the rest of `r` keeps its initial 42s.
        let mut r = [42_i16; 32];
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = unsafe { _mm512_loadu_epi16(a.as_ptr()) };
        let m = 0b10101010_11001100_11101000_11001010;
        unsafe {
            _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        }
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
        assert_eq_m512i(unsafe { _mm512_loadu_epi16(r.as_ptr()) }, e);
    }
17289
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_loadu_epi8() {
        // Masked byte load: mask bit i controls byte i; 0 keeps `src` (42).
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = unsafe { _mm512_mask_loadu_epi8(src, m, black_box(p)) };
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_loadu_epi8() {
        // Zero-masked byte load: mask bit 0 zeroes the byte.
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = unsafe { _mm512_maskz_loadu_epi8(m, black_box(p)) };
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
            50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
        ];
        let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_storeu_epi8() {
        // Masked byte store: only bytes whose mask bit is 1 are written.
        let mut r = [42_i8; 64];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let a = unsafe { _mm512_loadu_epi8(a.as_ptr()) };
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        unsafe {
            _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        }
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
        assert_eq_m512i(unsafe { _mm512_loadu_epi8(r.as_ptr()) }, e);
    }
17350
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_loadu_epi16() {
        // Masked load: mask bit 0 keeps `src` (42), bit 1 loads from memory.
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm256_mask_loadu_epi16(src, m, black_box(p)) };
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_loadu_epi16() {
        // Zero-masked load: mask bit 0 zeroes the element.
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm256_maskz_loadu_epi16(m, black_box(p)) };
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_storeu_epi16() {
        // Masked store: unwritten elements keep their initial 42s.
        let mut r = [42_i16; 16];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = unsafe { _mm256_loadu_epi16(a.as_ptr()) };
        let m = 0b11101000_11001010;
        unsafe {
            _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        }
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
        assert_eq_m256i(unsafe { _mm256_loadu_epi16(r.as_ptr()) }, e);
    }
17391
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_loadu_epi8() {
        // Masked byte load: mask bit 0 keeps `src` (42), bit 1 loads.
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = unsafe { _mm256_mask_loadu_epi8(src, m, black_box(p)) };
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_loadu_epi8() {
        // Zero-masked byte load: mask bit 0 zeroes the byte.
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = unsafe { _mm256_maskz_loadu_epi8(m, black_box(p)) };
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_storeu_epi8() {
        // Masked byte store: unwritten bytes keep their initial 42s.
        let mut r = [42_i8; 32];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = unsafe { _mm256_loadu_epi8(a.as_ptr()) };
        let m = 0b10101010_11001100_11101000_11001010;
        unsafe {
            _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        }
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
        assert_eq_m256i(unsafe { _mm256_loadu_epi8(r.as_ptr()) }, e);
    }
17446
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_loadu_epi16() {
        // Masked load: mask bit 0 keeps `src` (42), bit 1 loads from memory.
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm_mask_loadu_epi16(src, m, black_box(p)) };
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_loadu_epi16() {
        // Zero-masked load: mask bit 0 zeroes the element.
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = unsafe { _mm_maskz_loadu_epi16(m, black_box(p)) };
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
        let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_storeu_epi16() {
        // Masked store: unwritten elements keep their initial 42s.
        let mut r = [42_i16; 8];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let a = unsafe { _mm_loadu_epi16(a.as_ptr()) };
        let m = 0b11001010;
        unsafe { _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a) };
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
        assert_eq_m128i(unsafe { _mm_loadu_epi16(r.as_ptr()) }, e);
    }
17481
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_loadu_epi8() {
        // Masked byte load: mask bit 0 keeps `src` (42), bit 1 loads.
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm_mask_loadu_epi8(src, m, black_box(p)) };
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_loadu_epi8() {
        // Zero-masked byte load: mask bit 0 zeroes the byte.
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = unsafe { _mm_maskz_loadu_epi8(m, black_box(p)) };
        let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_storeu_epi8() {
        // Masked byte store: unwritten bytes keep their initial 42s.
        let mut r = [42_i8; 16];
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = unsafe { _mm_loadu_epi8(a.as_ptr()) };
        let m = 0b11101000_11001010;
        unsafe { _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a) };
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
        assert_eq_m128i(unsafe { _mm_loadu_epi8(r.as_ptr()) }, e);
    }
17520
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_madd_epi16() {
        // `vpmaddwd`: multiply adjacent i16 pairs and add into i32 lanes.
        // With all-ones inputs each i32 lane is 1*1 + 1*1 = 2.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_madd_epi16(a, b);
        let e = _mm512_set1_epi32(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_madd_epi16() {
        // Unselected i32 lanes keep `src` (= `a`); viewed as i32, a lane made
        // of two i16 ones is `1 << 16 | 1`. Selected lanes become 2.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_madd_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_madd_epi16() {
        // Unselected i32 lanes are zeroed; selected lanes are 2.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_madd_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_madd_epi16() {
        // Same as the 512-bit masked variant, on 8 i32 lanes.
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_madd_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm256_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_madd_epi16() {
        // Unselected i32 lanes are zeroed; selected lanes are 2.
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_madd_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_madd_epi16() {
        // With mask 0b00001111 all four i32 lanes are selected, so every
        // lane is 2.
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_madd_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_madd_epi16() {
        // All four lanes selected → all lanes are 2.
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_madd_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
17621
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maddubs_epi16() {
        // `vpmaddubsw`: multiply unsigned bytes of `a` by signed bytes of `b`,
        // horizontally add pairs into i16 lanes: 1*1 + 1*1 = 2.
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maddubs_epi16(a, b);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
17630
17631    #[simd_test(enable = "avx512bw")]
17632    fn test_mm512_mask_maddubs_epi16() {
17633        let a = _mm512_set1_epi8(1);
17634        let b = _mm512_set1_epi8(1);
17635        let src = _mm512_set1_epi16(1);
17636        let r = _mm512_mask_maddubs_epi16(src, 0, a, b);
17637        assert_eq_m512i(r, src);
17638        let r = _mm512_mask_add_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
17639        #[rustfmt::skip]
17640        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17641                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<9|2);
17642        assert_eq_m512i(r, e);
17643    }
17644
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_maddubs_epi16() {
        // Zero-masked maddubs: unselected i16 lanes are zeroed, selected
        // lanes are 1*1 + 1*1 = 2.
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_maddubs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
                                 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }
17657
17658    #[simd_test(enable = "avx512bw,avx512vl")]
17659    fn test_mm256_mask_maddubs_epi16() {
17660        let a = _mm256_set1_epi8(1);
17661        let b = _mm256_set1_epi8(1);
17662        let src = _mm256_set1_epi16(1);
17663        let r = _mm256_mask_maddubs_epi16(src, 0, a, b);
17664        assert_eq_m256i(r, src);
17665        let r = _mm256_mask_add_epi16(src, 0b00000000_00000001, a, b);
17666        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2);
17667        assert_eq_m256i(r, e);
17668    }
17669
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_maddubs_epi16() {
        // Zero-masked maddubs: low 8 lanes selected → 2, high 8 zeroed.
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_maddubs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }
17680
17681    #[simd_test(enable = "avx512bw,avx512vl")]
17682    fn test_mm_mask_maddubs_epi16() {
17683        let a = _mm_set1_epi8(1);
17684        let b = _mm_set1_epi8(1);
17685        let src = _mm_set1_epi16(1);
17686        let r = _mm_mask_maddubs_epi16(src, 0, a, b);
17687        assert_eq_m128i(r, src);
17688        let r = _mm_mask_add_epi16(src, 0b00000001, a, b);
17689        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2);
17690        assert_eq_m128i(r, e);
17691    }
17692
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_maddubs_epi16() {
        // Zero-masked maddubs: low 4 lanes selected → 2, high 4 zeroed.
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_maddubs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_maddubs_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
17703
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_packs_epi32() {
        // `vpackssdw`: saturate i32 lanes to i16 and interleave `a`/`b` per
        // 128-bit lane; i32::MAX saturates to i16::MAX.
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_packs_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX,
                                 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_packs_epi32() {
        // Unselected i16 lanes keep `src` (= `b`, whose i16 halves are 1);
        // the four selected lanes hold the saturated i16::MAX.
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1 << 16 | 1);
        let r = _mm512_mask_packs_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_packs_epi32() {
        // Unselected i16 lanes are zeroed.
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_packs_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_packs_epi32() {
        // 256-bit masked pack; unselected lanes keep `src` (= `b`).
        let a = _mm256_set1_epi32(i32::MAX);
        let b = _mm256_set1_epi32(1 << 16 | 1);
        let r = _mm256_mask_packs_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_packs_epi32() {
        // 256-bit zero-masked pack; unselected lanes are zeroed.
        let a = _mm256_set1_epi32(i32::MAX);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_packs_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_packs_epi32() {
        // 128-bit masked pack; unselected lanes keep `src` (= `b`).
        let a = _mm_set1_epi32(i32::MAX);
        let b = _mm_set1_epi32(1 << 16 | 1);
        let r = _mm_mask_packs_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packs_epi32(b, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_packs_epi32() {
        // 128-bit zero-masked pack; unselected lanes are zeroed.
        let a = _mm_set1_epi32(i32::MAX);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_packs_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packs_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
17786
17787    #[simd_test(enable = "avx512bw")]
17788    fn test_mm512_packs_epi16() {
17789        let a = _mm512_set1_epi16(i16::MAX);
17790        let b = _mm512_set1_epi16(1);
17791        let r = _mm512_packs_epi16(a, b);
17792        #[rustfmt::skip]
17793        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
17794                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
17795                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
17796                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
17797        assert_eq_m512i(r, e);
17798    }
17799
17800    #[simd_test(enable = "avx512bw")]
17801    fn test_mm512_mask_packs_epi16() {
17802        let a = _mm512_set1_epi16(i16::MAX);
17803        let b = _mm512_set1_epi16(1 << 8 | 1);
17804        let r = _mm512_mask_packs_epi16(a, 0, a, b);
17805        assert_eq_m512i(r, a);
17806        let r = _mm512_mask_packs_epi16(
17807            b,
17808            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
17809            a,
17810            b,
17811        );
17812        #[rustfmt::skip]
17813        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17814                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17815                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17816                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
17817        assert_eq_m512i(r, e);
17818    }
17819
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_packs_epi16() {
        // Zeromask: a zero mask clears everything; a low-nibble mask keeps
        // only the four lowest saturated bytes.
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_packs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packs_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }
17838
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_packs_epi16() {
        // 256-bit writemask variant: only the four lowest bytes take the
        // saturated result; the rest come from `src` (= `b`, all bytes 1).
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1 << 8 | 1);
        let r = _mm256_mask_packs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }
17851
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_packs_epi16() {
        // 256-bit zeromask variant: low nibble keeps the saturated bytes.
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_packs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }
17864
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_packs_epi16() {
        // 128-bit writemask variant: low nibble takes the saturated result.
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1 << 8 | 1);
        let r = _mm_mask_packs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
17876
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_packs_epi16() {
        // 128-bit zeromask variant: low nibble keeps the saturated bytes.
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_packs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
17888
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_packus_epi32() {
        // Unsigned-saturating pack: -1 clamps to 0. Per the expected vector,
        // each 128-bit lane holds 4 words from `a` (low) then 4 from `b` (high).
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_packus_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
                                 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
17899
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_packus_epi32() {
        // Writemask: low nibble gets the clamped-to-0 words; the rest come
        // from `src` (= `b`, all words 1).
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1 << 16 | 1);
        let r = _mm512_mask_packus_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
17912
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_packus_epi32() {
        // Zeromask: the selected lanes are -1 clamped to 0, so the whole
        // expected vector is zero.
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_packus_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
17925
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_packus_epi32() {
        // 256-bit writemask variant of the unsigned dword pack.
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1 << 16 | 1);
        let r = _mm256_mask_packus_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
17936
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_packus_epi32() {
        // 256-bit zeromask variant: selected lanes clamp -1 to 0 as well.
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_packus_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
17947
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_packus_epi32() {
        // 128-bit writemask variant of the unsigned dword pack.
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1 << 16 | 1);
        let r = _mm_mask_packus_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packus_epi32(b, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
17958
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_packus_epi32() {
        // 128-bit zeromask variant: selected lanes clamp -1 to 0 as well.
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_packus_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packus_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
17969
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_packus_epi16() {
        // Unsigned-saturating byte pack: -1 clamps to 0; each 128-bit lane
        // holds 8 bytes from `a` (low) then 8 from `b` (high).
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
17982
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_packus_epi16() {
        // Writemask: low nibble gets the clamped-to-0 bytes; the rest come
        // from `src` (= `b`, all bytes 1).
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1 << 8 | 1);
        let r = _mm512_mask_packus_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packus_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
18002
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_packus_epi16() {
        // Zeromask: the selected lanes are -1 clamped to 0, so the whole
        // expected vector is zero.
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_packus_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packus_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
18021
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_packus_epi16() {
        // 256-bit writemask variant of the unsigned byte pack.
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1 << 8 | 1);
        let r = _mm256_mask_packus_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
18034
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_packus_epi16() {
        // 256-bit zeromask variant: selected lanes clamp -1 to 0 as well.
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_packus_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
18047
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_packus_epi16() {
        // 128-bit writemask variant of the unsigned byte pack.
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1 << 8 | 1);
        let r = _mm_mask_packus_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
18058
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_packus_epi16() {
        // 128-bit zeromask variant: selected lanes clamp -1 to 0 as well.
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_packus_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
18069
18070    #[simd_test(enable = "avx512bw")]
18071    const fn test_mm512_avg_epu16() {
18072        let a = _mm512_set1_epi16(1);
18073        let b = _mm512_set1_epi16(1);
18074        let r = _mm512_avg_epu16(a, b);
18075        let e = _mm512_set1_epi16(1);
18076        assert_eq_m512i(r, e);
18077    }
18078
18079    #[simd_test(enable = "avx512bw")]
18080    const fn test_mm512_mask_avg_epu16() {
18081        let a = _mm512_set1_epi16(1);
18082        let b = _mm512_set1_epi16(1);
18083        let r = _mm512_mask_avg_epu16(a, 0, a, b);
18084        assert_eq_m512i(r, a);
18085        let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
18086        #[rustfmt::skip]
18087        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
18088                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
18089        assert_eq_m512i(r, e);
18090    }
18091
18092    #[simd_test(enable = "avx512bw")]
18093    const fn test_mm512_maskz_avg_epu16() {
18094        let a = _mm512_set1_epi16(1);
18095        let b = _mm512_set1_epi16(1);
18096        let r = _mm512_maskz_avg_epu16(0, a, b);
18097        assert_eq_m512i(r, _mm512_setzero_si512());
18098        let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b);
18099        #[rustfmt::skip]
18100        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18101                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
18102        assert_eq_m512i(r, e);
18103    }
18104
18105    #[simd_test(enable = "avx512bw,avx512vl")]
18106    const fn test_mm256_mask_avg_epu16() {
18107        let a = _mm256_set1_epi16(1);
18108        let b = _mm256_set1_epi16(1);
18109        let r = _mm256_mask_avg_epu16(a, 0, a, b);
18110        assert_eq_m256i(r, a);
18111        let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b);
18112        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
18113        assert_eq_m256i(r, e);
18114    }
18115
18116    #[simd_test(enable = "avx512bw,avx512vl")]
18117    const fn test_mm256_maskz_avg_epu16() {
18118        let a = _mm256_set1_epi16(1);
18119        let b = _mm256_set1_epi16(1);
18120        let r = _mm256_maskz_avg_epu16(0, a, b);
18121        assert_eq_m256i(r, _mm256_setzero_si256());
18122        let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b);
18123        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
18124        assert_eq_m256i(r, e);
18125    }
18126
18127    #[simd_test(enable = "avx512bw,avx512vl")]
18128    const fn test_mm_mask_avg_epu16() {
18129        let a = _mm_set1_epi16(1);
18130        let b = _mm_set1_epi16(1);
18131        let r = _mm_mask_avg_epu16(a, 0, a, b);
18132        assert_eq_m128i(r, a);
18133        let r = _mm_mask_avg_epu16(a, 0b00001111, a, b);
18134        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
18135        assert_eq_m128i(r, e);
18136    }
18137
18138    #[simd_test(enable = "avx512bw,avx512vl")]
18139    const fn test_mm_maskz_avg_epu16() {
18140        let a = _mm_set1_epi16(1);
18141        let b = _mm_set1_epi16(1);
18142        let r = _mm_maskz_avg_epu16(0, a, b);
18143        assert_eq_m128i(r, _mm_setzero_si128());
18144        let r = _mm_maskz_avg_epu16(0b00001111, a, b);
18145        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
18146        assert_eq_m128i(r, e);
18147    }
18148
18149    #[simd_test(enable = "avx512bw")]
18150    const fn test_mm512_avg_epu8() {
18151        let a = _mm512_set1_epi8(1);
18152        let b = _mm512_set1_epi8(1);
18153        let r = _mm512_avg_epu8(a, b);
18154        let e = _mm512_set1_epi8(1);
18155        assert_eq_m512i(r, e);
18156    }
18157
18158    #[simd_test(enable = "avx512bw")]
18159    const fn test_mm512_mask_avg_epu8() {
18160        let a = _mm512_set1_epi8(1);
18161        let b = _mm512_set1_epi8(1);
18162        let r = _mm512_mask_avg_epu8(a, 0, a, b);
18163        assert_eq_m512i(r, a);
18164        let r = _mm512_mask_avg_epu8(
18165            a,
18166            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
18167            a,
18168            b,
18169        );
18170        #[rustfmt::skip]
18171        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
18172                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
18173                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
18174                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
18175        assert_eq_m512i(r, e);
18176    }
18177
18178    #[simd_test(enable = "avx512bw")]
18179    const fn test_mm512_maskz_avg_epu8() {
18180        let a = _mm512_set1_epi8(1);
18181        let b = _mm512_set1_epi8(1);
18182        let r = _mm512_maskz_avg_epu8(0, a, b);
18183        assert_eq_m512i(r, _mm512_setzero_si512());
18184        let r = _mm512_maskz_avg_epu8(
18185            0b00000000_000000000_00000000_00000000_00000000_0000000_00000000_00001111,
18186            a,
18187            b,
18188        );
18189        #[rustfmt::skip]
18190        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18191                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18192                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18193                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
18194        assert_eq_m512i(r, e);
18195    }
18196
18197    #[simd_test(enable = "avx512bw,avx512vl")]
18198    const fn test_mm256_mask_avg_epu8() {
18199        let a = _mm256_set1_epi8(1);
18200        let b = _mm256_set1_epi8(1);
18201        let r = _mm256_mask_avg_epu8(a, 0, a, b);
18202        assert_eq_m256i(r, a);
18203        let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
18204        #[rustfmt::skip]
18205        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
18206                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
18207        assert_eq_m256i(r, e);
18208    }
18209
18210    #[simd_test(enable = "avx512bw,avx512vl")]
18211    const fn test_mm256_maskz_avg_epu8() {
18212        let a = _mm256_set1_epi8(1);
18213        let b = _mm256_set1_epi8(1);
18214        let r = _mm256_maskz_avg_epu8(0, a, b);
18215        assert_eq_m256i(r, _mm256_setzero_si256());
18216        let r = _mm256_maskz_avg_epu8(0b00000000_0000000_00000000_00001111, a, b);
18217        #[rustfmt::skip]
18218        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18219                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
18220        assert_eq_m256i(r, e);
18221    }
18222
18223    #[simd_test(enable = "avx512bw,avx512vl")]
18224    const fn test_mm_mask_avg_epu8() {
18225        let a = _mm_set1_epi8(1);
18226        let b = _mm_set1_epi8(1);
18227        let r = _mm_mask_avg_epu8(a, 0, a, b);
18228        assert_eq_m128i(r, a);
18229        let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b);
18230        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
18231        assert_eq_m128i(r, e);
18232    }
18233
18234    #[simd_test(enable = "avx512bw,avx512vl")]
18235    const fn test_mm_maskz_avg_epu8() {
18236        let a = _mm_set1_epi8(1);
18237        let b = _mm_set1_epi8(1);
18238        let r = _mm_maskz_avg_epu8(0, a, b);
18239        assert_eq_m128i(r, _mm_setzero_si128());
18240        let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b);
18241        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
18242        assert_eq_m128i(r, e);
18243    }
18244
18245    #[simd_test(enable = "avx512bw")]
18246    fn test_mm512_sll_epi16() {
18247        let a = _mm512_set1_epi16(1 << 15);
18248        let count = _mm_set1_epi16(2);
18249        let r = _mm512_sll_epi16(a, count);
18250        let e = _mm512_set1_epi16(0);
18251        assert_eq_m512i(r, e);
18252    }
18253
18254    #[simd_test(enable = "avx512bw")]
18255    fn test_mm512_mask_sll_epi16() {
18256        let a = _mm512_set1_epi16(1 << 15);
18257        let count = _mm_set1_epi16(2);
18258        let r = _mm512_mask_sll_epi16(a, 0, a, count);
18259        assert_eq_m512i(r, a);
18260        let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18261        let e = _mm512_set1_epi16(0);
18262        assert_eq_m512i(r, e);
18263    }
18264
18265    #[simd_test(enable = "avx512bw")]
18266    fn test_mm512_maskz_sll_epi16() {
18267        let a = _mm512_set1_epi16(1 << 15);
18268        let count = _mm_set1_epi16(2);
18269        let r = _mm512_maskz_sll_epi16(0, a, count);
18270        assert_eq_m512i(r, _mm512_setzero_si512());
18271        let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count);
18272        let e = _mm512_set1_epi16(0);
18273        assert_eq_m512i(r, e);
18274    }
18275
18276    #[simd_test(enable = "avx512bw,avx512vl")]
18277    fn test_mm256_mask_sll_epi16() {
18278        let a = _mm256_set1_epi16(1 << 15);
18279        let count = _mm_set1_epi16(2);
18280        let r = _mm256_mask_sll_epi16(a, 0, a, count);
18281        assert_eq_m256i(r, a);
18282        let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count);
18283        let e = _mm256_set1_epi16(0);
18284        assert_eq_m256i(r, e);
18285    }
18286
18287    #[simd_test(enable = "avx512bw,avx512vl")]
18288    fn test_mm256_maskz_sll_epi16() {
18289        let a = _mm256_set1_epi16(1 << 15);
18290        let count = _mm_set1_epi16(2);
18291        let r = _mm256_maskz_sll_epi16(0, a, count);
18292        assert_eq_m256i(r, _mm256_setzero_si256());
18293        let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count);
18294        let e = _mm256_set1_epi16(0);
18295        assert_eq_m256i(r, e);
18296    }
18297
18298    #[simd_test(enable = "avx512bw,avx512vl")]
18299    fn test_mm_mask_sll_epi16() {
18300        let a = _mm_set1_epi16(1 << 15);
18301        let count = _mm_set1_epi16(2);
18302        let r = _mm_mask_sll_epi16(a, 0, a, count);
18303        assert_eq_m128i(r, a);
18304        let r = _mm_mask_sll_epi16(a, 0b11111111, a, count);
18305        let e = _mm_set1_epi16(0);
18306        assert_eq_m128i(r, e);
18307    }
18308
18309    #[simd_test(enable = "avx512bw,avx512vl")]
18310    fn test_mm_maskz_sll_epi16() {
18311        let a = _mm_set1_epi16(1 << 15);
18312        let count = _mm_set1_epi16(2);
18313        let r = _mm_maskz_sll_epi16(0, a, count);
18314        assert_eq_m128i(r, _mm_setzero_si128());
18315        let r = _mm_maskz_sll_epi16(0b11111111, a, count);
18316        let e = _mm_set1_epi16(0);
18317        assert_eq_m128i(r, e);
18318    }
18319
18320    #[simd_test(enable = "avx512bw")]
18321    const fn test_mm512_slli_epi16() {
18322        let a = _mm512_set1_epi16(1 << 15);
18323        let r = _mm512_slli_epi16::<1>(a);
18324        let e = _mm512_set1_epi16(0);
18325        assert_eq_m512i(r, e);
18326    }
18327
18328    #[simd_test(enable = "avx512bw")]
18329    const fn test_mm512_mask_slli_epi16() {
18330        let a = _mm512_set1_epi16(1 << 15);
18331        let r = _mm512_mask_slli_epi16::<1>(a, 0, a);
18332        assert_eq_m512i(r, a);
18333        let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a);
18334        let e = _mm512_set1_epi16(0);
18335        assert_eq_m512i(r, e);
18336    }
18337
18338    #[simd_test(enable = "avx512bw")]
18339    const fn test_mm512_maskz_slli_epi16() {
18340        let a = _mm512_set1_epi16(1 << 15);
18341        let r = _mm512_maskz_slli_epi16::<1>(0, a);
18342        assert_eq_m512i(r, _mm512_setzero_si512());
18343        let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a);
18344        let e = _mm512_set1_epi16(0);
18345        assert_eq_m512i(r, e);
18346    }
18347
18348    #[simd_test(enable = "avx512bw,avx512vl")]
18349    const fn test_mm256_mask_slli_epi16() {
18350        let a = _mm256_set1_epi16(1 << 15);
18351        let r = _mm256_mask_slli_epi16::<1>(a, 0, a);
18352        assert_eq_m256i(r, a);
18353        let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a);
18354        let e = _mm256_set1_epi16(0);
18355        assert_eq_m256i(r, e);
18356    }
18357
18358    #[simd_test(enable = "avx512bw,avx512vl")]
18359    const fn test_mm256_maskz_slli_epi16() {
18360        let a = _mm256_set1_epi16(1 << 15);
18361        let r = _mm256_maskz_slli_epi16::<1>(0, a);
18362        assert_eq_m256i(r, _mm256_setzero_si256());
18363        let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a);
18364        let e = _mm256_set1_epi16(0);
18365        assert_eq_m256i(r, e);
18366    }
18367
18368    #[simd_test(enable = "avx512bw,avx512vl")]
18369    const fn test_mm_mask_slli_epi16() {
18370        let a = _mm_set1_epi16(1 << 15);
18371        let r = _mm_mask_slli_epi16::<1>(a, 0, a);
18372        assert_eq_m128i(r, a);
18373        let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a);
18374        let e = _mm_set1_epi16(0);
18375        assert_eq_m128i(r, e);
18376    }
18377
18378    #[simd_test(enable = "avx512bw,avx512vl")]
18379    const fn test_mm_maskz_slli_epi16() {
18380        let a = _mm_set1_epi16(1 << 15);
18381        let r = _mm_maskz_slli_epi16::<1>(0, a);
18382        assert_eq_m128i(r, _mm_setzero_si128());
18383        let r = _mm_maskz_slli_epi16::<1>(0b11111111, a);
18384        let e = _mm_set1_epi16(0);
18385        assert_eq_m128i(r, e);
18386    }
18387
18388    #[simd_test(enable = "avx512bw")]
18389    const fn test_mm512_sllv_epi16() {
18390        let a = _mm512_set1_epi16(1 << 15);
18391        let count = _mm512_set1_epi16(2);
18392        let r = _mm512_sllv_epi16(a, count);
18393        let e = _mm512_set1_epi16(0);
18394        assert_eq_m512i(r, e);
18395    }
18396
18397    #[simd_test(enable = "avx512bw")]
18398    const fn test_mm512_mask_sllv_epi16() {
18399        let a = _mm512_set1_epi16(1 << 15);
18400        let count = _mm512_set1_epi16(2);
18401        let r = _mm512_mask_sllv_epi16(a, 0, a, count);
18402        assert_eq_m512i(r, a);
18403        let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18404        let e = _mm512_set1_epi16(0);
18405        assert_eq_m512i(r, e);
18406    }
18407
18408    #[simd_test(enable = "avx512bw")]
18409    const fn test_mm512_maskz_sllv_epi16() {
18410        let a = _mm512_set1_epi16(1 << 15);
18411        let count = _mm512_set1_epi16(2);
18412        let r = _mm512_maskz_sllv_epi16(0, a, count);
18413        assert_eq_m512i(r, _mm512_setzero_si512());
18414        let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count);
18415        let e = _mm512_set1_epi16(0);
18416        assert_eq_m512i(r, e);
18417    }
18418
18419    #[simd_test(enable = "avx512bw,avx512vl")]
18420    const fn test_mm256_sllv_epi16() {
18421        let a = _mm256_set1_epi16(1 << 15);
18422        let count = _mm256_set1_epi16(2);
18423        let r = _mm256_sllv_epi16(a, count);
18424        let e = _mm256_set1_epi16(0);
18425        assert_eq_m256i(r, e);
18426    }
18427
18428    #[simd_test(enable = "avx512bw,avx512vl")]
18429    const fn test_mm256_mask_sllv_epi16() {
18430        let a = _mm256_set1_epi16(1 << 15);
18431        let count = _mm256_set1_epi16(2);
18432        let r = _mm256_mask_sllv_epi16(a, 0, a, count);
18433        assert_eq_m256i(r, a);
18434        let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count);
18435        let e = _mm256_set1_epi16(0);
18436        assert_eq_m256i(r, e);
18437    }
18438
18439    #[simd_test(enable = "avx512bw,avx512vl")]
18440    const fn test_mm256_maskz_sllv_epi16() {
18441        let a = _mm256_set1_epi16(1 << 15);
18442        let count = _mm256_set1_epi16(2);
18443        let r = _mm256_maskz_sllv_epi16(0, a, count);
18444        assert_eq_m256i(r, _mm256_setzero_si256());
18445        let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count);
18446        let e = _mm256_set1_epi16(0);
18447        assert_eq_m256i(r, e);
18448    }
18449
18450    #[simd_test(enable = "avx512bw,avx512vl")]
18451    const fn test_mm_sllv_epi16() {
18452        let a = _mm_set1_epi16(1 << 15);
18453        let count = _mm_set1_epi16(2);
18454        let r = _mm_sllv_epi16(a, count);
18455        let e = _mm_set1_epi16(0);
18456        assert_eq_m128i(r, e);
18457    }
18458
18459    #[simd_test(enable = "avx512bw,avx512vl")]
18460    const fn test_mm_mask_sllv_epi16() {
18461        let a = _mm_set1_epi16(1 << 15);
18462        let count = _mm_set1_epi16(2);
18463        let r = _mm_mask_sllv_epi16(a, 0, a, count);
18464        assert_eq_m128i(r, a);
18465        let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count);
18466        let e = _mm_set1_epi16(0);
18467        assert_eq_m128i(r, e);
18468    }
18469
18470    #[simd_test(enable = "avx512bw,avx512vl")]
18471    const fn test_mm_maskz_sllv_epi16() {
18472        let a = _mm_set1_epi16(1 << 15);
18473        let count = _mm_set1_epi16(2);
18474        let r = _mm_maskz_sllv_epi16(0, a, count);
18475        assert_eq_m128i(r, _mm_setzero_si128());
18476        let r = _mm_maskz_sllv_epi16(0b11111111, a, count);
18477        let e = _mm_set1_epi16(0);
18478        assert_eq_m128i(r, e);
18479    }
18480
18481    #[simd_test(enable = "avx512bw")]
18482    fn test_mm512_srl_epi16() {
18483        let a = _mm512_set1_epi16(1 << 1);
18484        let count = _mm_set1_epi16(2);
18485        let r = _mm512_srl_epi16(a, count);
18486        let e = _mm512_set1_epi16(0);
18487        assert_eq_m512i(r, e);
18488    }
18489
18490    #[simd_test(enable = "avx512bw")]
18491    fn test_mm512_mask_srl_epi16() {
18492        let a = _mm512_set1_epi16(1 << 1);
18493        let count = _mm_set1_epi16(2);
18494        let r = _mm512_mask_srl_epi16(a, 0, a, count);
18495        assert_eq_m512i(r, a);
18496        let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18497        let e = _mm512_set1_epi16(0);
18498        assert_eq_m512i(r, e);
18499    }
18500
    // Masked/zero-masked `srl_epi16` tests. Per Intel semantics the shift
    // amount is the *low 64 bits* of the 128-bit `count` operand, and a count
    // > 15 zeroes every lane. `_mm_set1_epi16(2)` makes that 64-bit count
    // 0x0002_0002_0002_0002 (far > 15), so the expected value of 0 is correct
    // (here `(1 << 1) >> 2 == 0` per-lane as well, so either reading agrees).
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        // mask == 0: every lane is zeroed regardless of the shift result
        let r = _mm512_maskz_srl_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // all-ones mask: every lane carries the shifted value
        let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_srl_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        // mask == 0: `src` (first argument, here `a`) is passed through
        let r = _mm256_mask_srl_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_srl_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm256_maskz_srl_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_srl_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srl_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srl_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_srl_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srl_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srl_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18555
    // `srli_epi16` tests: logical right shift by a compile-time immediate
    // (const generic `<2>`). These are `const fn` so they also exercise the
    // const-eval paths of the intrinsics. `(1 << 1) >> 2 == 0`, hence `e = 0`.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let r = _mm512_srli_epi16::<2>(a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        // mask == 0: `src` (here `a`) is passed through unchanged
        let r = _mm512_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        // mask == 0: all lanes zeroed
        let r = _mm512_maskz_srli_epi16::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_srli_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let r = _mm256_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_srli_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let r = _mm256_maskz_srli_epi16::<2>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_srli_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let r = _mm_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_srli_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let r = _mm_maskz_srli_epi16::<2>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srli_epi16::<2>(0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18623
    // `srlv_epi16` tests: logical right shift with an independent per-lane
    // count (here 2 in every lane). `(1 << 1) >> 2 == 0`, hence `e = 0`.
    // These are `const fn` so they also exercise const evaluation.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_srlv_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        // mask == 0: `src` (here `a`) is passed through unchanged
        let r = _mm512_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        // mask == 0: all lanes zeroed
        let r = _mm512_maskz_srlv_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_srlv_epi16(a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_srlv_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_srlv_epi16(a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srlv_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srlv_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18716
    // `sra_epi16` tests: arithmetic right shift by the *low 64 bits* of the
    // 128-bit `count` operand. `_mm_set1_epi16(1)` makes that 64-bit count
    // 0x0001_0001_0001_0001, which is > 15, so every lane saturates to its
    // sign bit — 0 for the positive input 8. (A per-lane shift of 1 would
    // have given 4; the expected 0 pins the 64-bit-count semantics.)
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm512_sra_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        // mask == 0: `src` (here `a`) is passed through unchanged
        let r = _mm512_mask_sra_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_sra_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        // mask == 0: all lanes zeroed
        let r = _mm512_maskz_sra_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_sra_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm256_mask_sra_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_sra_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm256_maskz_sra_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_sra_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm_mask_sra_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sra_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_sra_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(1);
        let r = _mm_maskz_sra_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sra_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }
18791
    // `srai_epi16` tests: arithmetic right shift by a compile-time immediate
    // (const generic `<2>`). 8 >> 2 == 2, hence `e = 2`. `const fn` so the
    // const-eval paths are exercised too.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        let r = _mm512_srai_epi16::<2>(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        // mask == 0: `src` (here `a`) is passed through unchanged
        let r = _mm512_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_srai_epi16() {
        let a = _mm512_set1_epi16(8);
        // mask == 0: all lanes zeroed
        let r = _mm512_maskz_srai_epi16::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_srai_epi16() {
        let a = _mm256_set1_epi16(8);
        let r = _mm256_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_srai_epi16() {
        let a = _mm256_set1_epi16(8);
        let r = _mm256_maskz_srai_epi16::<2>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_srai_epi16() {
        let a = _mm_set1_epi16(8);
        let r = _mm_mask_srai_epi16::<2>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_srai_epi16() {
        let a = _mm_set1_epi16(8);
        let r = _mm_maskz_srai_epi16::<2>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srai_epi16::<2>(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
18859
    // `srav_epi16` tests: arithmetic right shift with an independent per-lane
    // count (here 2 in every lane). 8 >> 2 == 2, hence `e = 2`. `const fn`
    // so the const-eval paths are exercised too.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_srav_epi16(a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        // mask == 0: `src` (here `a`) is passed through unchanged
        let r = _mm512_mask_srav_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        // mask == 0: all lanes zeroed
        let r = _mm512_maskz_srav_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_srav_epi16(a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_srav_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_srav_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_srav_epi16(a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srav_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srav_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srav_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srav_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
18952
    // `permutex2var_epi16` tests: each index selects a word from the
    // two-table concatenation of `a` and `b`. For the 512-bit form (32 lanes)
    // bit 5 of the index selects table `b` and bits 4:0 the lane; for the
    // 256-bit form it is bit 4, and for the 128-bit form bit 3. Lane 0 is the
    // *last* argument of `set_epi16`, so `a`'s lane i holds (lanes-1 - i);
    // e.g. idx 1 on the 512-bit `a` picks 30, and `1<<5` picks `b` lane 0
    // (== 100). Hence the expected interleave of descending values and 100s.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_permutex2var_epi16(a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        // mask == 0: the first operand `a` is passed through unchanged
        let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        // mask == 0: all lanes zeroed
        let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask2_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        // mask2 variant: masked-off lanes are copied from `idx`, not `a`
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_permutex2var_epi16(a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask2_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        // mask2 variant: masked-off lanes are copied from `idx`
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_permutex2var_epi16(a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask2_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        // mask2 variant: masked-off lanes are copied from `idx`
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }
19135
    // `permutexvar_epi16` tests: every lane of the result is `a`'s element
    // selected by the corresponding index. Lane 0 is the *last* argument of
    // `set_epi16`, so with `a` listing 0..N-1 high-to-low, index 1 selects
    // the second-lowest lane, i.e. the value N-2 (30, 14, or 6).
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_permutexvar_epi16(idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // mask == 0: `src` (here `a`) is passed through unchanged
        let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // mask == 0: all lanes zeroed
        let r = _mm512_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_permutexvar_epi16(idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_permutexvar_epi16(idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }
19234
19235    #[simd_test(enable = "avx512bw")]
19236    const fn test_mm512_mask_blend_epi16() {
19237        let a = _mm512_set1_epi16(1);
19238        let b = _mm512_set1_epi16(2);
19239        let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
19240        #[rustfmt::skip]
19241        let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
19242                                 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
19243        assert_eq_m512i(r, e);
19244    }
19245
19246    #[simd_test(enable = "avx512bw,avx512vl")]
19247    const fn test_mm256_mask_blend_epi16() {
19248        let a = _mm256_set1_epi16(1);
19249        let b = _mm256_set1_epi16(2);
19250        let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
19251        let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
19252        assert_eq_m256i(r, e);
19253    }
19254
19255    #[simd_test(enable = "avx512bw,avx512vl")]
19256    const fn test_mm_mask_blend_epi16() {
19257        let a = _mm_set1_epi16(1);
19258        let b = _mm_set1_epi16(2);
19259        let r = _mm_mask_blend_epi16(0b11110000, a, b);
19260        let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
19261        assert_eq_m128i(r, e);
19262    }
19263
    // Checks `_mm512_mask_blend_epi8`: set mask bits pick lanes from `b` (2),
    // clear bits keep lanes from `a` (1); the expected 2/1 pattern mirrors
    // the 64-bit mask's alternating byte groups.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_blend_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_blend_epi8(
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
19280
    // Checks `_mm256_mask_blend_epi8`: set mask bits pick byte lanes from `b`
    // (2), clear bits keep lanes from `a` (1).
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_blend_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
19291
    // Checks `_mm_mask_blend_epi8`: set mask bits pick byte lanes from `b`
    // (2), clear bits keep lanes from `a` (1).
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_blend_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
        let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
19300
19301    #[simd_test(enable = "avx512bw")]
19302    const fn test_mm512_broadcastw_epi16() {
19303        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19304        let r = _mm512_broadcastw_epi16(a);
19305        let e = _mm512_set1_epi16(24);
19306        assert_eq_m512i(r, e);
19307    }
19308
    // Checks `_mm512_mask_broadcastw_epi16`: a zero mask returns `src`
    // unchanged; a full mask broadcasts the lowest word of `a` (24) everywhere.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_broadcastw_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }
19319
    // Checks `_mm512_maskz_broadcastw_epi16`: a zero mask zeroes all lanes;
    // a full mask broadcasts the lowest word of `a` (24) everywhere.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_maskz_broadcastw_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }
19329
19330    #[simd_test(enable = "avx512bw,avx512vl")]
19331    const fn test_mm256_mask_broadcastw_epi16() {
19332        let src = _mm256_set1_epi16(1);
19333        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19334        let r = _mm256_mask_broadcastw_epi16(src, 0, a);
19335        assert_eq_m256i(r, src);
19336        let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a);
19337        let e = _mm256_set1_epi16(24);
19338        assert_eq_m256i(r, e);
19339    }
19340
19341    #[simd_test(enable = "avx512bw,avx512vl")]
19342    const fn test_mm256_maskz_broadcastw_epi16() {
19343        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19344        let r = _mm256_maskz_broadcastw_epi16(0, a);
19345        assert_eq_m256i(r, _mm256_setzero_si256());
19346        let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a);
19347        let e = _mm256_set1_epi16(24);
19348        assert_eq_m256i(r, e);
19349    }
19350
19351    #[simd_test(enable = "avx512bw,avx512vl")]
19352    const fn test_mm_mask_broadcastw_epi16() {
19353        let src = _mm_set1_epi16(1);
19354        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19355        let r = _mm_mask_broadcastw_epi16(src, 0, a);
19356        assert_eq_m128i(r, src);
19357        let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a);
19358        let e = _mm_set1_epi16(24);
19359        assert_eq_m128i(r, e);
19360    }
19361
19362    #[simd_test(enable = "avx512bw,avx512vl")]
19363    const fn test_mm_maskz_broadcastw_epi16() {
19364        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19365        let r = _mm_maskz_broadcastw_epi16(0, a);
19366        assert_eq_m128i(r, _mm_setzero_si128());
19367        let r = _mm_maskz_broadcastw_epi16(0b11111111, a);
19368        let e = _mm_set1_epi16(24);
19369        assert_eq_m128i(r, e);
19370    }
19371
    // Checks `_mm512_broadcastb_epi8`: the lowest byte of `a` (32, the last
    // `_mm_set_epi8` argument) is replicated into all 64 byte lanes.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_broadcastb_epi8(a);
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }
19381
    // Checks `_mm512_mask_broadcastb_epi8`: a zero mask returns `src`
    // unchanged; a full mask broadcasts the lowest byte of `a` (32) everywhere.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_broadcastb_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastb_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }
19398
    // Checks `_mm512_maskz_broadcastb_epi8`: a zero mask zeroes all lanes;
    // a full mask broadcasts the lowest byte of `a` (32) everywhere.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_broadcastb_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastb_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }
19413
    // Checks `_mm256_mask_broadcastb_epi8`: zero mask passes `src` through;
    // full mask broadcasts the lowest byte of `a` (32) to all 32 lanes.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_broadcastb_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(32);
        assert_eq_m256i(r, e);
    }
19426
    // Checks `_mm256_maskz_broadcastb_epi8`: zero mask zeroes the result;
    // full mask broadcasts the lowest byte of `a` (32) to all lanes.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_maskz_broadcastb_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(32);
        assert_eq_m256i(r, e);
    }
19438
    // Checks `_mm_mask_broadcastb_epi8`: zero mask passes `src` through;
    // full mask broadcasts the lowest byte of `a` (32) to all 16 lanes.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_broadcastb_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(32);
        assert_eq_m128i(r, e);
    }
19451
    // Checks `_mm_maskz_broadcastb_epi8`: zero mask zeroes the result;
    // full mask broadcasts the lowest byte of `a` (32) to all lanes.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm_maskz_broadcastb_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(32);
        assert_eq_m128i(r, e);
    }
19463
    // Checks `_mm512_unpackhi_epi16`: within each 128-bit lane, the upper
    // four words of `a` and `b` are interleaved, as the expected vector's
    // (b, a) element pairs show.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpackhi_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
19478
    // Checks `_mm512_mask_unpackhi_epi16`: a zero mask returns `src` (here
    // `a`) unchanged; a full mask matches the plain unpack-high result.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
19495
    // Checks `_mm512_maskz_unpackhi_epi16`: a zero mask zeroes all lanes;
    // a full mask matches the plain unpack-high result.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
19512
    // Checks `_mm256_mask_unpackhi_epi16`: zero mask passes `src` (here `a`)
    // through; full mask interleaves the upper words of each 128-bit lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }
19525
    // Checks `_mm256_maskz_unpackhi_epi16`: zero mask zeroes the result;
    // full mask interleaves the upper words of each 128-bit lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }
19538
19539    #[simd_test(enable = "avx512bw,avx512vl")]
19540    const fn test_mm_mask_unpackhi_epi16() {
19541        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
19542        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
19543        let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
19544        assert_eq_m128i(r, a);
19545        let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b);
19546        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
19547        assert_eq_m128i(r, e);
19548    }
19549
19550    #[simd_test(enable = "avx512bw,avx512vl")]
19551    const fn test_mm_maskz_unpackhi_epi16() {
19552        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
19553        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
19554        let r = _mm_maskz_unpackhi_epi16(0, a, b);
19555        assert_eq_m128i(r, _mm_setzero_si128());
19556        let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b);
19557        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
19558        assert_eq_m128i(r, e);
19559    }
19560
    // Checks `_mm512_unpackhi_epi8`: within each 128-bit lane, the upper
    // eight bytes of `a` and `b` are interleaved, as the (b, a) element
    // pairs of the expected vector show.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
19581
    // Checks `_mm512_mask_unpackhi_epi8`: a zero mask returns `src` (here
    // `a`) unchanged; a full 64-bit mask matches the plain unpack-high.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
19609
    // Checks `_mm512_maskz_unpackhi_epi8`: a zero mask zeroes all lanes;
    // a full 64-bit mask matches the plain unpack-high.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
19636
    // Checks `_mm256_mask_unpackhi_epi8`: zero mask passes `src` (here `a`)
    // through; full mask interleaves the upper bytes of each 128-bit lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }
19653
    // Checks `_mm256_maskz_unpackhi_epi8`: zero mask zeroes the result;
    // full mask interleaves the upper bytes of each 128-bit lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }
19670
    // Checks `_mm_mask_unpackhi_epi8`: zero mask passes `src` (here `a`)
    // through; full mask interleaves the upper eight bytes of `a` and `b`.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }
19683
    // Checks `_mm_maskz_unpackhi_epi8`: zero mask zeroes the result;
    // full mask interleaves the upper eight bytes of `a` and `b`.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }
19696
    // Checks `_mm512_unpacklo_epi16`: within each 128-bit lane, the lower
    // four words of `a` and `b` are interleaved, as the (b, a) element
    // pairs of the expected vector show.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpacklo_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
19711
    // Checks `_mm512_mask_unpacklo_epi16`: a zero mask returns `src` (here
    // `a`) unchanged; a full mask matches the plain unpack-low result.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
19728
    // Checks `_mm512_maskz_unpacklo_epi16`: a zero mask zeroes all lanes;
    // a full mask matches the plain unpack-low result.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
19745
    // Checks `_mm256_mask_unpacklo_epi16`: zero mask passes `src` (here `a`)
    // through; full mask interleaves the lower words of each 128-bit lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_unpacklo_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
        assert_eq_m256i(r, e);
    }
19758
    // Checks `_mm256_maskz_unpacklo_epi16`: zero mask zeroes the result;
    // full mask interleaves the lower words of each 128-bit lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_unpacklo_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
        assert_eq_m256i(r, e);
    }
19771
19772    #[simd_test(enable = "avx512bw,avx512vl")]
19773    const fn test_mm_mask_unpacklo_epi16() {
19774        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
19775        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
19776        let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
19777        assert_eq_m128i(r, a);
19778        let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b);
19779        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
19780        assert_eq_m128i(r, e);
19781    }
19782
19783    #[simd_test(enable = "avx512bw,avx512vl")]
19784    const fn test_mm_maskz_unpacklo_epi16() {
19785        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
19786        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
19787        let r = _mm_maskz_unpacklo_epi16(0, a, b);
19788        assert_eq_m128i(r, _mm_setzero_si128());
19789        let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b);
19790        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
19791        assert_eq_m128i(r, e);
19792    }
19793
    // Checks `_mm512_unpacklo_epi8`: within each 128-bit lane, the lower
    // eight bytes of `a` and `b` are interleaved, as the (b, a) element
    // pairs of the expected vector show.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpacklo_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
19814
    // Checks `_mm512_mask_unpacklo_epi8`: a zero mask returns `src` (here
    // `a`) unchanged; a full 64-bit mask matches the plain unpack-low.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpacklo_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
19842
    // Checks `_mm512_maskz_unpacklo_epi8`: a zero mask zeroes all lanes;
    // a full 64-bit mask matches the plain unpack-low.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
19869
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_unpacklo_epi8() {
        // Zero writemask must copy `src` (here `a`) unchanged; all-ones mask
        // must produce the full low-half byte interleave per 128-bit lane.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }
19886
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_unpacklo_epi8() {
        // Zero mask zeroes all lanes; all-ones mask gives the unmasked interleave.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }
19903
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_unpacklo_epi8() {
        // Zero writemask copies `src` (`a`); all-ones mask gives the full
        // low-half byte interleave of a and b.
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }
19918
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_unpacklo_epi8() {
        // Zero mask zeroes all lanes; all-ones mask gives the unmasked interleave.
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }
19933
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_mov_epi16() {
        // Masked move: zero mask keeps `src`, all-ones (32-bit) mask takes `a`.
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_mov_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        assert_eq_m512i(r, a);
    }
19943
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_mov_epi16() {
        // Zeroing move: zero mask yields all zeros, all-ones mask passes `a` through.
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_mov_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a);
        assert_eq_m512i(r, a);
    }
19952
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_mov_epi16() {
        // Masked move: zero mask keeps `src`, all-ones (16-bit) mask takes `a`.
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_mov_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a);
        assert_eq_m256i(r, a);
    }
19962
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_mov_epi16() {
        // Zeroing move: zero mask yields all zeros, all-ones mask passes `a` through.
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_mov_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a);
        assert_eq_m256i(r, a);
    }
19971
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_mov_epi16() {
        // Masked move: zero mask keeps `src`, all-ones (8-bit) mask takes `a`.
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_mov_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_mov_epi16(src, 0b11111111, a);
        assert_eq_m128i(r, a);
    }
19981
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_mov_epi16() {
        // Zeroing move: zero mask yields all zeros, all-ones mask passes `a` through.
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_mov_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mov_epi16(0b11111111, a);
        assert_eq_m128i(r, a);
    }
19990
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_mov_epi8() {
        // Masked move: zero mask keeps `src`, all-ones (64-bit) mask takes `a`.
        let src = _mm512_set1_epi8(1);
        let a = _mm512_set1_epi8(2);
        let r = _mm512_mask_mov_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        assert_eq_m512i(r, a);
    }
20004
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_mov_epi8() {
        // Zeroing move: zero mask yields all zeros, all-ones mask passes `a` through.
        let a = _mm512_set1_epi8(2);
        let r = _mm512_maskz_mov_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        assert_eq_m512i(r, a);
    }
20016
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_mov_epi8() {
        // Masked move: zero mask keeps `src`, all-ones (32-bit) mask takes `a`.
        let src = _mm256_set1_epi8(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm256_mask_mov_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        assert_eq_m256i(r, a);
    }
20026
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_mov_epi8() {
        // Zeroing move: zero mask yields all zeros, all-ones mask passes `a` through.
        let a = _mm256_set1_epi8(2);
        let r = _mm256_maskz_mov_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a);
        assert_eq_m256i(r, a);
    }
20035
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_mov_epi8() {
        // Masked move: zero mask keeps `src`, all-ones (16-bit) mask takes `a`.
        let src = _mm_set1_epi8(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_mov_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a);
        assert_eq_m128i(r, a);
    }
20045
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_mov_epi8() {
        // Zeroing move: zero mask yields all zeros, all-ones mask passes `a` through.
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_mov_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mov_epi8(0b11111111_11111111, a);
        assert_eq_m128i(r, a);
    }
20054
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_set1_epi16() {
        // Masked broadcast: zero mask keeps `src`; all-ones mask fills every
        // 16-bit lane with the scalar `a`.
        let src = _mm512_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm512_mask_set1_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(11);
        assert_eq_m512i(r, e);
    }
20065
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_set1_epi16() {
        // Zeroing broadcast: zero mask yields zeros; all-ones mask broadcasts `a`.
        let a: i16 = 11;
        let r = _mm512_maskz_set1_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(11);
        assert_eq_m512i(r, e);
    }
20075
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_set1_epi16() {
        // Masked broadcast: zero mask keeps `src`; all-ones mask broadcasts `a`.
        let src = _mm256_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm256_mask_set1_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(11);
        assert_eq_m256i(r, e);
    }
20086
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_set1_epi16() {
        // Zeroing broadcast: zero mask yields zeros; all-ones mask broadcasts `a`.
        let a: i16 = 11;
        let r = _mm256_maskz_set1_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(11);
        assert_eq_m256i(r, e);
    }
20096
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_set1_epi16() {
        // Masked broadcast: zero mask keeps `src`; all-ones mask broadcasts `a`.
        let src = _mm_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm_mask_set1_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(11);
        assert_eq_m128i(r, e);
    }
20107
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_set1_epi16() {
        // Zeroing broadcast: zero mask yields zeros; all-ones mask broadcasts `a`.
        let a: i16 = 11;
        let r = _mm_maskz_set1_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi16(0b11111111, a);
        let e = _mm_set1_epi16(11);
        assert_eq_m128i(r, e);
    }
20117
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_set1_epi8() {
        // Masked broadcast: zero mask keeps `src`; all-ones (64-bit) mask fills
        // every byte lane with the scalar `a`.
        let src = _mm512_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm512_mask_set1_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(11);
        assert_eq_m512i(r, e);
    }
20132
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_set1_epi8() {
        // Zeroing broadcast: zero mask yields zeros; all-ones mask broadcasts `a`.
        let a: i8 = 11;
        let r = _mm512_maskz_set1_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(11);
        assert_eq_m512i(r, e);
    }
20145
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_set1_epi8() {
        // Masked broadcast: zero mask keeps `src`; all-ones mask broadcasts `a`.
        let src = _mm256_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm256_mask_set1_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(11);
        assert_eq_m256i(r, e);
    }
20156
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_set1_epi8() {
        // Zeroing broadcast: zero mask yields zeros; all-ones mask broadcasts `a`.
        let a: i8 = 11;
        let r = _mm256_maskz_set1_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(11);
        assert_eq_m256i(r, e);
    }
20166
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_set1_epi8() {
        // Masked broadcast: zero mask keeps `src`; all-ones mask broadcasts `a`.
        let src = _mm_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm_mask_set1_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(11);
        assert_eq_m128i(r, e);
    }
20177
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_set1_epi8() {
        // Zeroing broadcast: zero mask yields zeros; all-ones mask broadcasts `a`.
        let a: i8 = 11;
        let r = _mm_maskz_set1_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(11);
        assert_eq_m128i(r, e);
    }
20187
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_shufflelo_epi16() {
        // IMM8 0b00_01_01_11 permutes the low four 16-bit words of each
        // 128-bit lane; the high four words are left untouched (compare `e`).
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }
20203
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_shufflelo_epi16() {
        // Zero writemask keeps `a` unchanged; all-ones mask applies the full
        // low-word shuffle with IMM8 0b00_01_01_11.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }
20225
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_shufflelo_epi16() {
        // Zero mask zeroes all lanes; all-ones mask yields the unmasked shuffle.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }
20244
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_shufflelo_epi16() {
        // Zero writemask keeps `a`; all-ones mask applies the low-word shuffle.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }
20254
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_shufflelo_epi16() {
        // Zero mask zeroes all lanes; all-ones mask yields the unmasked shuffle.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }
20264
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_shufflelo_epi16() {
        // Zero writemask keeps `a`; all-ones mask applies the low-word shuffle.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }
20274
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_shufflelo_epi16() {
        // Zero mask zeroes all lanes; all-ones mask yields the unmasked shuffle.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }
20284
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_shufflehi_epi16() {
        // IMM8 0b00_01_01_11 permutes the high four 16-bit words of each
        // 128-bit lane; the low four words are left untouched (compare `e`).
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }
20300
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_shufflehi_epi16() {
        // Zero writemask keeps `a` unchanged; all-ones mask applies the full
        // high-word shuffle with IMM8 0b00_01_01_11.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }
20322
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_shufflehi_epi16() {
        // Zero mask zeroes all lanes; all-ones mask yields the unmasked shuffle.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }
20341
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_shufflehi_epi16() {
        // Zero writemask keeps `a`; all-ones mask applies the high-word shuffle.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
20351
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_shufflehi_epi16() {
        // Zero mask zeroes all lanes; all-ones mask yields the unmasked shuffle.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
20361
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_shufflehi_epi16() {
        // Zero writemask keeps `a`; all-ones mask applies the high-word shuffle.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
20371
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_shufflehi_epi16() {
        // Zero mask zeroes all lanes; all-ones mask yields the unmasked shuffle.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }
20381
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_shuffle_epi8() {
        // Byte shuffle with every index = 1: each 128-bit lane must broadcast
        // the byte at position 1 of that lane (14, 30, 46, 62 with set_epi8's
        // most-significant-first argument order).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_shuffle_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
20398
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_shuffle_epi8() {
        // Zero writemask keeps `a` unchanged; all-ones (64-bit) mask applies
        // the full byte shuffle (per-lane broadcast of byte index 1).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
20422
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_shuffle_epi8() {
        // Zero mask zeroes all lanes; all-ones mask yields the unmasked shuffle.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_shuffle_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
20445
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_shuffle_epi8() {
        // Zero writemask keeps `a`; all-ones mask applies the byte shuffle.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }
20460
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_shuffle_epi8() {
        // Zero mask zeroes all lanes; all-ones mask yields the unmasked shuffle.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_shuffle_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }
20475
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_shuffle_epi8() {
        // Zero writemask keeps `a`; all-ones mask applies the byte shuffle.
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        );
        assert_eq_m128i(r, e);
    }
20488
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_shuffle_epi8() {
        // Zero mask zeroes all lanes; all-ones mask yields the unmasked shuffle.
        #[rustfmt::skip]
        let a = _mm_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_shuffle_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        );
        assert_eq_m128i(r, e);
    }
20502
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_test_epi16_mask() {
        // a & b is nonzero (bit 0 overlaps) in every 16-bit lane, so every
        // mask bit must be set.
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_test_epi16_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }
20511
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_test_epi16_mask() {
        // A zero input mask forces a zero result; all-ones mask gives the full
        // nonzero-AND test (all lanes overlap on bit 0).
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }
20522
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_test_epi16_mask() {
        // Every 16-bit lane of a & b is nonzero, so all 16 mask bits are set.
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_test_epi16_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }
20531
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_test_epi16_mask() {
        // Zero writemask -> zero result; all-ones writemask -> full test mask.
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }
20542
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_test_epi16_mask() {
        // a & b is non-zero in every 16-bit lane, so all 8 mask bits are set.
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_test_epi16_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }
20551
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_test_epi16_mask() {
        // Zero writemask -> zero result; all-ones writemask -> full test mask.
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }
20562
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_test_epi8_mask() {
        // a & b is non-zero in every byte lane, so all 64 mask bits are set.
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_test_epi8_mask(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }
20572
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_test_epi8_mask() {
        // Zero writemask -> zero result; all-ones writemask -> full test mask.
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }
20588
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_test_epi8_mask() {
        // a & b is non-zero in every byte lane, so all 32 mask bits are set.
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_test_epi8_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }
20597
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_test_epi8_mask() {
        // Zero writemask -> zero result; all-ones writemask -> full test mask.
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }
20608
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_test_epi8_mask() {
        // a & b is non-zero in every byte lane, so all 16 mask bits are set.
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_test_epi8_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }
20617
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_test_epi8_mask() {
        // Zero writemask -> zero result; all-ones writemask -> full test mask.
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }
20628
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_testn_epi16_mask() {
        // a & b is non-zero in every lane, so every testn ("AND is zero") bit is clear.
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi16_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }
20637
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_testn_epi16_mask() {
        // Result is zero for both writemasks: masked out, or a & b non-zero per lane.
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }
20648
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_testn_epi16_mask() {
        // a & b is non-zero in every lane, so every testn bit is clear.
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi16_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }
20657
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_testn_epi16_mask() {
        // Result is zero for both writemasks: masked out, or a & b non-zero per lane.
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }
20668
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_testn_epi16_mask() {
        // a & b is non-zero in every lane, so every testn bit is clear.
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_testn_epi16_mask(a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }
20677
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_testn_epi16_mask() {
        // Result is zero for both writemasks: masked out, or a & b non-zero per lane.
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }
20688
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_testn_epi8_mask() {
        // a & b is non-zero in every byte lane, so every testn bit is clear.
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi8_mask(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }
20698
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_testn_epi8_mask() {
        // Result is zero for both writemasks: masked out, or a & b non-zero per lane.
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }
20714
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_testn_epi8_mask() {
        // a & b is non-zero in every byte lane, so every testn bit is clear.
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi8_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }
20723
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_testn_epi8_mask() {
        // Result is zero for both writemasks: masked out, or a & b non-zero per lane.
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }
20734
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_testn_epi8_mask() {
        // a & b is non-zero in every byte lane, so every testn bit is clear.
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_testn_epi8_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }
20743
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_testn_epi8_mask() {
        // Result is zero for both writemasks: masked out, or a & b non-zero per lane.
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }
20754
    #[simd_test(enable = "avx512bw")]
    const fn test_store_mask64() {
        // _store_mask64 must write the 64-bit mask through the pointer unchanged.
        let a: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let mut r = 0;
        unsafe {
            _store_mask64(&mut r, a);
        }
        assert_eq!(r, a);
    }
20765
    #[simd_test(enable = "avx512bw")]
    const fn test_store_mask32() {
        // _store_mask32 must write the 32-bit mask through the pointer unchanged.
        let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let mut r = 0;
        unsafe {
            _store_mask32(&mut r, a);
        }
        assert_eq!(r, a);
    }
20775
20776    #[simd_test(enable = "avx512bw")]
20777    const fn test_load_mask64() {
20778        let p: __mmask64 =
20779            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
20780        let r = unsafe { _load_mask64(&p) };
20781        let e: __mmask64 =
20782            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
20783        assert_eq!(r, e);
20784    }
20785
20786    #[simd_test(enable = "avx512bw")]
20787    const fn test_load_mask32() {
20788        let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
20789        let r = unsafe { _load_mask32(&p) };
20790        let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
20791        assert_eq!(r, e);
20792    }
20793
20794    #[simd_test(enable = "avx512bw")]
20795    fn test_mm512_sad_epu8() {
20796        let a = _mm512_set1_epi8(2);
20797        let b = _mm512_set1_epi8(4);
20798        let r = _mm512_sad_epu8(a, b);
20799        let e = _mm512_set1_epi64(16);
20800        assert_eq_m512i(r, e);
20801    }
20802
20803    #[simd_test(enable = "avx512bw")]
20804    fn test_mm512_dbsad_epu8() {
20805        let a = _mm512_set1_epi8(2);
20806        let b = _mm512_set1_epi8(4);
20807        let r = _mm512_dbsad_epu8::<0>(a, b);
20808        let e = _mm512_set1_epi16(8);
20809        assert_eq_m512i(r, e);
20810    }
20811
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_dbsad_epu8() {
        // Zero writemask keeps `src`; all-ones writemask yields the SAD words (8).
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }
20823
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_dbsad_epu8() {
        // Zero mask zeroes every word; all-ones mask yields the SAD words (8).
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }
20834
20835    #[simd_test(enable = "avx512bw,avx512vl")]
20836    fn test_mm256_dbsad_epu8() {
20837        let a = _mm256_set1_epi8(2);
20838        let b = _mm256_set1_epi8(4);
20839        let r = _mm256_dbsad_epu8::<0>(a, b);
20840        let e = _mm256_set1_epi16(8);
20841        assert_eq_m256i(r, e);
20842    }
20843
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_dbsad_epu8() {
        // Zero writemask keeps `src`; all-ones writemask yields the SAD words (8).
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }
20855
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_dbsad_epu8() {
        // Zero mask zeroes every word; all-ones mask yields the SAD words (8).
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }
20866
20867    #[simd_test(enable = "avx512bw,avx512vl")]
20868    fn test_mm_dbsad_epu8() {
20869        let a = _mm_set1_epi8(2);
20870        let b = _mm_set1_epi8(4);
20871        let r = _mm_dbsad_epu8::<0>(a, b);
20872        let e = _mm_set1_epi16(8);
20873        assert_eq_m128i(r, e);
20874    }
20875
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_dbsad_epu8() {
        // Zero writemask keeps `src`; all-ones writemask yields the SAD words (8).
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }
20887
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_dbsad_epu8() {
        // Zero mask zeroes every word; all-ones mask yields the SAD words (8).
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }
20898
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_movepi16_mask() {
        // Every element has its sign bit (bit 15) set, so every mask bit is 1.
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_movepi16_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }
20906
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_movepi16_mask() {
        // Every element has its sign bit (bit 15) set, so every mask bit is 1.
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_movepi16_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }
20914
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_movepi16_mask() {
        // Every element has its sign bit (bit 15) set, so every mask bit is 1.
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_movepi16_mask(a);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }
20922
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_movepi8_mask() {
        // Every byte has its sign bit (bit 7) set, so every mask bit is 1.
        let a = _mm512_set1_epi8(1 << 7);
        let r = _mm512_movepi8_mask(a);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }
20931
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_movepi8_mask() {
        // Every byte has its sign bit (bit 7) set, so every mask bit is 1.
        let a = _mm256_set1_epi8(1 << 7);
        let r = _mm256_movepi8_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }
20939
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_movepi8_mask() {
        // Every byte has its sign bit (bit 7) set, so every mask bit is 1.
        let a = _mm_set1_epi8(1 << 7);
        let r = _mm_movepi8_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }
20947
20948    #[simd_test(enable = "avx512bw")]
20949    const fn test_mm512_movm_epi16() {
20950        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
20951        let r = _mm512_movm_epi16(a);
20952        let e = _mm512_set1_epi16(
20953            1 << 15
20954                | 1 << 14
20955                | 1 << 13
20956                | 1 << 12
20957                | 1 << 11
20958                | 1 << 10
20959                | 1 << 9
20960                | 1 << 8
20961                | 1 << 7
20962                | 1 << 6
20963                | 1 << 5
20964                | 1 << 4
20965                | 1 << 3
20966                | 1 << 2
20967                | 1 << 1
20968                | 1 << 0,
20969        );
20970        assert_eq_m512i(r, e);
20971    }
20972
20973    #[simd_test(enable = "avx512bw,avx512vl")]
20974    const fn test_mm256_movm_epi16() {
20975        let a: __mmask16 = 0b11111111_11111111;
20976        let r = _mm256_movm_epi16(a);
20977        let e = _mm256_set1_epi16(
20978            1 << 15
20979                | 1 << 14
20980                | 1 << 13
20981                | 1 << 12
20982                | 1 << 11
20983                | 1 << 10
20984                | 1 << 9
20985                | 1 << 8
20986                | 1 << 7
20987                | 1 << 6
20988                | 1 << 5
20989                | 1 << 4
20990                | 1 << 3
20991                | 1 << 2
20992                | 1 << 1
20993                | 1 << 0,
20994        );
20995        assert_eq_m256i(r, e);
20996    }
20997
20998    #[simd_test(enable = "avx512bw,avx512vl")]
20999    const fn test_mm_movm_epi16() {
21000        let a: __mmask8 = 0b11111111;
21001        let r = _mm_movm_epi16(a);
21002        let e = _mm_set1_epi16(
21003            1 << 15
21004                | 1 << 14
21005                | 1 << 13
21006                | 1 << 12
21007                | 1 << 11
21008                | 1 << 10
21009                | 1 << 9
21010                | 1 << 8
21011                | 1 << 7
21012                | 1 << 6
21013                | 1 << 5
21014                | 1 << 4
21015                | 1 << 3
21016                | 1 << 2
21017                | 1 << 1
21018                | 1 << 0,
21019        );
21020        assert_eq_m128i(r, e);
21021    }
21022
21023    #[simd_test(enable = "avx512bw")]
21024    const fn test_mm512_movm_epi8() {
21025        let a: __mmask64 =
21026            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
21027        let r = _mm512_movm_epi8(a);
21028        let e =
21029            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
21030        assert_eq_m512i(r, e);
21031    }
21032
21033    #[simd_test(enable = "avx512bw,avx512vl")]
21034    const fn test_mm256_movm_epi8() {
21035        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
21036        let r = _mm256_movm_epi8(a);
21037        let e =
21038            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
21039        assert_eq_m256i(r, e);
21040    }
21041
21042    #[simd_test(enable = "avx512bw,avx512vl")]
21043    const fn test_mm_movm_epi8() {
21044        let a: __mmask16 = 0b11111111_11111111;
21045        let r = _mm_movm_epi8(a);
21046        let e =
21047            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
21048        assert_eq_m128i(r, e);
21049    }
21050
    #[simd_test(enable = "avx512bw")]
    const fn test_cvtmask32_u32() {
        // The conversion is a bit-for-bit reinterpretation of the mask as u32.
        let a: __mmask32 = 0b11001100_00110011_01100110_10011001;
        let r = _cvtmask32_u32(a);
        let e: u32 = 0b11001100_00110011_01100110_10011001;
        assert_eq!(r, e);
    }
21058
    #[simd_test(enable = "avx512bw")]
    const fn test_cvtu32_mask32() {
        // The conversion is a bit-for-bit reinterpretation of the u32 as a mask.
        let a: u32 = 0b11001100_00110011_01100110_10011001;
        let r = _cvtu32_mask32(a);
        let e: __mmask32 = 0b11001100_00110011_01100110_10011001;
        assert_eq!(r, e);
    }
21066
21067    #[simd_test(enable = "avx512bw")]
21068    const fn test_kadd_mask32() {
21069        let a: __mmask32 = 11;
21070        let b: __mmask32 = 22;
21071        let r = _kadd_mask32(a, b);
21072        let e: __mmask32 = 33;
21073        assert_eq!(r, e);
21074    }
21075
21076    #[simd_test(enable = "avx512bw")]
21077    const fn test_kadd_mask64() {
21078        let a: __mmask64 = 11;
21079        let b: __mmask64 = 22;
21080        let r = _kadd_mask64(a, b);
21081        let e: __mmask64 = 33;
21082        assert_eq!(r, e);
21083    }
21084
    #[simd_test(enable = "avx512bw")]
    const fn test_kand_mask32() {
        // AND of identical operands returns the operand unchanged.
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kand_mask32(a, b);
        let e: __mmask32 = 0b11001100_00110011_11001100_00110011;
        assert_eq!(r, e);
    }
21093
    #[simd_test(enable = "avx512bw")]
    const fn test_kand_mask64() {
        // AND of identical operands returns the operand unchanged.
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kand_mask64(a, b);
        let e: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        assert_eq!(r, e);
    }
21105
    #[simd_test(enable = "avx512bw")]
    const fn test_knot_mask32() {
        // The expected value is the bitwise complement of `a`.
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _knot_mask32(a);
        let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
        assert_eq!(r, e);
    }
21113
    #[simd_test(enable = "avx512bw")]
    const fn test_knot_mask64() {
        // The expected value is the bitwise complement of `a`.
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _knot_mask64(a);
        let e: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        assert_eq!(r, e);
    }
21123
    #[simd_test(enable = "avx512bw")]
    const fn test_kandn_mask32() {
        // !a & b with identical operands is always zero.
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kandn_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }
21132
    #[simd_test(enable = "avx512bw")]
    const fn test_kandn_mask64() {
        // !a & b with identical operands is always zero.
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kandn_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }
21144
    #[simd_test(enable = "avx512bw")]
    const fn test_kor_mask32() {
        // `b` is the complement of `a`, so the OR is all ones.
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }
21153
    #[simd_test(enable = "avx512bw")]
    const fn test_kor_mask64() {
        // `b` is the complement of `a`, so the OR is all ones.
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }
21165
    #[simd_test(enable = "avx512bw")]
    const fn test_kxor_mask32() {
        // `b` is the complement of `a`, so the XOR is all ones.
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }
21174
    #[simd_test(enable = "avx512bw")]
    const fn test_kxor_mask64() {
        // `b` is the complement of `a`, so the XOR is all ones.
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }
21186
    #[simd_test(enable = "avx512bw")]
    const fn test_kxnor_mask32() {
        // `b` is the complement of `a`, so the XNOR (NOT of XOR) is all zeros.
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxnor_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }
21195
    #[simd_test(enable = "avx512bw")]
    const fn test_kxnor_mask64() {
        // `b` is the complement of `a`, so the XNOR (NOT of XOR) is all zeros.
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxnor_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }
21207
    #[simd_test(enable = "avx512bw")]
    const fn test_kortest_mask32_u8() {
        // a | b is all ones: the "OR is zero" result is 0 and the
        // "OR is all ones" flag written through the pointer is 1.
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = unsafe { _kortest_mask32_u8(a, b, &mut all_ones) };
        assert_eq!(r, 0);
        assert_eq!(all_ones, 1);
    }
21217
    #[simd_test(enable = "avx512bw")]
    const fn test_kortest_mask64_u8() {
        // Only the low 32 bits are populated, so the 64-bit OR is neither
        // zero (r = 0) nor all ones (all_ones = 0).
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = unsafe { _kortest_mask64_u8(a, b, &mut all_ones) };
        assert_eq!(r, 0);
        assert_eq!(all_ones, 0);
    }
21227
    #[simd_test(enable = "avx512bw")]
    const fn test_kortestc_mask32_u8() {
        // a | b is all ones across 32 bits, so the all-ones flag is 1.
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask32_u8(a, b);
        assert_eq!(r, 1);
    }
21235
    #[simd_test(enable = "avx512bw")]
    const fn test_kortestc_mask64_u8() {
        // The upper 32 bits of a | b are zero, so the 64-bit all-ones flag is 0.
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }
21243
    #[simd_test(enable = "avx512bw")]
    const fn test_kortestz_mask32_u8() {
        // a | b is non-zero, so the "OR is zero" flag is 0.
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask32_u8(a, b);
        assert_eq!(r, 0);
    }
21251
    #[simd_test(enable = "avx512bw")]
    const fn test_kortestz_mask64_u8() {
        // a | b is non-zero, so the "OR is zero" flag is 0.
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask64_u8(a, b);
        assert_eq!(r, 0);
    }
21259
    #[simd_test(enable = "avx512bw")]
    const fn test_kshiftli_mask32() {
        // Left-shift the 32-bit mask; shift counts of the mask width or more
        // are not wrapped and produce zero.
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask32::<3>(a);
        let e: __mmask32 = 0b0100101101001011_0100101101001000;
        assert_eq!(r, e);

        let r = _kshiftli_mask32::<31>(a);
        let e: __mmask32 = 0b1000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask32::<32>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask32::<33>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }
21279
    #[simd_test(enable = "avx512bw")]
    const fn test_kshiftli_mask64() {
        // Left-shift the 64-bit mask (only the low 32 bits are populated);
        // shift counts of 64 or more produce zero.
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask64::<3>(a);
        // The 32-bit pattern shifted up by 3, now occupying bits 3..=34.
        let e: __mmask64 = 0b0110100101101001011_0100101101001000;
        assert_eq!(r, e);

        let r = _kshiftli_mask64::<63>(a);
        let e: __mmask64 = 0b1000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask64::<64>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask64::<65>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }
21299
    #[simd_test(enable = "avx512bw")]
    const fn test_kshiftri_mask32() {
        // Logical right-shift of the 32-bit mask; shift counts of the mask
        // width or more produce zero.
        let a: __mmask32 = 0b1010100101101001_0110100101101001;
        let r = _kshiftri_mask32::<3>(a);
        let e: __mmask32 = 0b0001010100101101_0010110100101101;
        assert_eq!(r, e);

        let r = _kshiftri_mask32::<31>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000001;
        assert_eq!(r, e);

        let r = _kshiftri_mask32::<32>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask32::<33>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }
21319
    #[simd_test(enable = "avx512bw")]
    const fn test_kshiftri_mask64() {
        // Logical right-shift of a 35-bit pattern held in a 64-bit mask;
        // shifting past the topmost set bit (bit 34) yields zero, as do
        // shift counts of 64 or more.
        let a: __mmask64 = 0b1010100101101001011_0100101101001000;
        let r = _kshiftri_mask64::<3>(a);
        let e: __mmask64 = 0b1010100101101001_0110100101101001;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<34>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000001;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<35>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<64>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<65>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }
21343
21344    #[simd_test(enable = "avx512bw")]
21345    const fn test_ktest_mask32_u8() {
21346        let a: __mmask32 = 0b0110100100111100_0110100100111100;
21347        let b: __mmask32 = 0b1001011011000011_1001011011000011;
21348        let mut and_not: u8 = 0;
21349        let r = unsafe { _ktest_mask32_u8(a, b, &mut and_not) };
21350        assert_eq!(r, 1);
21351        assert_eq!(and_not, 0);
21352    }
21353
21354    #[simd_test(enable = "avx512bw")]
21355    const fn test_ktestc_mask32_u8() {
21356        let a: __mmask32 = 0b0110100100111100_0110100100111100;
21357        let b: __mmask32 = 0b1001011011000011_1001011011000011;
21358        let r = _ktestc_mask32_u8(a, b);
21359        assert_eq!(r, 0);
21360    }
21361
21362    #[simd_test(enable = "avx512bw")]
21363    const fn test_ktestz_mask32_u8() {
21364        let a: __mmask32 = 0b0110100100111100_0110100100111100;
21365        let b: __mmask32 = 0b1001011011000011_1001011011000011;
21366        let r = _ktestz_mask32_u8(a, b);
21367        assert_eq!(r, 1);
21368    }
21369
21370    #[simd_test(enable = "avx512bw")]
21371    const fn test_ktest_mask64_u8() {
21372        let a: __mmask64 = 0b0110100100111100_0110100100111100;
21373        let b: __mmask64 = 0b1001011011000011_1001011011000011;
21374        let mut and_not: u8 = 0;
21375        let r = unsafe { _ktest_mask64_u8(a, b, &mut and_not) };
21376        assert_eq!(r, 1);
21377        assert_eq!(and_not, 0);
21378    }
21379
21380    #[simd_test(enable = "avx512bw")]
21381    const fn test_ktestc_mask64_u8() {
21382        let a: __mmask64 = 0b0110100100111100_0110100100111100;
21383        let b: __mmask64 = 0b1001011011000011_1001011011000011;
21384        let r = _ktestc_mask64_u8(a, b);
21385        assert_eq!(r, 0);
21386    }
21387
21388    #[simd_test(enable = "avx512bw")]
21389    const fn test_ktestz_mask64_u8() {
21390        let a: __mmask64 = 0b0110100100111100_0110100100111100;
21391        let b: __mmask64 = 0b1001011011000011_1001011011000011;
21392        let r = _ktestz_mask64_u8(a, b);
21393        assert_eq!(r, 1);
21394    }
21395
21396    #[simd_test(enable = "avx512bw")]
21397    const fn test_mm512_kunpackw() {
21398        let a: u32 = 0x00110011;
21399        let b: u32 = 0x00001011;
21400        let r = _mm512_kunpackw(a, b);
21401        let e: u32 = 0x00111011;
21402        assert_eq!(r, e);
21403    }
21404
21405    #[simd_test(enable = "avx512bw")]
21406    const fn test_mm512_kunpackd() {
21407        let a: u64 = 0x11001100_00110011;
21408        let b: u64 = 0x00101110_00001011;
21409        let r = _mm512_kunpackd(a, b);
21410        let e: u64 = 0x00110011_00001011;
21411        assert_eq!(r, e);
21412    }
21413
21414    #[simd_test(enable = "avx512bw")]
21415    const fn test_mm512_cvtepi16_epi8() {
21416        let a = _mm512_set1_epi16(2);
21417        let r = _mm512_cvtepi16_epi8(a);
21418        let e = _mm256_set1_epi8(2);
21419        assert_eq_m256i(r, e);
21420    }
21421
    // Writemask: a 0 mask keeps `src`; an all-ones mask converts every lane.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cvtepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }
21432
    // Zeromask: a 0 mask zeroes the result; an all-ones mask converts every lane.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_cvtepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }
21442
21443    #[simd_test(enable = "avx512bw,avx512vl")]
21444    const fn test_mm256_cvtepi16_epi8() {
21445        let a = _mm256_set1_epi16(2);
21446        let r = _mm256_cvtepi16_epi8(a);
21447        let e = _mm_set1_epi8(2);
21448        assert_eq_m128i(r, e);
21449    }
21450
    // Writemask: a 0 mask keeps `src`; an all-ones mask converts every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cvtepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }
21461
    // Zeromask: a 0 mask zeroes the result; an all-ones mask converts every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }
21471
    // The 8 i16 lanes convert into the low 8 bytes of the result; the
    // expected upper 8 bytes are zero.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_cvtepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
21479
    // Writemask: a 0 mask keeps `src`; an all-ones 8-bit mask converts all 8
    // lanes into the low 8 bytes. The expected upper 8 bytes are zero.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cvtepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
21490
    // Zeromask: a 0 mask zeroes the result; an all-ones 8-bit mask converts
    // all 8 lanes into the low 8 bytes. The expected upper 8 bytes are zero.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
21500
21501    #[simd_test(enable = "avx512bw")]
21502    fn test_mm512_cvtsepi16_epi8() {
21503        let a = _mm512_set1_epi16(i16::MAX);
21504        let r = _mm512_cvtsepi16_epi8(a);
21505        let e = _mm256_set1_epi8(i8::MAX);
21506        assert_eq_m256i(r, e);
21507    }
21508
    // Writemask over the saturating conversion: a 0 mask keeps `src`; an
    // all-ones mask clamps i16::MAX to i8::MAX in every lane.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_cvtsepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }
21519
21520    #[simd_test(enable = "avx512bw,avx512vl")]
21521    fn test_mm256_cvtsepi16_epi8() {
21522        let a = _mm256_set1_epi16(i16::MAX);
21523        let r = _mm256_cvtsepi16_epi8(a);
21524        let e = _mm_set1_epi8(i8::MAX);
21525        assert_eq_m128i(r, e);
21526    }
21527
    // Writemask over the saturating conversion: a 0 mask keeps `src`; an
    // all-ones mask clamps i16::MAX to i8::MAX in every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_cvtsepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }
21538
    // Zeromask over the saturating conversion: a 0 mask zeroes the result;
    // an all-ones mask clamps i16::MAX to i8::MAX in every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }
21548
    // Saturating conversion of the 8 i16 lanes into the low 8 bytes; the
    // expected upper 8 bytes are zero.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_cvtsepi16_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
21557
    // Writemask: a 0 mask keeps `src`; an all-ones 8-bit mask clamps each
    // i16::MAX lane to i8::MAX in the low 8 bytes (upper 8 bytes zero).
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_cvtsepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
21569
    // Zeromask: a 0 mask zeroes the result; an all-ones 8-bit mask clamps
    // each i16::MAX lane to i8::MAX in the low 8 bytes (upper 8 bytes zero).
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
21580
    // Zeromask over the saturating conversion: a 0 mask zeroes the result;
    // an all-ones mask clamps i16::MAX to i8::MAX in every lane.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }
21590
21591    #[simd_test(enable = "avx512bw")]
21592    fn test_mm512_cvtusepi16_epi8() {
21593        let a = _mm512_set1_epi16(i16::MIN);
21594        let r = _mm512_cvtusepi16_epi8(a);
21595        let e = _mm256_set1_epi8(-1);
21596        assert_eq_m256i(r, e);
21597    }
21598
    // Writemask over the unsigned-saturating conversion: a 0 mask keeps
    // `src`; an all-ones mask clamps 0x8000 (i16::MIN as u16) to 0xFF (-1).
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_cvtusepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }
21609
    // Zeromask over the unsigned-saturating conversion: a 0 mask zeroes the
    // result; an all-ones mask clamps 0x8000 (i16::MIN as u16) to 0xFF (-1).
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_maskz_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }
21619
21620    #[simd_test(enable = "avx512bw,avx512vl")]
21621    fn test_mm256_cvtusepi16_epi8() {
21622        let a = _mm256_set1_epi16(i16::MIN);
21623        let r = _mm256_cvtusepi16_epi8(a);
21624        let e = _mm_set1_epi8(-1);
21625        assert_eq_m128i(r, e);
21626    }
21627
    // Writemask over the unsigned-saturating conversion: a 0 mask keeps
    // `src`; an all-ones mask clamps 0x8000 to 0xFF (-1) in every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_cvtusepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }
21638
    // Zeromask over the unsigned-saturating conversion: a 0 mask zeroes the
    // result; an all-ones mask clamps 0x8000 to 0xFF (-1) in every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_maskz_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }
21648
    // Unsigned-saturating conversion of the 8 i16 lanes into the low 8
    // bytes (0x8000 clamps to 0xFF = -1); the upper 8 bytes are zero.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_cvtusepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
21656
    // Writemask: a 0 mask keeps `src`; an all-ones 8-bit mask clamps each
    // lane to 0xFF (-1) in the low 8 bytes (upper 8 bytes zero).
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_cvtusepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
21667
    // Zeromask: a 0 mask zeroes the result; an all-ones 8-bit mask clamps
    // each lane to 0xFF (-1) in the low 8 bytes (upper 8 bytes zero).
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_maskz_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
21677
21678    #[simd_test(enable = "avx512bw")]
21679    const fn test_mm512_cvtepi8_epi16() {
21680        let a = _mm256_set1_epi8(2);
21681        let r = _mm512_cvtepi8_epi16(a);
21682        let e = _mm512_set1_epi16(2);
21683        assert_eq_m512i(r, e);
21684    }
21685
    // Writemask over the widening conversion: a 0 mask keeps `src`; an
    // all-ones mask widens every lane.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cvtepi8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
21696
    // Zeromask over the widening conversion: a 0 mask zeroes the result; an
    // all-ones mask widens every lane.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepi8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
21706
    // Writemask over the widening conversion: a 0 mask keeps `src`; an
    // all-ones mask widens every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cvtepi8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }
21717
    // Zeromask over the widening conversion: a 0 mask zeroes the result; an
    // all-ones mask widens every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepi8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }
21727
    // Writemask over the widening conversion: a 0 mask keeps `src`; an
    // all-ones 8-bit mask widens the 8 source lanes.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cvtepi8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
21738
    // Zeromask over the widening conversion: a 0 mask zeroes the result; an
    // all-ones 8-bit mask widens the 8 source lanes.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepi8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
21748
21749    #[simd_test(enable = "avx512bw")]
21750    const fn test_mm512_cvtepu8_epi16() {
21751        let a = _mm256_set1_epi8(2);
21752        let r = _mm512_cvtepu8_epi16(a);
21753        let e = _mm512_set1_epi16(2);
21754        assert_eq_m512i(r, e);
21755    }
21756
    // Writemask over the widening conversion: a 0 mask keeps `src`; an
    // all-ones mask widens every lane.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_cvtepu8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
21767
    // Zeromask over the widening conversion: a 0 mask zeroes the result; an
    // all-ones mask widens every lane.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepu8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }
21777
    // Writemask over the widening conversion: a 0 mask keeps `src`; an
    // all-ones mask widens every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_cvtepu8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }
21788
    // Zeromask over the widening conversion: a 0 mask zeroes the result; an
    // all-ones mask widens every lane.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepu8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }
21798
    // Writemask over the widening conversion: a 0 mask keeps `src`; an
    // all-ones 8-bit mask widens the 8 source lanes.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_cvtepu8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
21809
    // Zeromask over the widening conversion: a 0 mask zeroes the result; an
    // all-ones 8-bit mask widens the 8 source lanes.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepu8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
21819
    // Byte-wise shift left by 9 within each 128-bit lane: the marker bytes
    // move up 9 positions per lane and zeros fill the vacated low bytes.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_bslli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let r = _mm512_bslli_epi128::<9>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
21839
    // Byte-wise shift right by 3 within each 128-bit lane: each lane's top
    // 3 bytes are discarded and zeros fill in from the high end.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_bsrli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        );
        let r = _mm512_bsrli_epi128::<3>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
            0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
            0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        );
        assert_eq_m512i(r, e);
    }
21859
    // alignr::<14>: per 128-bit lane, the 32-byte concatenation a:b is
    // shifted right by 14 bytes, so the low 2 result bytes come from the
    // top of `b` (all ones) and the rest from `a`.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_alignr_epi8::<14>(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }
21880
    // Writemask over alignr::<14>: a 0 mask keeps the `src` operand (`a`
    // here); an all-ones 64-bit mask takes every aligned byte.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi8::<14>(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }
21908
    // Zeromask over alignr::<14>: a 0 mask zeroes the result; an all-ones
    // 64-bit mask takes every aligned byte.
    #[simd_test(enable = "avx512bw")]
    const fn test_mm512_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi8::<14>(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }
21935
    // Writemask over alignr::<14> (256-bit): a 0 mask keeps `a`; an
    // all-ones 32-bit mask takes every aligned byte.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }
21954
    // Zeromask over alignr::<14> (256-bit): a 0 mask zeroes the result; an
    // all-ones 32-bit mask takes every aligned byte.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm256_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }
21973
    // Writemask over alignr::<14> (128-bit): a 0 mask keeps `a`; an
    // all-ones 16-bit mask takes every aligned byte.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_mask_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }
21984
    // Zeromask over alignr::<14> (128-bit): a 0 mask zeroes the result; an
    // all-ones 16-bit mask takes every aligned byte.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const fn test_mm_maskz_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }
21995
    // Saturating convert-and-store through a raw pointer: the all-ones mask
    // writes all 32 bytes, fully overwriting the undefined destination.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_cvtsepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm512_mask_cvtsepi16_storeu_epi8(
                &mut r as *mut _ as *mut i8,
                0b11111111_11111111_11111111_11111111,
                a,
            );
        }
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }
22010
    // Saturating convert-and-store through a raw pointer: the all-ones mask
    // writes all 16 bytes, fully overwriting the undefined destination.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_cvtsepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        }
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }
22021
    // Saturating convert-and-store: only the 8 masked bytes are written, so
    // the upper 8 bytes of the zero-initialized destination stay zero.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_cvtsepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0, 0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
22036
    // Convert-and-store through a raw pointer: the all-ones mask writes all
    // 32 converted bytes, fully overwriting the undefined destination.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_cvtepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(8);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm512_mask_cvtepi16_storeu_epi8(
                &mut r as *mut _ as *mut i8,
                0b11111111_11111111_11111111_11111111,
                a,
            );
        }
        let e = _mm256_set1_epi8(8);
        assert_eq_m256i(r, e);
    }
22051
    // Convert-and-store through a raw pointer: the all-ones mask writes all
    // 16 converted bytes, fully overwriting the undefined destination.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_cvtepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(8);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        }
        let e = _mm_set1_epi8(8);
        assert_eq_m128i(r, e);
    }
22062
    // Convert-and-store: only the 8 masked bytes are written, so the upper
    // 8 bytes of the zero-initialized destination stay zero.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_cvtepi16_storeu_epi8() {
        let a = _mm_set1_epi16(8);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
        assert_eq_m128i(r, e);
    }
22073
    // Unsigned-saturating convert-and-store: i16::MAX clamps to u8::MAX in
    // every byte; the all-ones mask overwrites the whole destination.
    #[simd_test(enable = "avx512bw")]
    fn test_mm512_mask_cvtusepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        unsafe {
            _mm512_mask_cvtusepi16_storeu_epi8(
                &mut r as *mut _ as *mut i8,
                0b11111111_11111111_11111111_11111111,
                a,
            );
        }
        let e = _mm256_set1_epi8(u8::MAX as i8);
        assert_eq_m256i(r, e);
    }
22088
    // Unsigned-saturating convert-and-store: i16::MAX clamps to u8::MAX in
    // every byte; the all-ones mask overwrites the whole destination.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm256_mask_cvtusepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        unsafe {
            _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        }
        let e = _mm_set1_epi8(u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
22099
    // Unsigned-saturating convert-and-store: only the 8 masked bytes are
    // written (clamped to u8::MAX); the upper 8 zero bytes stay untouched.
    #[simd_test(enable = "avx512bw,avx512vl")]
    fn test_mm_mask_cvtusepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        unsafe {
            _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        }
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }
22116}