core/stdarch/crates/core_arch/src/x86/
avx512bw.rs

1use crate::{
2    core_arch::{simd::*, x86::*},
3    intrinsics::simd::*,
4    ptr,
5};
6
7#[cfg(test)]
8use stdarch_test::assert_instr;
9
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi16&expand=30)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_abs_epi16(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        // Lane-wise |x|: keep lanes that are > 0 and replace the rest with
        // their (wrapping) negation. i16::MIN wraps back to 0x8000, which
        // matches what the VPABSW instruction produces for that lane.
        let cmp: i16x32 = simd_gt(a, i16x32::ZERO);
        transmute(simd_select(cmp, a, simd_neg(a)))
    }
}
25
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi16&expand=31)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        // Full-width abs first; bitmask select then keeps lane i of the
        // result when bit i of `k` is set, else the corresponding `src` lane.
        let abs = _mm512_abs_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
    }
}
40
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi16&expand=32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        // Full-width abs first; bitmask select then keeps lane i of the
        // result when bit i of `k` is set, else zeroes the lane.
        let abs = _mm512_abs_epi16(a).as_i16x32();
        transmute(simd_select_bitmask(k, abs, i16x32::ZERO))
    }
}
55
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi16&expand=28)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (needs AVX512VL): unmasked abs, then per-lane
        // merge — mask bit set -> abs lane, clear -> `src` lane.
        let abs = _mm256_abs_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
    }
}
70
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi16&expand=29)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (needs AVX512VL): unmasked abs, then per-lane
        // merge — mask bit set -> abs lane, clear -> zero.
        let abs = _mm256_abs_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, abs, i16x16::ZERO))
    }
}
85
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi16&expand=25)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (needs AVX512VL): unmasked abs, then per-lane
        // merge — mask bit set -> abs lane, clear -> `src` lane.
        let abs = _mm_abs_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
    }
}
100
/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi16&expand=26)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (needs AVX512VL): unmasked abs, then per-lane
        // merge — mask bit set -> abs lane, clear -> zero.
        let abs = _mm_abs_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, abs, i16x8::ZERO))
    }
}
115
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi8&expand=57)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_abs_epi8(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        // Lane-wise |x|: keep lanes that are > 0 and replace the rest with
        // their (wrapping) negation. i8::MIN wraps back to 0x80, which
        // matches what the VPABSB instruction produces for that lane.
        let cmp: i8x64 = simd_gt(a, i8x64::ZERO);
        transmute(simd_select(cmp, a, simd_neg(a)))
    }
}
131
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi8&expand=58)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        // Full-width abs first; bitmask select then keeps lane i of the
        // result when bit i of `k` is set, else the corresponding `src` lane.
        let abs = _mm512_abs_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
    }
}
146
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi8&expand=59)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
    unsafe {
        // Full-width abs first; bitmask select then keeps lane i of the
        // result when bit i of `k` is set, else zeroes the lane.
        let abs = _mm512_abs_epi8(a).as_i8x64();
        transmute(simd_select_bitmask(k, abs, i8x64::ZERO))
    }
}
161
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi8&expand=55)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (needs AVX512VL): unmasked abs, then per-lane
        // merge — mask bit set -> abs lane, clear -> `src` lane.
        let abs = _mm256_abs_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
    }
}
176
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi8&expand=56)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (needs AVX512VL): unmasked abs, then per-lane
        // merge — mask bit set -> abs lane, clear -> zero.
        let abs = _mm256_abs_epi8(a).as_i8x32();
        transmute(simd_select_bitmask(k, abs, i8x32::ZERO))
    }
}
191
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi8&expand=52)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (needs AVX512VL): unmasked abs, then per-lane
        // merge — mask bit set -> abs lane, clear -> `src` lane.
        let abs = _mm_abs_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
    }
}
206
/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi8&expand=53)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (needs AVX512VL): unmasked abs, then per-lane
        // merge — mask bit set -> abs lane, clear -> zero.
        let abs = _mm_abs_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, abs, i8x16::ZERO))
    }
}
221
/// Add packed 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi16&expand=91)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise wrapping addition of 32 x i16 (VPADDW semantics).
    unsafe { transmute(simd_add(a.as_i16x32(), b.as_i16x32())) }
}
233
/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi16&expand=92)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Unmasked add first; bitmask select keeps lane i of the sum when
        // bit i of `k` is set, else the corresponding `src` lane.
        let add = _mm512_add_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
    }
}
248
/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi16&expand=93)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Unmasked add first; bitmask select keeps lane i of the sum when
        // bit i of `k` is set, else zeroes the lane.
        let add = _mm512_add_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
    }
}
263
/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi16&expand=89)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (needs AVX512VL): unmasked add, then per-lane
        // merge — mask bit set -> sum lane, clear -> `src` lane.
        let add = _mm256_add_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
    }
}
278
/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi16&expand=90)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (needs AVX512VL): unmasked add, then per-lane
        // merge — mask bit set -> sum lane, clear -> zero.
        let add = _mm256_add_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
    }
}
293
/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi16&expand=86)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (needs AVX512VL): unmasked add, then per-lane
        // merge — mask bit set -> sum lane, clear -> `src` lane.
        let add = _mm_add_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
    }
}
308
/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi16&expand=87)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (needs AVX512VL): unmasked add, then per-lane
        // merge — mask bit set -> sum lane, clear -> zero.
        let add = _mm_add_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
    }
}
323
/// Add packed 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi8&expand=118)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise wrapping addition of 64 x i8 (VPADDB semantics).
    unsafe { transmute(simd_add(a.as_i8x64(), b.as_i8x64())) }
}
335
/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi8&expand=119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Unmasked add first; bitmask select keeps lane i of the sum when
        // bit i of `k` is set, else the corresponding `src` lane.
        let add = _mm512_add_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
    }
}
350
/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi8&expand=120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Unmasked add first; bitmask select keeps lane i of the sum when
        // bit i of `k` is set, else zeroes the lane.
        let add = _mm512_add_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
    }
}
365
/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi8&expand=116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (needs AVX512VL): unmasked add, then per-lane
        // merge — mask bit set -> sum lane, clear -> `src` lane.
        let add = _mm256_add_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
    }
}
380
/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi8&expand=117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (needs AVX512VL): unmasked add, then per-lane
        // merge — mask bit set -> sum lane, clear -> zero.
        let add = _mm256_add_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
    }
}
395
/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi8&expand=113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (needs AVX512VL): unmasked add, then per-lane
        // merge — mask bit set -> sum lane, clear -> `src` lane.
        let add = _mm_add_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
    }
}
410
/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi8&expand=114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (needs AVX512VL): unmasked add, then per-lane
        // merge — mask bit set -> sum lane, clear -> zero.
        let add = _mm_add_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
    }
}
425
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu16&expand=197)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise unsigned saturating addition of 32 x u16: sums that would
    // overflow clamp to u16::MAX (VPADDUSW semantics).
    unsafe { transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32())) }
}
437
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu16&expand=198)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Unmasked saturating add first; bitmask select keeps lane i of the
        // sum when bit i of `k` is set, else the corresponding `src` lane.
        let add = _mm512_adds_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, add, src.as_u16x32()))
    }
}
452
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu16&expand=199)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Unmasked saturating add first; bitmask select keeps lane i of the
        // sum when bit i of `k` is set, else zeroes the lane.
        let add = _mm512_adds_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, add, u16x32::ZERO))
    }
}
467
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu16&expand=195)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (needs AVX512VL): unmasked saturating add, then
        // per-lane merge — mask bit set -> sum lane, clear -> `src` lane.
        let add = _mm256_adds_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, add, src.as_u16x16()))
    }
}
482
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu16&expand=196)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (needs AVX512VL): unmasked saturating add, then
        // per-lane merge — mask bit set -> sum lane, clear -> zero.
        let add = _mm256_adds_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, add, u16x16::ZERO))
    }
}
497
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu16&expand=192)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (needs AVX512VL): unmasked saturating add, then
        // per-lane merge — mask bit set -> sum lane, clear -> `src` lane.
        let add = _mm_adds_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, add, src.as_u16x8()))
    }
}
512
/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu16&expand=193)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (needs AVX512VL): unmasked saturating add, then
        // per-lane merge — mask bit set -> sum lane, clear -> zero.
        let add = _mm_adds_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, add, u16x8::ZERO))
    }
}
527
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu8&expand=206)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise unsigned saturating addition of 64 x u8: sums that would
    // overflow clamp to u8::MAX (VPADDUSB semantics).
    unsafe { transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64())) }
}
539
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu8&expand=207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Unmasked saturating add first; bitmask select keeps lane i of the
        // sum when bit i of `k` is set, else the corresponding `src` lane.
        let add = _mm512_adds_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, add, src.as_u8x64()))
    }
}
554
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu8&expand=208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Unmasked saturating add first; bitmask select keeps lane i of the
        // sum when bit i of `k` is set, else zeroes the lane.
        let add = _mm512_adds_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, add, u8x64::ZERO))
    }
}
569
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu8&expand=204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (needs AVX512VL): unmasked saturating add, then
        // per-lane merge — mask bit set -> sum lane, clear -> `src` lane.
        let add = _mm256_adds_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, add, src.as_u8x32()))
    }
}
584
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu8&expand=205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // 256-bit variant (needs AVX512VL): unmasked saturating add, then
        // per-lane merge — mask bit set -> sum lane, clear -> zero.
        let add = _mm256_adds_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, add, u8x32::ZERO))
    }
}
599
/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu8&expand=201)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // 128-bit variant (needs AVX512VL): unmasked saturating add, then
        // per-lane merge — mask bit set -> sum lane, clear -> `src` lane.
        let add = _mm_adds_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, add, src.as_u8x16()))
    }
}
614
615/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
616///
617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu8&expand=202)
618#[inline]
619#[target_feature(enable = "avx512bw,avx512vl")]
620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
621#[cfg_attr(test, assert_instr(vpaddusb))]
622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
623pub const fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
624    unsafe {
625        let add = _mm_adds_epu8(a, b).as_u8x16();
626        transmute(simd_select_bitmask(k, add, u8x16::ZERO))
627    }
628}
629
630/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst.
631///
632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi16&expand=179)
633#[inline]
634#[target_feature(enable = "avx512bw")]
635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
636#[cfg_attr(test, assert_instr(vpaddsw))]
637#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
638pub const fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
639    unsafe { transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32())) }
640}
641
642/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
643///
644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi16&expand=180)
645#[inline]
646#[target_feature(enable = "avx512bw")]
647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
648#[cfg_attr(test, assert_instr(vpaddsw))]
649#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
650pub const fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
651    unsafe {
652        let add = _mm512_adds_epi16(a, b).as_i16x32();
653        transmute(simd_select_bitmask(k, add, src.as_i16x32()))
654    }
655}
656
657/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
658///
659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi16&expand=181)
660#[inline]
661#[target_feature(enable = "avx512bw")]
662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
663#[cfg_attr(test, assert_instr(vpaddsw))]
664#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
665pub const fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
666    unsafe {
667        let add = _mm512_adds_epi16(a, b).as_i16x32();
668        transmute(simd_select_bitmask(k, add, i16x32::ZERO))
669    }
670}
671
672/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
673///
674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi16&expand=177)
675#[inline]
676#[target_feature(enable = "avx512bw,avx512vl")]
677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
678#[cfg_attr(test, assert_instr(vpaddsw))]
679#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
680pub const fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
681    unsafe {
682        let add = _mm256_adds_epi16(a, b).as_i16x16();
683        transmute(simd_select_bitmask(k, add, src.as_i16x16()))
684    }
685}
686
687/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
688///
689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi16&expand=178)
690#[inline]
691#[target_feature(enable = "avx512bw,avx512vl")]
692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
693#[cfg_attr(test, assert_instr(vpaddsw))]
694#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
695pub const fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
696    unsafe {
697        let add = _mm256_adds_epi16(a, b).as_i16x16();
698        transmute(simd_select_bitmask(k, add, i16x16::ZERO))
699    }
700}
701
702/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
703///
704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi16&expand=174)
705#[inline]
706#[target_feature(enable = "avx512bw,avx512vl")]
707#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
708#[cfg_attr(test, assert_instr(vpaddsw))]
709#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
710pub const fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
711    unsafe {
712        let add = _mm_adds_epi16(a, b).as_i16x8();
713        transmute(simd_select_bitmask(k, add, src.as_i16x8()))
714    }
715}
716
717/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
718///
719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi16&expand=175)
720#[inline]
721#[target_feature(enable = "avx512bw,avx512vl")]
722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
723#[cfg_attr(test, assert_instr(vpaddsw))]
724#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
725pub const fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
726    unsafe {
727        let add = _mm_adds_epi16(a, b).as_i16x8();
728        transmute(simd_select_bitmask(k, add, i16x8::ZERO))
729    }
730}
731
732/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
733///
734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi8&expand=188)
735#[inline]
736#[target_feature(enable = "avx512bw")]
737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
738#[cfg_attr(test, assert_instr(vpaddsb))]
739#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
740pub const fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
741    unsafe { transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64())) }
742}
743
744/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
745///
746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi8&expand=189)
747#[inline]
748#[target_feature(enable = "avx512bw")]
749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
750#[cfg_attr(test, assert_instr(vpaddsb))]
751#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
752pub const fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
753    unsafe {
754        let add = _mm512_adds_epi8(a, b).as_i8x64();
755        transmute(simd_select_bitmask(k, add, src.as_i8x64()))
756    }
757}
758
759/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
760///
761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi8&expand=190)
762#[inline]
763#[target_feature(enable = "avx512bw")]
764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
765#[cfg_attr(test, assert_instr(vpaddsb))]
766#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
767pub const fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
768    unsafe {
769        let add = _mm512_adds_epi8(a, b).as_i8x64();
770        transmute(simd_select_bitmask(k, add, i8x64::ZERO))
771    }
772}
773
774/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
775///
776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi8&expand=186)
777#[inline]
778#[target_feature(enable = "avx512bw,avx512vl")]
779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
780#[cfg_attr(test, assert_instr(vpaddsb))]
781#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
782pub const fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
783    unsafe {
784        let add = _mm256_adds_epi8(a, b).as_i8x32();
785        transmute(simd_select_bitmask(k, add, src.as_i8x32()))
786    }
787}
788
789/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
790///
791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi8&expand=187)
792#[inline]
793#[target_feature(enable = "avx512bw,avx512vl")]
794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
795#[cfg_attr(test, assert_instr(vpaddsb))]
796#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
797pub const fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
798    unsafe {
799        let add = _mm256_adds_epi8(a, b).as_i8x32();
800        transmute(simd_select_bitmask(k, add, i8x32::ZERO))
801    }
802}
803
804/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
805///
806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi8&expand=183)
807#[inline]
808#[target_feature(enable = "avx512bw,avx512vl")]
809#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
810#[cfg_attr(test, assert_instr(vpaddsb))]
811#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
812pub const fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
813    unsafe {
814        let add = _mm_adds_epi8(a, b).as_i8x16();
815        transmute(simd_select_bitmask(k, add, src.as_i8x16()))
816    }
817}
818
819/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
820///
821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi8&expand=184)
822#[inline]
823#[target_feature(enable = "avx512bw,avx512vl")]
824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
825#[cfg_attr(test, assert_instr(vpaddsb))]
826#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
827pub const fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
828    unsafe {
829        let add = _mm_adds_epi8(a, b).as_i8x16();
830        transmute(simd_select_bitmask(k, add, i8x16::ZERO))
831    }
832}
833
834/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
835///
836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi16&expand=5685)
837#[inline]
838#[target_feature(enable = "avx512bw")]
839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
840#[cfg_attr(test, assert_instr(vpsubw))]
841#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
842pub const fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
843    unsafe { transmute(simd_sub(a.as_i16x32(), b.as_i16x32())) }
844}
845
846/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
847///
848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi16&expand=5683)
849#[inline]
850#[target_feature(enable = "avx512bw")]
851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
852#[cfg_attr(test, assert_instr(vpsubw))]
853#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
854pub const fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
855    unsafe {
856        let sub = _mm512_sub_epi16(a, b).as_i16x32();
857        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
858    }
859}
860
861/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
862///
863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi16&expand=5684)
864#[inline]
865#[target_feature(enable = "avx512bw")]
866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
867#[cfg_attr(test, assert_instr(vpsubw))]
868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
869pub const fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
870    unsafe {
871        let sub = _mm512_sub_epi16(a, b).as_i16x32();
872        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
873    }
874}
875
876/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
877///
878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi16&expand=5680)
879#[inline]
880#[target_feature(enable = "avx512bw,avx512vl")]
881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
882#[cfg_attr(test, assert_instr(vpsubw))]
883#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
884pub const fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
885    unsafe {
886        let sub = _mm256_sub_epi16(a, b).as_i16x16();
887        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
888    }
889}
890
891/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
892///
893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi16&expand=5681)
894#[inline]
895#[target_feature(enable = "avx512bw,avx512vl")]
896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
897#[cfg_attr(test, assert_instr(vpsubw))]
898#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
899pub const fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
900    unsafe {
901        let sub = _mm256_sub_epi16(a, b).as_i16x16();
902        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
903    }
904}
905
906/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
907///
908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi16&expand=5677)
909#[inline]
910#[target_feature(enable = "avx512bw,avx512vl")]
911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
912#[cfg_attr(test, assert_instr(vpsubw))]
913#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
914pub const fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
915    unsafe {
916        let sub = _mm_sub_epi16(a, b).as_i16x8();
917        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
918    }
919}
920
921/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
922///
923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi16&expand=5678)
924#[inline]
925#[target_feature(enable = "avx512bw,avx512vl")]
926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
927#[cfg_attr(test, assert_instr(vpsubw))]
928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
929pub const fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
930    unsafe {
931        let sub = _mm_sub_epi16(a, b).as_i16x8();
932        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
933    }
934}
935
936/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst.
937///
938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi8&expand=5712)
939#[inline]
940#[target_feature(enable = "avx512bw")]
941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
942#[cfg_attr(test, assert_instr(vpsubb))]
943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
944pub const fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
945    unsafe { transmute(simd_sub(a.as_i8x64(), b.as_i8x64())) }
946}
947
948/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
949///
950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi8&expand=5710)
951#[inline]
952#[target_feature(enable = "avx512bw")]
953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
954#[cfg_attr(test, assert_instr(vpsubb))]
955#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
956pub const fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
957    unsafe {
958        let sub = _mm512_sub_epi8(a, b).as_i8x64();
959        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
960    }
961}
962
963/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
964///
965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi8&expand=5711)
966#[inline]
967#[target_feature(enable = "avx512bw")]
968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
969#[cfg_attr(test, assert_instr(vpsubb))]
970#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
971pub const fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
972    unsafe {
973        let sub = _mm512_sub_epi8(a, b).as_i8x64();
974        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
975    }
976}
977
978/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
979///
980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi8&expand=5707)
981#[inline]
982#[target_feature(enable = "avx512bw,avx512vl")]
983#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
984#[cfg_attr(test, assert_instr(vpsubb))]
985#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
986pub const fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
987    unsafe {
988        let sub = _mm256_sub_epi8(a, b).as_i8x32();
989        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
990    }
991}
992
993/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
994///
995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi8&expand=5708)
996#[inline]
997#[target_feature(enable = "avx512bw,avx512vl")]
998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
999#[cfg_attr(test, assert_instr(vpsubb))]
1000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1001pub const fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1002    unsafe {
1003        let sub = _mm256_sub_epi8(a, b).as_i8x32();
1004        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
1005    }
1006}
1007
1008/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1009///
1010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi8&expand=5704)
1011#[inline]
1012#[target_feature(enable = "avx512bw,avx512vl")]
1013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1014#[cfg_attr(test, assert_instr(vpsubb))]
1015#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1016pub const fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1017    unsafe {
1018        let sub = _mm_sub_epi8(a, b).as_i8x16();
1019        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
1020    }
1021}
1022
1023/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1024///
1025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi8&expand=5705)
1026#[inline]
1027#[target_feature(enable = "avx512bw,avx512vl")]
1028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1029#[cfg_attr(test, assert_instr(vpsubb))]
1030#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1031pub const fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1032    unsafe {
1033        let sub = _mm_sub_epi8(a, b).as_i8x16();
1034        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
1035    }
1036}
1037
1038/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst.
1039///
1040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu16&expand=5793)
1041#[inline]
1042#[target_feature(enable = "avx512bw")]
1043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1044#[cfg_attr(test, assert_instr(vpsubusw))]
1045#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1046pub const fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
1047    unsafe { transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32())) }
1048}
1049
1050/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1051///
1052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu16&expand=5791)
1053#[inline]
1054#[target_feature(enable = "avx512bw")]
1055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1056#[cfg_attr(test, assert_instr(vpsubusw))]
1057#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1058pub const fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1059    unsafe {
1060        let sub = _mm512_subs_epu16(a, b).as_u16x32();
1061        transmute(simd_select_bitmask(k, sub, src.as_u16x32()))
1062    }
1063}
1064
1065/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1066///
1067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu16&expand=5792)
1068#[inline]
1069#[target_feature(enable = "avx512bw")]
1070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1071#[cfg_attr(test, assert_instr(vpsubusw))]
1072#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1073pub const fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1074    unsafe {
1075        let sub = _mm512_subs_epu16(a, b).as_u16x32();
1076        transmute(simd_select_bitmask(k, sub, u16x32::ZERO))
1077    }
1078}
1079
1080/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1081///
1082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu16&expand=5788)
1083#[inline]
1084#[target_feature(enable = "avx512bw,avx512vl")]
1085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1086#[cfg_attr(test, assert_instr(vpsubusw))]
1087#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1088pub const fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1089    unsafe {
1090        let sub = _mm256_subs_epu16(a, b).as_u16x16();
1091        transmute(simd_select_bitmask(k, sub, src.as_u16x16()))
1092    }
1093}
1094
1095/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1096///
1097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu16&expand=5789)
1098#[inline]
1099#[target_feature(enable = "avx512bw,avx512vl")]
1100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1101#[cfg_attr(test, assert_instr(vpsubusw))]
1102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1103pub const fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1104    unsafe {
1105        let sub = _mm256_subs_epu16(a, b).as_u16x16();
1106        transmute(simd_select_bitmask(k, sub, u16x16::ZERO))
1107    }
1108}
1109
1110/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1111///
1112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu16&expand=5785)
1113#[inline]
1114#[target_feature(enable = "avx512bw,avx512vl")]
1115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1116#[cfg_attr(test, assert_instr(vpsubusw))]
1117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1118pub const fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1119    unsafe {
1120        let sub = _mm_subs_epu16(a, b).as_u16x8();
1121        transmute(simd_select_bitmask(k, sub, src.as_u16x8()))
1122    }
1123}
1124
1125/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1126///
1127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu16&expand=5786)
1128#[inline]
1129#[target_feature(enable = "avx512bw,avx512vl")]
1130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1131#[cfg_attr(test, assert_instr(vpsubusw))]
1132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1133pub const fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1134    unsafe {
1135        let sub = _mm_subs_epu16(a, b).as_u16x8();
1136        transmute(simd_select_bitmask(k, sub, u16x8::ZERO))
1137    }
1138}
1139
1140/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
1141///
1142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu8&expand=5802)
1143#[inline]
1144#[target_feature(enable = "avx512bw")]
1145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1146#[cfg_attr(test, assert_instr(vpsubusb))]
1147#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1148pub const fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
1149    unsafe { transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64())) }
1150}
1151
1152/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1153///
1154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu8&expand=5800)
1155#[inline]
1156#[target_feature(enable = "avx512bw")]
1157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1158#[cfg_attr(test, assert_instr(vpsubusb))]
1159#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1160pub const fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1161    unsafe {
1162        let sub = _mm512_subs_epu8(a, b).as_u8x64();
1163        transmute(simd_select_bitmask(k, sub, src.as_u8x64()))
1164    }
1165}
1166
1167/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1168///
1169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu8&expand=5801)
1170#[inline]
1171#[target_feature(enable = "avx512bw")]
1172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1173#[cfg_attr(test, assert_instr(vpsubusb))]
1174#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1175pub const fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1176    unsafe {
1177        let sub = _mm512_subs_epu8(a, b).as_u8x64();
1178        transmute(simd_select_bitmask(k, sub, u8x64::ZERO))
1179    }
1180}
1181
1182/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1183///
1184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu8&expand=5797)
1185#[inline]
1186#[target_feature(enable = "avx512bw,avx512vl")]
1187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1188#[cfg_attr(test, assert_instr(vpsubusb))]
1189#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1190pub const fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1191    unsafe {
1192        let sub = _mm256_subs_epu8(a, b).as_u8x32();
1193        transmute(simd_select_bitmask(k, sub, src.as_u8x32()))
1194    }
1195}
1196
1197/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1198///
1199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu8&expand=5798)
1200#[inline]
1201#[target_feature(enable = "avx512bw,avx512vl")]
1202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1203#[cfg_attr(test, assert_instr(vpsubusb))]
1204#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1205pub const fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1206    unsafe {
1207        let sub = _mm256_subs_epu8(a, b).as_u8x32();
1208        transmute(simd_select_bitmask(k, sub, u8x32::ZERO))
1209    }
1210}
1211
1212/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1213///
1214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu8&expand=5794)
1215#[inline]
1216#[target_feature(enable = "avx512bw,avx512vl")]
1217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1218#[cfg_attr(test, assert_instr(vpsubusb))]
1219#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1220pub const fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1221    unsafe {
1222        let sub = _mm_subs_epu8(a, b).as_u8x16();
1223        transmute(simd_select_bitmask(k, sub, src.as_u8x16()))
1224    }
1225}
1226
1227/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1228///
1229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu8&expand=5795)
1230#[inline]
1231#[target_feature(enable = "avx512bw,avx512vl")]
1232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1233#[cfg_attr(test, assert_instr(vpsubusb))]
1234#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1235pub const fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1236    unsafe {
1237        let sub = _mm_subs_epu8(a, b).as_u8x16();
1238        transmute(simd_select_bitmask(k, sub, u8x16::ZERO))
1239    }
1240}
1241
1242/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst.
1243///
1244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi16&expand=5775)
1245#[inline]
1246#[target_feature(enable = "avx512bw")]
1247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1248#[cfg_attr(test, assert_instr(vpsubsw))]
1249#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1250pub const fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
1251    unsafe { transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32())) }
1252}
1253
1254/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1255///
1256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi16&expand=5773)
1257#[inline]
1258#[target_feature(enable = "avx512bw")]
1259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1260#[cfg_attr(test, assert_instr(vpsubsw))]
1261#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1262pub const fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1263    unsafe {
1264        let sub = _mm512_subs_epi16(a, b).as_i16x32();
1265        transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
1266    }
1267}
1268
1269/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1270///
1271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi16&expand=5774)
1272#[inline]
1273#[target_feature(enable = "avx512bw")]
1274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1275#[cfg_attr(test, assert_instr(vpsubsw))]
1276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1277pub const fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1278    unsafe {
1279        let sub = _mm512_subs_epi16(a, b).as_i16x32();
1280        transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
1281    }
1282}
1283
1284/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1285///
1286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi16&expand=5770)
1287#[inline]
1288#[target_feature(enable = "avx512bw,avx512vl")]
1289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1290#[cfg_attr(test, assert_instr(vpsubsw))]
1291#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1292pub const fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1293    unsafe {
1294        let sub = _mm256_subs_epi16(a, b).as_i16x16();
1295        transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
1296    }
1297}
1298
1299/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1300///
1301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi16&expand=5771)
1302#[inline]
1303#[target_feature(enable = "avx512bw,avx512vl")]
1304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1305#[cfg_attr(test, assert_instr(vpsubsw))]
1306#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1307pub const fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1308    unsafe {
1309        let sub = _mm256_subs_epi16(a, b).as_i16x16();
1310        transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
1311    }
1312}
1313
1314/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1315///
1316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi16&expand=5767)
1317#[inline]
1318#[target_feature(enable = "avx512bw,avx512vl")]
1319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1320#[cfg_attr(test, assert_instr(vpsubsw))]
1321#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1322pub const fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1323    unsafe {
1324        let sub = _mm_subs_epi16(a, b).as_i16x8();
1325        transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
1326    }
1327}
1328
1329/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1330///
1331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi16&expand=5768)
1332#[inline]
1333#[target_feature(enable = "avx512bw,avx512vl")]
1334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1335#[cfg_attr(test, assert_instr(vpsubsw))]
1336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1337pub const fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1338    unsafe {
1339        let sub = _mm_subs_epi16(a, b).as_i16x8();
1340        transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
1341    }
1342}
1343
1344/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst.
1345///
1346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi8&expand=5784)
1347#[inline]
1348#[target_feature(enable = "avx512bw")]
1349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1350#[cfg_attr(test, assert_instr(vpsubsb))]
1351#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1352pub const fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
1353    unsafe { transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64())) }
1354}
1355
1356/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1357///
1358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi8&expand=5782)
1359#[inline]
1360#[target_feature(enable = "avx512bw")]
1361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1362#[cfg_attr(test, assert_instr(vpsubsb))]
1363#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1364pub const fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1365    unsafe {
1366        let sub = _mm512_subs_epi8(a, b).as_i8x64();
1367        transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
1368    }
1369}
1370
1371/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1372///
1373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi8&expand=5783)
1374#[inline]
1375#[target_feature(enable = "avx512bw")]
1376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1377#[cfg_attr(test, assert_instr(vpsubsb))]
1378#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1379pub const fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1380    unsafe {
1381        let sub = _mm512_subs_epi8(a, b).as_i8x64();
1382        transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
1383    }
1384}
1385
1386/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1387///
1388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi8&expand=5779)
1389#[inline]
1390#[target_feature(enable = "avx512bw,avx512vl")]
1391#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1392#[cfg_attr(test, assert_instr(vpsubsb))]
1393#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1394pub const fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1395    unsafe {
1396        let sub = _mm256_subs_epi8(a, b).as_i8x32();
1397        transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
1398    }
1399}
1400
1401/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1402///
1403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi8&expand=5780)
1404#[inline]
1405#[target_feature(enable = "avx512bw,avx512vl")]
1406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1407#[cfg_attr(test, assert_instr(vpsubsb))]
1408#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1409pub const fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1410    unsafe {
1411        let sub = _mm256_subs_epi8(a, b).as_i8x32();
1412        transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
1413    }
1414}
1415
1416/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1417///
1418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi8&expand=5776)
1419#[inline]
1420#[target_feature(enable = "avx512bw,avx512vl")]
1421#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1422#[cfg_attr(test, assert_instr(vpsubsb))]
1423#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1424pub const fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1425    unsafe {
1426        let sub = _mm_subs_epi8(a, b).as_i8x16();
1427        transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
1428    }
1429}
1430
1431/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1432///
1433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi8&expand=5777)
1434#[inline]
1435#[target_feature(enable = "avx512bw,avx512vl")]
1436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1437#[cfg_attr(test, assert_instr(vpsubsb))]
1438#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1439pub const fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1440    unsafe {
1441        let sub = _mm_subs_epi8(a, b).as_i8x16();
1442        transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
1443    }
1444}
1445
1446/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
1447///
1448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epu16&expand=3973)
1449#[inline]
1450#[target_feature(enable = "avx512bw")]
1451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1452#[cfg_attr(test, assert_instr(vpmulhuw))]
1453#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1454pub const fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
1455    unsafe {
1456        let a = simd_cast::<_, u32x32>(a.as_u16x32());
1457        let b = simd_cast::<_, u32x32>(b.as_u16x32());
1458        let r = simd_shr(simd_mul(a, b), u32x32::splat(16));
1459        transmute(simd_cast::<u32x32, u16x32>(r))
1460    }
1461}
1462
1463/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1464///
1465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epu16&expand=3971)
1466#[inline]
1467#[target_feature(enable = "avx512bw")]
1468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1469#[cfg_attr(test, assert_instr(vpmulhuw))]
1470#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1471pub const fn _mm512_mask_mulhi_epu16(
1472    src: __m512i,
1473    k: __mmask32,
1474    a: __m512i,
1475    b: __m512i,
1476) -> __m512i {
1477    unsafe {
1478        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
1479        transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
1480    }
1481}
1482
1483/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1484///
1485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epu16&expand=3972)
1486#[inline]
1487#[target_feature(enable = "avx512bw")]
1488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1489#[cfg_attr(test, assert_instr(vpmulhuw))]
1490#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1491pub const fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1492    unsafe {
1493        let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
1494        transmute(simd_select_bitmask(k, mul, u16x32::ZERO))
1495    }
1496}
1497
1498/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1499///
1500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epu16&expand=3968)
1501#[inline]
1502#[target_feature(enable = "avx512bw,avx512vl")]
1503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1504#[cfg_attr(test, assert_instr(vpmulhuw))]
1505#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1506pub const fn _mm256_mask_mulhi_epu16(
1507    src: __m256i,
1508    k: __mmask16,
1509    a: __m256i,
1510    b: __m256i,
1511) -> __m256i {
1512    unsafe {
1513        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
1514        transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
1515    }
1516}
1517
1518/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1519///
1520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epu16&expand=3969)
1521#[inline]
1522#[target_feature(enable = "avx512bw,avx512vl")]
1523#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1524#[cfg_attr(test, assert_instr(vpmulhuw))]
1525#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1526pub const fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1527    unsafe {
1528        let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
1529        transmute(simd_select_bitmask(k, mul, u16x16::ZERO))
1530    }
1531}
1532
1533/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1534///
1535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epu16&expand=3965)
1536#[inline]
1537#[target_feature(enable = "avx512bw,avx512vl")]
1538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1539#[cfg_attr(test, assert_instr(vpmulhuw))]
1540#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1541pub const fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1542    unsafe {
1543        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
1544        transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
1545    }
1546}
1547
1548/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1549///
1550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epu16&expand=3966)
1551#[inline]
1552#[target_feature(enable = "avx512bw,avx512vl")]
1553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1554#[cfg_attr(test, assert_instr(vpmulhuw))]
1555#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1556pub const fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1557    unsafe {
1558        let mul = _mm_mulhi_epu16(a, b).as_u16x8();
1559        transmute(simd_select_bitmask(k, mul, u16x8::ZERO))
1560    }
1561}
1562
1563/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
1564///
1565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epi16&expand=3962)
1566#[inline]
1567#[target_feature(enable = "avx512bw")]
1568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1569#[cfg_attr(test, assert_instr(vpmulhw))]
1570#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1571pub const fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
1572    unsafe {
1573        let a = simd_cast::<_, i32x32>(a.as_i16x32());
1574        let b = simd_cast::<_, i32x32>(b.as_i16x32());
1575        let r = simd_shr(simd_mul(a, b), i32x32::splat(16));
1576        transmute(simd_cast::<i32x32, i16x32>(r))
1577    }
1578}
1579
1580/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1581///
1582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epi16&expand=3960)
1583#[inline]
1584#[target_feature(enable = "avx512bw")]
1585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1586#[cfg_attr(test, assert_instr(vpmulhw))]
1587#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1588pub const fn _mm512_mask_mulhi_epi16(
1589    src: __m512i,
1590    k: __mmask32,
1591    a: __m512i,
1592    b: __m512i,
1593) -> __m512i {
1594    unsafe {
1595        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
1596        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1597    }
1598}
1599
1600/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1601///
1602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epi16&expand=3961)
1603#[inline]
1604#[target_feature(enable = "avx512bw")]
1605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1606#[cfg_attr(test, assert_instr(vpmulhw))]
1607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1608pub const fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1609    unsafe {
1610        let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
1611        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
1612    }
1613}
1614
1615/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1616///
1617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epi16&expand=3957)
1618#[inline]
1619#[target_feature(enable = "avx512bw,avx512vl")]
1620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1621#[cfg_attr(test, assert_instr(vpmulhw))]
1622#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1623pub const fn _mm256_mask_mulhi_epi16(
1624    src: __m256i,
1625    k: __mmask16,
1626    a: __m256i,
1627    b: __m256i,
1628) -> __m256i {
1629    unsafe {
1630        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
1631        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1632    }
1633}
1634
1635/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1636///
1637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epi16&expand=3958)
1638#[inline]
1639#[target_feature(enable = "avx512bw,avx512vl")]
1640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1641#[cfg_attr(test, assert_instr(vpmulhw))]
1642#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1643pub const fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1644    unsafe {
1645        let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
1646        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
1647    }
1648}
1649
1650/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1651///
1652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epi16&expand=3954)
1653#[inline]
1654#[target_feature(enable = "avx512bw,avx512vl")]
1655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1656#[cfg_attr(test, assert_instr(vpmulhw))]
1657#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1658pub const fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1659    unsafe {
1660        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
1661        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1662    }
1663}
1664
1665/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1666///
1667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epi16&expand=3955)
1668#[inline]
1669#[target_feature(enable = "avx512bw,avx512vl")]
1670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1671#[cfg_attr(test, assert_instr(vpmulhw))]
1672#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1673pub const fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1674    unsafe {
1675        let mul = _mm_mulhi_epi16(a, b).as_i16x8();
1676        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
1677    }
1678}
1679
1680/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst.
1681///
1682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhrs_epi16&expand=3986)
1683#[inline]
1684#[target_feature(enable = "avx512bw")]
1685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1686#[cfg_attr(test, assert_instr(vpmulhrsw))]
1687pub fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i {
1688    unsafe { transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32())) }
1689}
1690
1691/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1692///
1693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhrs_epi16&expand=3984)
1694#[inline]
1695#[target_feature(enable = "avx512bw")]
1696#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1697#[cfg_attr(test, assert_instr(vpmulhrsw))]
1698pub fn _mm512_mask_mulhrs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1699    unsafe {
1700        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
1701        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1702    }
1703}
1704
1705/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1706///
1707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhrs_epi16&expand=3985)
1708#[inline]
1709#[target_feature(enable = "avx512bw")]
1710#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1711#[cfg_attr(test, assert_instr(vpmulhrsw))]
1712pub fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1713    unsafe {
1714        let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
1715        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
1716    }
1717}
1718
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhrs_epi16&expand=3981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm256_mask_mulhrs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `mul` when bit i of `k` is set, otherwise from `src`.
        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}
1732
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhrs_epi16&expand=3982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}
1746
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhrs_epi16&expand=3978)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `mul` when bit i of `k` is set, otherwise from `src`.
        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}
1760
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhrs_epi16&expand=3979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}
1774
/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi16&expand=3996)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
    // The low 16 bits of the 32-bit product equal a wrapping 16-bit multiply,
    // which is exactly what lane-wise `simd_mul` on i16x32 performs.
    unsafe { transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) }
}
1786
/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi16&expand=3994)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mullo_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `mul` when bit i of `k` is set, otherwise from `src`.
        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
    }
}
1806
/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi16&expand=3995)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let mul = _mm512_mullo_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
    }
}
1821
/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi16&expand=3991)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mullo_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `mul` when bit i of `k` is set, otherwise from `src`.
        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
    }
}
1841
/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi16&expand=3992)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let mul = _mm256_mullo_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
    }
}
1856
/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi16&expand=3988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `mul` when bit i of `k` is set, otherwise from `src`.
        let mul = _mm_mullo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
    }
}
1871
/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi16&expand=3989)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let mul = _mm_mullo_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
    }
}
1886
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu16&expand=3609)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise max; reinterpreting the lanes as u16x32 selects the unsigned
    // comparison, matching `vpmaxuw`.
    unsafe { simd_imax(a.as_u16x32(), b.as_u16x32()).as_m512i() }
}
1898
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu16&expand=3607)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `max` when bit i of `k` is set, otherwise from `src`.
        let max = _mm512_max_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, max, src.as_u16x32()))
    }
}
1913
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu16&expand=3608)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let max = _mm512_max_epu16(a, b).as_u16x32();
        transmute(simd_select_bitmask(k, max, u16x32::ZERO))
    }
}
1928
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu16&expand=3604)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `max` when bit i of `k` is set, otherwise from `src`.
        let max = _mm256_max_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, max, src.as_u16x16()))
    }
}
1943
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu16&expand=3605)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let max = _mm256_max_epu16(a, b).as_u16x16();
        transmute(simd_select_bitmask(k, max, u16x16::ZERO))
    }
}
1958
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu16&expand=3601)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `max` when bit i of `k` is set, otherwise from `src`.
        let max = _mm_max_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, max, src.as_u16x8()))
    }
}
1973
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu16&expand=3602)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let max = _mm_max_epu16(a, b).as_u16x8();
        transmute(simd_select_bitmask(k, max, u16x8::ZERO))
    }
}
1988
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu8&expand=3636)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise max; reinterpreting the lanes as u8x64 selects the unsigned
    // comparison, matching `vpmaxub`.
    unsafe { simd_imax(a.as_u8x64(), b.as_u8x64()).as_m512i() }
}
2000
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu8&expand=3634)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `max` when bit i of `k` is set, otherwise from `src`.
        let max = _mm512_max_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, max, src.as_u8x64()))
    }
}
2015
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu8&expand=3635)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let max = _mm512_max_epu8(a, b).as_u8x64();
        transmute(simd_select_bitmask(k, max, u8x64::ZERO))
    }
}
2030
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu8&expand=3631)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `max` when bit i of `k` is set, otherwise from `src`.
        let max = _mm256_max_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, max, src.as_u8x32()))
    }
}
2045
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu8&expand=3632)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let max = _mm256_max_epu8(a, b).as_u8x32();
        transmute(simd_select_bitmask(k, max, u8x32::ZERO))
    }
}
2060
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu8&expand=3628)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `max` when bit i of `k` is set, otherwise from `src`.
        let max = _mm_max_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, max, src.as_u8x16()))
    }
}
2075
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu8&expand=3629)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let max = _mm_max_epu8(a, b).as_u8x16();
        transmute(simd_select_bitmask(k, max, u8x16::ZERO))
    }
}
2090
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi16&expand=3573)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise max on i16x32 lanes gives the signed comparison,
    // matching `vpmaxsw`.
    unsafe { simd_imax(a.as_i16x32(), b.as_i16x32()).as_m512i() }
}
2102
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi16&expand=3571)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `max` when bit i of `k` is set, otherwise from `src`.
        let max = _mm512_max_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, max, src.as_i16x32()))
    }
}
2117
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi16&expand=3572)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let max = _mm512_max_epi16(a, b).as_i16x32();
        transmute(simd_select_bitmask(k, max, i16x32::ZERO))
    }
}
2132
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi16&expand=3568)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `max` when bit i of `k` is set, otherwise from `src`.
        let max = _mm256_max_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, max, src.as_i16x16()))
    }
}
2147
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi16&expand=3569)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let max = _mm256_max_epi16(a, b).as_i16x16();
        transmute(simd_select_bitmask(k, max, i16x16::ZERO))
    }
}
2162
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi16&expand=3565)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `max` when bit i of `k` is set, otherwise from `src`.
        let max = _mm_max_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, max, src.as_i16x8()))
    }
}
2177
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi16&expand=3566)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let max = _mm_max_epi16(a, b).as_i16x8();
        transmute(simd_select_bitmask(k, max, i16x8::ZERO))
    }
}
2192
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi8&expand=3600)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise max on i8x64 lanes gives the signed comparison,
    // matching `vpmaxsb`.
    unsafe { simd_imax(a.as_i8x64(), b.as_i8x64()).as_m512i() }
}
2204
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi8&expand=3598)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `max` when bit i of `k` is set, otherwise from `src`.
        let max = _mm512_max_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, max, src.as_i8x64()))
    }
}
2219
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi8&expand=3599)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let max = _mm512_max_epi8(a, b).as_i8x64();
        transmute(simd_select_bitmask(k, max, i8x64::ZERO))
    }
}
2234
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi8&expand=3595)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `max` when bit i of `k` is set, otherwise from `src`.
        let max = _mm256_max_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, max, src.as_i8x32()))
    }
}
2249
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi8&expand=3596)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let max = _mm256_max_epi8(a, b).as_i8x32();
        transmute(simd_select_bitmask(k, max, i8x32::ZERO))
    }
}
2264
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi8&expand=3592)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Compute the full-width result, then blend per lane: lane i comes
        // from `max` when bit i of `k` is set, otherwise from `src`.
        let max = _mm_max_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, max, src.as_i8x16()))
    }
}
2279
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi8&expand=3593)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        // Compute the full-width result, then zero every lane whose mask bit
        // in `k` is clear.
        let max = _mm_max_epi8(a, b).as_i8x16();
        transmute(simd_select_bitmask(k, max, i8x16::ZERO))
    }
}
2294
/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu16&expand=3723)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
    // Lane-wise min; reinterpreting the lanes as u16x32 selects the unsigned
    // comparison, matching `vpminuw`.
    unsafe { simd_imin(a.as_u16x32(), b.as_u16x32()).as_m512i() }
}
2306
2307/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2308///
2309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu16&expand=3721)
2310#[inline]
2311#[target_feature(enable = "avx512bw")]
2312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2313#[cfg_attr(test, assert_instr(vpminuw))]
2314#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2315pub const fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2316    unsafe {
2317        let min = _mm512_min_epu16(a, b).as_u16x32();
2318        transmute(simd_select_bitmask(k, min, src.as_u16x32()))
2319    }
2320}
2321
2322/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2323///
2324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu16&expand=3722)
2325#[inline]
2326#[target_feature(enable = "avx512bw")]
2327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2328#[cfg_attr(test, assert_instr(vpminuw))]
2329#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2330pub const fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2331    unsafe {
2332        let min = _mm512_min_epu16(a, b).as_u16x32();
2333        transmute(simd_select_bitmask(k, min, u16x32::ZERO))
2334    }
2335}
2336
2337/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2338///
2339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu16&expand=3718)
2340#[inline]
2341#[target_feature(enable = "avx512bw,avx512vl")]
2342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2343#[cfg_attr(test, assert_instr(vpminuw))]
2344#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2345pub const fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2346    unsafe {
2347        let min = _mm256_min_epu16(a, b).as_u16x16();
2348        transmute(simd_select_bitmask(k, min, src.as_u16x16()))
2349    }
2350}
2351
2352/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2353///
2354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu16&expand=3719)
2355#[inline]
2356#[target_feature(enable = "avx512bw,avx512vl")]
2357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2358#[cfg_attr(test, assert_instr(vpminuw))]
2359#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2360pub const fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2361    unsafe {
2362        let min = _mm256_min_epu16(a, b).as_u16x16();
2363        transmute(simd_select_bitmask(k, min, u16x16::ZERO))
2364    }
2365}
2366
2367/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2368///
2369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu16&expand=3715)
2370#[inline]
2371#[target_feature(enable = "avx512bw,avx512vl")]
2372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2373#[cfg_attr(test, assert_instr(vpminuw))]
2374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2375pub const fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2376    unsafe {
2377        let min = _mm_min_epu16(a, b).as_u16x8();
2378        transmute(simd_select_bitmask(k, min, src.as_u16x8()))
2379    }
2380}
2381
2382/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2383///
2384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu16&expand=3716)
2385#[inline]
2386#[target_feature(enable = "avx512bw,avx512vl")]
2387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2388#[cfg_attr(test, assert_instr(vpminuw))]
2389#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2390pub const fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2391    unsafe {
2392        let min = _mm_min_epu16(a, b).as_u16x8();
2393        transmute(simd_select_bitmask(k, min, u16x8::ZERO))
2394    }
2395}
2396
2397/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst.
2398///
2399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu8&expand=3750)
2400#[inline]
2401#[target_feature(enable = "avx512bw")]
2402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2403#[cfg_attr(test, assert_instr(vpminub))]
2404#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2405pub const fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
2406    unsafe { simd_imin(a.as_u8x64(), b.as_u8x64()).as_m512i() }
2407}
2408
2409/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2410///
2411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu8&expand=3748)
2412#[inline]
2413#[target_feature(enable = "avx512bw")]
2414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2415#[cfg_attr(test, assert_instr(vpminub))]
2416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2417pub const fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2418    unsafe {
2419        let min = _mm512_min_epu8(a, b).as_u8x64();
2420        transmute(simd_select_bitmask(k, min, src.as_u8x64()))
2421    }
2422}
2423
2424/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2425///
2426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu8&expand=3749)
2427#[inline]
2428#[target_feature(enable = "avx512bw")]
2429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2430#[cfg_attr(test, assert_instr(vpminub))]
2431#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2432pub const fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2433    unsafe {
2434        let min = _mm512_min_epu8(a, b).as_u8x64();
2435        transmute(simd_select_bitmask(k, min, u8x64::ZERO))
2436    }
2437}
2438
2439/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2440///
2441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu8&expand=3745)
2442#[inline]
2443#[target_feature(enable = "avx512bw,avx512vl")]
2444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2445#[cfg_attr(test, assert_instr(vpminub))]
2446#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2447pub const fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2448    unsafe {
2449        let min = _mm256_min_epu8(a, b).as_u8x32();
2450        transmute(simd_select_bitmask(k, min, src.as_u8x32()))
2451    }
2452}
2453
2454/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2455///
2456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu8&expand=3746)
2457#[inline]
2458#[target_feature(enable = "avx512bw,avx512vl")]
2459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2460#[cfg_attr(test, assert_instr(vpminub))]
2461#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2462pub const fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2463    unsafe {
2464        let min = _mm256_min_epu8(a, b).as_u8x32();
2465        transmute(simd_select_bitmask(k, min, u8x32::ZERO))
2466    }
2467}
2468
2469/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2470///
2471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu8&expand=3742)
2472#[inline]
2473#[target_feature(enable = "avx512bw,avx512vl")]
2474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2475#[cfg_attr(test, assert_instr(vpminub))]
2476#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2477pub const fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2478    unsafe {
2479        let min = _mm_min_epu8(a, b).as_u8x16();
2480        transmute(simd_select_bitmask(k, min, src.as_u8x16()))
2481    }
2482}
2483
2484/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2485///
2486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu8&expand=3743)
2487#[inline]
2488#[target_feature(enable = "avx512bw,avx512vl")]
2489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2490#[cfg_attr(test, assert_instr(vpminub))]
2491#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2492pub const fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2493    unsafe {
2494        let min = _mm_min_epu8(a, b).as_u8x16();
2495        transmute(simd_select_bitmask(k, min, u8x16::ZERO))
2496    }
2497}
2498
2499/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst.
2500///
2501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi16&expand=3687)
2502#[inline]
2503#[target_feature(enable = "avx512bw")]
2504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2505#[cfg_attr(test, assert_instr(vpminsw))]
2506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2507pub const fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
2508    unsafe { simd_imin(a.as_i16x32(), b.as_i16x32()).as_m512i() }
2509}
2510
2511/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2512///
2513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi16&expand=3685)
2514#[inline]
2515#[target_feature(enable = "avx512bw")]
2516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2517#[cfg_attr(test, assert_instr(vpminsw))]
2518#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2519pub const fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2520    unsafe {
2521        let min = _mm512_min_epi16(a, b).as_i16x32();
2522        transmute(simd_select_bitmask(k, min, src.as_i16x32()))
2523    }
2524}
2525
2526/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2527///
2528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi16&expand=3686)
2529#[inline]
2530#[target_feature(enable = "avx512bw")]
2531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2532#[cfg_attr(test, assert_instr(vpminsw))]
2533#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2534pub const fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2535    unsafe {
2536        let min = _mm512_min_epi16(a, b).as_i16x32();
2537        transmute(simd_select_bitmask(k, min, i16x32::ZERO))
2538    }
2539}
2540
2541/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2542///
2543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi16&expand=3682)
2544#[inline]
2545#[target_feature(enable = "avx512bw,avx512vl")]
2546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2547#[cfg_attr(test, assert_instr(vpminsw))]
2548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2549pub const fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2550    unsafe {
2551        let min = _mm256_min_epi16(a, b).as_i16x16();
2552        transmute(simd_select_bitmask(k, min, src.as_i16x16()))
2553    }
2554}
2555
2556/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2557///
2558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi16&expand=3683)
2559#[inline]
2560#[target_feature(enable = "avx512bw,avx512vl")]
2561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2562#[cfg_attr(test, assert_instr(vpminsw))]
2563#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2564pub const fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2565    unsafe {
2566        let min = _mm256_min_epi16(a, b).as_i16x16();
2567        transmute(simd_select_bitmask(k, min, i16x16::ZERO))
2568    }
2569}
2570
2571/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2572///
2573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi16&expand=3679)
2574#[inline]
2575#[target_feature(enable = "avx512bw,avx512vl")]
2576#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2577#[cfg_attr(test, assert_instr(vpminsw))]
2578#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2579pub const fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2580    unsafe {
2581        let min = _mm_min_epi16(a, b).as_i16x8();
2582        transmute(simd_select_bitmask(k, min, src.as_i16x8()))
2583    }
2584}
2585
2586/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2587///
2588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi16&expand=3680)
2589#[inline]
2590#[target_feature(enable = "avx512bw,avx512vl")]
2591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2592#[cfg_attr(test, assert_instr(vpminsw))]
2593#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2594pub const fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2595    unsafe {
2596        let min = _mm_min_epi16(a, b).as_i16x8();
2597        transmute(simd_select_bitmask(k, min, i16x8::ZERO))
2598    }
2599}
2600
2601/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst.
2602///
2603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi8&expand=3714)
2604#[inline]
2605#[target_feature(enable = "avx512bw")]
2606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2607#[cfg_attr(test, assert_instr(vpminsb))]
2608#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2609pub const fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
2610    unsafe { simd_imin(a.as_i8x64(), b.as_i8x64()).as_m512i() }
2611}
2612
2613/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2614///
2615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi8&expand=3712)
2616#[inline]
2617#[target_feature(enable = "avx512bw")]
2618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2619#[cfg_attr(test, assert_instr(vpminsb))]
2620#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2621pub const fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2622    unsafe {
2623        let min = _mm512_min_epi8(a, b).as_i8x64();
2624        transmute(simd_select_bitmask(k, min, src.as_i8x64()))
2625    }
2626}
2627
2628/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2629///
2630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi8&expand=3713)
2631#[inline]
2632#[target_feature(enable = "avx512bw")]
2633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2634#[cfg_attr(test, assert_instr(vpminsb))]
2635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2636pub const fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2637    unsafe {
2638        let min = _mm512_min_epi8(a, b).as_i8x64();
2639        transmute(simd_select_bitmask(k, min, i8x64::ZERO))
2640    }
2641}
2642
2643/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2644///
2645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi8&expand=3709)
2646#[inline]
2647#[target_feature(enable = "avx512bw,avx512vl")]
2648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2649#[cfg_attr(test, assert_instr(vpminsb))]
2650#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2651pub const fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2652    unsafe {
2653        let min = _mm256_min_epi8(a, b).as_i8x32();
2654        transmute(simd_select_bitmask(k, min, src.as_i8x32()))
2655    }
2656}
2657
2658/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2659///
2660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi8&expand=3710)
2661#[inline]
2662#[target_feature(enable = "avx512bw,avx512vl")]
2663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2664#[cfg_attr(test, assert_instr(vpminsb))]
2665#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2666pub const fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2667    unsafe {
2668        let min = _mm256_min_epi8(a, b).as_i8x32();
2669        transmute(simd_select_bitmask(k, min, i8x32::ZERO))
2670    }
2671}
2672
2673/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2674///
2675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi8&expand=3706)
2676#[inline]
2677#[target_feature(enable = "avx512bw,avx512vl")]
2678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2679#[cfg_attr(test, assert_instr(vpminsb))]
2680#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2681pub const fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2682    unsafe {
2683        let min = _mm_min_epi8(a, b).as_i8x16();
2684        transmute(simd_select_bitmask(k, min, src.as_i8x16()))
2685    }
2686}
2687
2688/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2689///
2690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi8&expand=3707)
2691#[inline]
2692#[target_feature(enable = "avx512bw,avx512vl")]
2693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2694#[cfg_attr(test, assert_instr(vpminsb))]
2695#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2696pub const fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2697    unsafe {
2698        let min = _mm_min_epi8(a, b).as_i8x16();
2699        transmute(simd_select_bitmask(k, min, i8x16::ZERO))
2700    }
2701}
2702
2703/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2704///
2705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu16_mask&expand=1050)
2706#[inline]
2707#[target_feature(enable = "avx512bw")]
2708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2709#[cfg_attr(test, assert_instr(vpcmp))]
2710#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2711pub const fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
2712    unsafe { simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32())) }
2713}
2714
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegate to the generic masked compare with the less-than predicate;
    // result bits whose corresponding `k1` bit is clear come out zeroed.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2726
2727/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2728///
2729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu16_mask&expand=1050)
2730#[inline]
2731#[target_feature(enable = "avx512bw,avx512vl")]
2732#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2733#[cfg_attr(test, assert_instr(vpcmp))]
2734#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2735pub const fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2736    unsafe { simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16())) }
2737}
2738
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu16_mask&expand=1049)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegate to the generic masked compare with the less-than predicate;
    // result bits whose corresponding `k1` bit is clear come out zeroed.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2750
2751/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2752///
2753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu16_mask&expand=1018)
2754#[inline]
2755#[target_feature(enable = "avx512bw,avx512vl")]
2756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2757#[cfg_attr(test, assert_instr(vpcmp))]
2758#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2759pub const fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2760    unsafe { simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8())) }
2761}
2762
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic masked compare with the less-than predicate;
    // result bits whose corresponding `k1` bit is clear come out zeroed.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2774
2775/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
2776///
2777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_cmplt_epu8_mask&expand=1068)
2778#[inline]
2779#[target_feature(enable = "avx512bw")]
2780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2781#[cfg_attr(test, assert_instr(vpcmp))]
2782#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2783pub const fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
2784    unsafe { simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64())) }
2785}
2786
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic masked compare with the less-than predicate;
    // result bits whose corresponding `k1` bit is clear come out zeroed.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2798
2799/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
2800///
2801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu8_mask&expand=1066)
2802#[inline]
2803#[target_feature(enable = "avx512bw,avx512vl")]
2804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2805#[cfg_attr(test, assert_instr(vpcmp))]
2806#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2807pub const fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
2808    unsafe { simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32())) }
2809}
2810
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu8_mask&expand=1067)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the generic masked compare with the less-than predicate;
    // result bits whose corresponding `k1` bit is clear come out zeroed.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2822
2823/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
2824///
2825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu8_mask&expand=1064)
2826#[inline]
2827#[target_feature(enable = "avx512bw,avx512vl")]
2828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2829#[cfg_attr(test, assert_instr(vpcmp))]
2830#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2831pub const fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
2832    unsafe { simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16())) }
2833}
2834
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu8_mask&expand=1065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the generic masked compare with the less-than predicate;
    // result bits whose corresponding `k1` bit is clear come out zeroed.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2846
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi16_mask&expand=1022)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Lane-wise `a < b` yields an all-ones/all-zeros vector; `simd_bitmask`
    // packs one bit per lane into the 32-bit mask.
    unsafe { simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32())) }
}
2858
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegate to the generic compare with the less-than predicate; `k1`
    // zeroes result bits whose mask bit is clear.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2870
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi16_mask&expand=1020)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Lane-wise `a < b`, then pack one bit per lane into the 16-bit mask.
    unsafe { simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16())) }
}
2882
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi16_mask&expand=1021)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegate to the generic compare with the less-than predicate.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2894
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise `a < b`, then pack one bit per lane into the 8-bit mask.
    unsafe { simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
}
2906
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic compare with the less-than predicate.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2918
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi8_mask&expand=1044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Lane-wise `a < b`, then pack one bit per lane into the 64-bit mask.
    unsafe { simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64())) }
}
2930
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic compare with the less-than predicate.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2942
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi8_mask&expand=1042)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Lane-wise `a < b`, then pack one bit per lane into the 32-bit mask.
    unsafe { simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32())) }
}
2954
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi8_mask&expand=1043)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the generic compare with the less-than predicate.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2966
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8_mask&expand=1040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Lane-wise `a < b`, then pack one bit per lane into the 16-bit mask.
    unsafe { simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
}
2978
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi8_mask&expand=1041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the generic compare with the less-than predicate.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}
2990
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu16_mask&expand=927)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Lane-wise unsigned `a > b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32())) }
}
3002
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // `_MM_CMPINT_NLE` ("not less-or-equal") encodes greater-than.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3014
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu16_mask&expand=925)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Lane-wise unsigned `a > b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16())) }
}
3026
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu16_mask&expand=926)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // `_MM_CMPINT_NLE` ("not less-or-equal") encodes greater-than.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3038
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu16_mask&expand=923)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise unsigned `a > b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8())) }
}
3050
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu16_mask&expand=924)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `_MM_CMPINT_NLE` ("not less-or-equal") encodes greater-than.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3062
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu8_mask&expand=945)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Lane-wise unsigned `a > b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64())) }
}
3074
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // `_MM_CMPINT_NLE` ("not less-or-equal") encodes greater-than.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3086
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu8_mask&expand=943)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Lane-wise unsigned `a > b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32())) }
}
3098
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu8_mask&expand=944)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // `_MM_CMPINT_NLE` ("not less-or-equal") encodes greater-than.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3110
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu8_mask&expand=941)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Lane-wise unsigned `a > b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16())) }
}
3122
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu8_mask&expand=942)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // `_MM_CMPINT_NLE` ("not less-or-equal") encodes greater-than.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3134
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi16_mask&expand=897)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Lane-wise signed `a > b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32())) }
}
3146
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi16_mask&expand=898)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // `_MM_CMPINT_NLE` ("not less-or-equal") encodes greater-than.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3158
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16_mask&expand=895)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Lane-wise signed `a > b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
}
3170
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi16_mask&expand=896)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // `_MM_CMPINT_NLE` ("not less-or-equal") encodes greater-than.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3182
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16_mask&expand=893)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise signed `a > b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
}
3194
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi16_mask&expand=894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `_MM_CMPINT_NLE` ("not less-or-equal") encodes greater-than.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3206
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi8_mask&expand=921)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Lane-wise signed `a > b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64())) }
}
3218
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi8_mask&expand=922)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // `_MM_CMPINT_NLE` ("not less-or-equal") encodes greater-than.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3230
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8_mask&expand=919)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Lane-wise signed `a > b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
}
3242
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi8_mask&expand=920)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // `_MM_CMPINT_NLE` ("not less-or-equal") encodes greater-than.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3254
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8_mask&expand=917)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Lane-wise signed `a > b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
}
3266
/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi8_mask&expand=918)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // `_MM_CMPINT_NLE` ("not less-or-equal") encodes greater-than.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
3278
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu16_mask&expand=989)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Lane-wise unsigned `a <= b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32())) }
}
3290
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu16_mask&expand=990)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegate to the generic compare with the less-or-equal predicate.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3302
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu16_mask&expand=987)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Lane-wise unsigned `a <= b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16())) }
}
3314
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu16_mask&expand=988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegate to the generic compare with the less-or-equal predicate.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3326
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu16_mask&expand=985)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise unsigned `a <= b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8())) }
}
3338
/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu16_mask&expand=986)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic compare with the less-or-equal predicate.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3350
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu8_mask&expand=1007)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Lane-wise unsigned `a <= b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64())) }
}
3362
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu8_mask&expand=1008)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic compare with the less-or-equal predicate.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3374
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu8_mask&expand=1005)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Lane-wise unsigned `a <= b`, then pack one bit per lane into the mask.
    unsafe { simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32())) }
}
3386
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu8_mask&expand=1006)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Reuse the generic compare with the LE predicate; lanes masked off by `k1` yield 0 bits.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3398
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu8_mask&expand=1003)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Unsigned lane-wise `a <= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16())) }
}
3410
/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu8_mask&expand=1004)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Reuse the generic compare with the LE predicate; lanes masked off by `k1` yield 0 bits.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3422
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi16_mask&expand=965)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Signed lane-wise `a <= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32())) }
}
3434
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi16_mask&expand=966)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Reuse the generic compare with the LE predicate; lanes masked off by `k1` yield 0 bits.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3446
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi16_mask&expand=963)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Signed lane-wise `a <= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16())) }
}
3458
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi16_mask&expand=964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Reuse the generic compare with the LE predicate; lanes masked off by `k1` yield 0 bits.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3470
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi16_mask&expand=961)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Signed lane-wise `a <= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8())) }
}
3482
/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi16_mask&expand=962)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Reuse the generic compare with the LE predicate; lanes masked off by `k1` yield 0 bits.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3494
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi8_mask&expand=983)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Signed lane-wise `a <= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64())) }
}
3506
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi8_mask&expand=984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Reuse the generic compare with the LE predicate; lanes masked off by `k1` yield 0 bits.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3518
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi8_mask&expand=981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Signed lane-wise `a <= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32())) }
}
3530
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi8_mask&expand=982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Reuse the generic compare with the LE predicate; lanes masked off by `k1` yield 0 bits.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3542
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi8_mask&expand=979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Signed lane-wise `a <= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16())) }
}
3554
/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi8_mask&expand=980)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Reuse the generic compare with the LE predicate; lanes masked off by `k1` yield 0 bits.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
3566
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu16_mask&expand=867)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Unsigned lane-wise `a >= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32())) }
}
3578
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu16_mask&expand=868)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes >=; lanes masked off by `k1` yield 0 bits.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3590
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu16_mask&expand=865)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Unsigned lane-wise `a >= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16())) }
}
3602
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu16_mask&expand=866)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes >=; lanes masked off by `k1` yield 0 bits.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3614
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu16_mask&expand=863)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Unsigned lane-wise `a >= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8())) }
}
3626
/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu16_mask&expand=864)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes >=; lanes masked off by `k1` yield 0 bits.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3638
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu8_mask&expand=885)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Unsigned lane-wise `a >= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64())) }
}
3650
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu8_mask&expand=886)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes >=; lanes masked off by `k1` yield 0 bits.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3662
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu8_mask&expand=883)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Unsigned lane-wise `a >= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32())) }
}
3674
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu8_mask&expand=884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes >=; lanes masked off by `k1` yield 0 bits.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3686
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu8_mask&expand=881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Unsigned lane-wise `a >= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16())) }
}
3698
/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu8_mask&expand=882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes >=; lanes masked off by `k1` yield 0 bits.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3710
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi16_mask&expand=843)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Signed lane-wise `a >= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32())) }
}
3722
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi16_mask&expand=844)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes >=; lanes masked off by `k1` yield 0 bits.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3734
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi16_mask&expand=841)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Signed lane-wise `a >= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16())) }
}
3746
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi16_mask&expand=842)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes >=; lanes masked off by `k1` yield 0 bits.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3758
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi16_mask&expand=839)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Signed lane-wise `a >= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8())) }
}
3770
/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi16_mask&expand=840)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes >=; lanes masked off by `k1` yield 0 bits.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3782
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi8_mask&expand=861)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Signed lane-wise `a >= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64())) }
}
3794
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi8_mask&expand=862)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes >=; lanes masked off by `k1` yield 0 bits.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3806
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi8_mask&expand=859)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Signed lane-wise `a >= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32())) }
}
3818
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi8_mask&expand=860)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes >=; lanes masked off by `k1` yield 0 bits.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3830
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi8_mask&expand=857)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Signed lane-wise `a >= b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16())) }
}
3842
/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi8_mask&expand=858)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // `_MM_CMPINT_NLT` (not-less-than) encodes >=; lanes masked off by `k1` yield 0 bits.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
3854
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu16_mask&expand=801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Lane-wise `a == b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32())) }
}
3866
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu16_mask&expand=802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Reuse the generic compare with the EQ predicate; lanes masked off by `k1` yield 0 bits.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3878
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu16_mask&expand=799)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Lane-wise `a == b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16())) }
}
3890
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu16_mask&expand=800)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Reuse the generic compare with the EQ predicate; lanes masked off by `k1` yield 0 bits.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3902
/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu16_mask&expand=797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise `a == b`; bit i of the returned mask is set iff lane i compares true.
    unsafe { simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8())) }
}
3914
3915/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3916///
3917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu16_mask&expand=798)
3918#[inline]
3919#[target_feature(enable = "avx512bw,avx512vl")]
3920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3921#[cfg_attr(test, assert_instr(vpcmp))]
3922#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3923pub const fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3924    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
3925}
3926
/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu8_mask&expand=819)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Lane-wise `==` over 64 u8 lanes; `simd_bitmask` packs the per-lane
    // results into the 64-bit mask (bit i = lane i).
    unsafe { simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu8_mask&expand=820)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic predicate compare with _MM_CMPINT_EQ,
    // pre-masked by k1.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu8_mask&expand=817)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Lane-wise `==` over 32 u8 lanes, packed into a 32-bit mask.
    unsafe { simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu8_mask&expand=818)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the generic predicate compare with _MM_CMPINT_EQ,
    // pre-masked by k1.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu8_mask&expand=815)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Lane-wise `==` over 16 u8 lanes, packed into a 16-bit mask.
    unsafe { simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu8_mask&expand=816)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the generic predicate compare with _MM_CMPINT_EQ,
    // pre-masked by k1.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
3998
/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi16_mask&expand=771)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Lane-wise `==` over 32 i16 lanes; `simd_bitmask` packs the per-lane
    // results into the 32-bit mask (bit i = lane i).
    unsafe { simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi16_mask&expand=772)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegate to the generic predicate compare with _MM_CMPINT_EQ,
    // pre-masked by k1.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16_mask&expand=769)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Lane-wise `==` over 16 i16 lanes, packed into a 16-bit mask.
    unsafe { simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi16_mask&expand=770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegate to the generic predicate compare with _MM_CMPINT_EQ,
    // pre-masked by k1.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16_mask&expand=767)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise `==` over 8 i16 lanes, packed into an 8-bit mask.
    unsafe { simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi16_mask&expand=768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with _MM_CMPINT_EQ,
    // pre-masked by k1.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
4070
/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi8_mask&expand=795)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Lane-wise `==` over 64 i8 lanes; `simd_bitmask` packs the per-lane
    // results into the 64-bit mask (bit i = lane i).
    unsafe { simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi8_mask&expand=796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic predicate compare with _MM_CMPINT_EQ,
    // pre-masked by k1.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8_mask&expand=793)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Lane-wise `==` over 32 i8 lanes, packed into a 32-bit mask.
    unsafe { simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi8_mask&expand=794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the generic predicate compare with _MM_CMPINT_EQ,
    // pre-masked by k1.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8_mask&expand=791)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Lane-wise `==` over 16 i8 lanes, packed into a 16-bit mask.
    unsafe { simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi8_mask&expand=792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the generic predicate compare with _MM_CMPINT_EQ,
    // pre-masked by k1.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
4142
/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu16_mask&expand=1106)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Lane-wise `!=` over 32 u16 lanes; `simd_bitmask` packs the per-lane
    // results into the 32-bit mask (bit i = lane i).
    unsafe { simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32())) }
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu16_mask&expand=1107)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegate to the generic predicate compare with _MM_CMPINT_NE,
    // pre-masked by k1.
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu16_mask&expand=1104)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Lane-wise `!=` over 16 u16 lanes, packed into a 16-bit mask.
    unsafe { simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16())) }
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu16_mask&expand=1105)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegate to the generic predicate compare with _MM_CMPINT_NE,
    // pre-masked by k1.
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu16_mask&expand=1102)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise `!=` over 8 u16 lanes, packed into an 8-bit mask.
    unsafe { simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8())) }
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu16_mask&expand=1103)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with _MM_CMPINT_NE,
    // pre-masked by k1.
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4214
/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu8_mask&expand=1124)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Lane-wise `!=` over 64 u8 lanes; `simd_bitmask` packs the per-lane
    // results into the 64-bit mask (bit i = lane i).
    unsafe { simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64())) }
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu8_mask&expand=1125)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic predicate compare with _MM_CMPINT_NE,
    // pre-masked by k1.
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu8_mask&expand=1122)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Lane-wise `!=` over 32 u8 lanes, packed into a 32-bit mask.
    unsafe { simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32())) }
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu8_mask&expand=1123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the generic predicate compare with _MM_CMPINT_NE,
    // pre-masked by k1.
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu8_mask&expand=1120)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Lane-wise `!=` over 16 u8 lanes, packed into a 16-bit mask.
    unsafe { simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16())) }
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu8_mask&expand=1121)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the generic predicate compare with _MM_CMPINT_NE,
    // pre-masked by k1.
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4286
/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi16_mask&expand=1082)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Lane-wise `!=` over 32 i16 lanes; `simd_bitmask` packs the per-lane
    // results into the 32-bit mask (bit i = lane i).
    unsafe { simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32())) }
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi16_mask&expand=1083)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Delegate to the generic predicate compare with _MM_CMPINT_NE,
    // pre-masked by k1.
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi16_mask&expand=1080)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Lane-wise `!=` over 16 i16 lanes, packed into a 16-bit mask.
    unsafe { simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16())) }
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi16_mask&expand=1081)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Delegate to the generic predicate compare with _MM_CMPINT_NE,
    // pre-masked by k1.
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi16_mask&expand=1078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Lane-wise `!=` over 8 i16 lanes, packed into an 8-bit mask.
    unsafe { simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8())) }
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi16_mask&expand=1079)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Delegate to the generic predicate compare with _MM_CMPINT_NE,
    // pre-masked by k1.
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4358
/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi8_mask&expand=1100)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Lane-wise `!=` over 64 i8 lanes; `simd_bitmask` packs the per-lane
    // results into the 64-bit mask (bit i = lane i).
    unsafe { simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64())) }
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi8_mask&expand=1101)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Delegate to the generic predicate compare with _MM_CMPINT_NE,
    // pre-masked by k1.
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi8_mask&expand=1098)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Lane-wise `!=` over 32 i8 lanes, packed into a 32-bit mask.
    unsafe { simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32())) }
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi8_mask&expand=1099)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Delegate to the generic predicate compare with _MM_CMPINT_NE,
    // pre-masked by k1.
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi8_mask&expand=1096)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Lane-wise `!=` over 16 i8 lanes, packed into a 16-bit mask.
    unsafe { simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16())) }
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi8_mask&expand=1097)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcmp))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Delegate to the generic predicate compare with _MM_CMPINT_NE,
    // pre-masked by k1.
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
4430
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu16_mask&expand=715)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 are a valid predicate encoding.
        static_assert_uimm_bits!(IMM8, 3);
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        // Each arm corresponds to an _MM_CMPINT_* predicate; the two
        // constant arms implement the always-false / always-true predicates.
        let r = match IMM8 {
            0 => simd_eq(a, b),     // _MM_CMPINT_EQ
            1 => simd_lt(a, b),     // _MM_CMPINT_LT
            2 => simd_le(a, b),     // _MM_CMPINT_LE
            3 => i16x32::ZERO,      // _MM_CMPINT_FALSE: no lane set
            4 => simd_ne(a, b),     // _MM_CMPINT_NE
            5 => simd_ge(a, b),     // _MM_CMPINT_NLT (not less than)
            6 => simd_gt(a, b),     // _MM_CMPINT_NLE (not less or equal)
            _ => i16x32::splat(-1), // _MM_CMPINT_TRUE: every lane set
        };
        // Pack the per-lane results into the 32-bit mask (bit i = lane i).
        simd_bitmask(r)
    }
}
4458
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu16_mask&expand=716)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // u16 lane views make the ordered comparisons below unsigned.
        let a = a.as_u16x32();
        let b = b.as_u16x32();
        // Widen each bit of k1 into a full lane (all-ones when set, all-zeros
        // when clear) so the ANDs below zero out masked-off lanes.
        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x32::ZERO,                // predicate 3: always false
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,                          // predicate 7: true for active lanes
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4491
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu16_mask&expand=713)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // u16 lane views make the ordered comparisons below unsigned.
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        // Each arm produces all-ones lanes where the predicate holds and
        // all-zeros lanes where it does not.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x16::ZERO,      // predicate 3: always false
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x16::splat(-1), // predicate 7: always true
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4519
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu16_mask&expand=714)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // u16 lane views make the ordered comparisons below unsigned.
        let a = a.as_u16x16();
        let b = b.as_u16x16();
        // Widen each bit of k1 into a full lane so the ANDs below zero out
        // masked-off lanes.
        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x16::ZERO,                // predicate 3: always false
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,                          // predicate 7: true for active lanes
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4552
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu16_mask&expand=711)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // u16 lane views make the ordered comparisons below unsigned.
        let a = a.as_u16x8();
        let b = b.as_u16x8();
        // Each arm produces all-ones lanes where the predicate holds and
        // all-zeros lanes where it does not.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x8::ZERO,      // predicate 3: always false
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x8::splat(-1), // predicate 7: always true
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4580
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu16_mask&expand=712)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // u16 lane views make the ordered comparisons below unsigned.
        let a = a.as_u16x8();
        let b = b.as_u16x8();
        // Widen each bit of k1 into a full lane so the ANDs below zero out
        // masked-off lanes.
        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x8::ZERO,                 // predicate 3: always false
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,                          // predicate 7: true for active lanes
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4613
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu8_mask&expand=733)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // u8 lane views make the ordered comparisons below unsigned.
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        // Each arm produces all-ones lanes where the predicate holds and
        // all-zeros lanes where it does not.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x64::ZERO,      // predicate 3: always false
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x64::splat(-1), // predicate 7: always true
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4641
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu8_mask&expand=734)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // u8 lane views make the ordered comparisons below unsigned.
        let a = a.as_u8x64();
        let b = b.as_u8x64();
        // Widen each bit of k1 into a full lane so the ANDs below zero out
        // masked-off lanes.
        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x64::ZERO,                 // predicate 3: always false
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,                          // predicate 7: true for active lanes
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4674
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu8_mask&expand=731)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // u8 lane views make the ordered comparisons below unsigned.
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        // Each arm produces all-ones lanes where the predicate holds and
        // all-zeros lanes where it does not.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x32::ZERO,      // predicate 3: always false
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x32::splat(-1), // predicate 7: always true
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4702
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu8_mask&expand=732)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // u8 lane views make the ordered comparisons below unsigned.
        let a = a.as_u8x32();
        let b = b.as_u8x32();
        // Widen each bit of k1 into a full lane so the ANDs below zero out
        // masked-off lanes.
        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x32::ZERO,                 // predicate 3: always false
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,                          // predicate 7: true for active lanes
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4735
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu8_mask&expand=729)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // u8 lane views make the ordered comparisons below unsigned.
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        // Each arm produces all-ones lanes where the predicate holds and
        // all-zeros lanes where it does not.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x16::ZERO,      // predicate 3: always false
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x16::splat(-1), // predicate 7: always true
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4763
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu8_mask&expand=730)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __mmask16 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // u8 lane views make the ordered comparisons below unsigned.
        let a = a.as_u8x16();
        let b = b.as_u8x16();
        // Widen each bit of k1 into a full lane so the ANDs below zero out
        // masked-off lanes.
        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x16::ZERO,                 // predicate 3: always false
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,                          // predicate 7: true for active lanes
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4796
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi16_mask&expand=691)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // i16 lane views keep the ordered comparisons below signed.
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // Each arm produces all-ones lanes where the predicate holds and
        // all-zeros lanes where it does not.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x32::ZERO,      // predicate 3: always false
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x32::splat(-1), // predicate 7: always true
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4824
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi16_mask&expand=692)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // i16 lane views keep the ordered comparisons below signed.
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // Widen each bit of k1 into a full lane so the ANDs below zero out
        // masked-off lanes.
        let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x32::ZERO,                // predicate 3: always false
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,                          // predicate 7: true for active lanes
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4857
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi16_mask&expand=689)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // i16 lane views keep the ordered comparisons below signed.
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        // Each arm produces all-ones lanes where the predicate holds and
        // all-zeros lanes where it does not.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x16::ZERO,      // predicate 3: always false
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x16::splat(-1), // predicate 7: always true
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4885
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi16_mask&expand=690)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // i16 lane views keep the ordered comparisons below signed.
        let a = a.as_i16x16();
        let b = b.as_i16x16();
        // Widen each bit of k1 into a full lane so the ANDs below zero out
        // masked-off lanes.
        let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x16::ZERO,                // predicate 3: always false
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,                          // predicate 7: true for active lanes
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4918
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=687)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // i16 lane views keep the ordered comparisons below signed.
        let a = a.as_i16x8();
        let b = b.as_i16x8();
        // Each arm produces all-ones lanes where the predicate holds and
        // all-zeros lanes where it does not.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i16x8::ZERO,      // predicate 3: always false
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i16x8::splat(-1), // predicate 7: always true
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4946
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi16_mask&expand=688)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // i16 lane views keep the ordered comparisons below signed.
        let a = a.as_i16x8();
        let b = b.as_i16x8();
        // Widen each bit of k1 into a full lane so the ANDs below zero out
        // masked-off lanes.
        let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i16x8::ZERO,                 // predicate 3: always false
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,                          // predicate 7: true for active lanes
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
4979
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi8_mask&expand=709)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // i8 lane views keep the ordered comparisons below signed.
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // Each arm produces all-ones lanes where the predicate holds and
        // all-zeros lanes where it does not.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x64::ZERO,      // predicate 3: always false
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x64::splat(-1), // predicate 7: always true
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
5007
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi8_mask&expand=710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // i8 lane views keep the ordered comparisons below signed.
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // Widen each bit of k1 into a full lane so the ANDs below zero out
        // masked-off lanes.
        let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x64::ZERO,                 // predicate 3: always false
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,                          // predicate 7: true for active lanes
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
5040
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi8_mask&expand=707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // i8 lane views keep the ordered comparisons below signed.
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        // Each arm produces all-ones lanes where the predicate holds and
        // all-zeros lanes where it does not.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x32::ZERO,      // predicate 3: always false
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x32::splat(-1), // predicate 7: always true
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
5068
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi8_mask&expand=708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // i8 lane views keep the ordered comparisons below signed.
        let a = a.as_i8x32();
        let b = b.as_i8x32();
        // Widen each bit of k1 into a full lane so the ANDs below zero out
        // masked-off lanes.
        let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x32::ZERO,                 // predicate 3: always false
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,                          // predicate 7: true for active lanes
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
5101
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi8_mask&expand=705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // i8 lane views keep the ordered comparisons below signed.
        let a = a.as_i8x16();
        let b = b.as_i8x16();
        // Each arm produces all-ones lanes where the predicate holds and
        // all-zeros lanes where it does not.
        let r = match IMM8 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i8x16::ZERO,      // predicate 3: always false
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i8x16::splat(-1), // predicate 7: always true
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
5129
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi8_mask&expand=706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __mmask16 {
    unsafe {
        // Only the low 3 bits of IMM8 encode a valid comparison predicate.
        static_assert_uimm_bits!(IMM8, 3);
        // i8 lane views keep the ordered comparisons below signed.
        let a = a.as_i8x16();
        let b = b.as_i8x16();
        // Widen each bit of k1 into a full lane so the ANDs below zero out
        // masked-off lanes.
        let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
        let r = match IMM8 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i8x16::ZERO,                 // predicate 3: always false
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,                          // predicate 7: true for active lanes
        };
        // Pack the per-lane masks into one bit per lane.
        simd_bitmask(r)
    }
}
5162
/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
    // Ordered (left-to-right) reduction over all 16 lanes, accumulator starts at 0.
    unsafe { simd_reduce_add_ordered(a.as_i16x16(), 0) }
}
5173
/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
    // Inactive lanes are replaced with 0, the additive identity.
    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO), 0) }
}
5184
/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
    // Ordered (left-to-right) reduction over all 8 lanes, accumulator starts at 0.
    unsafe { simd_reduce_add_ordered(a.as_i16x8(), 0) }
}
5195
/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
    // Inactive lanes are replaced with 0, the additive identity.
    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO), 0) }
}
5206
/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
    // Ordered (left-to-right) reduction over all 32 lanes, accumulator starts at 0.
    unsafe { simd_reduce_add_ordered(a.as_i8x32(), 0) }
}
5217
/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
    // Inactive lanes are replaced with 0, the additive identity.
    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO), 0) }
}
5228
/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
    // Ordered (left-to-right) reduction over all 16 lanes, accumulator starts at 0.
    unsafe { simd_reduce_add_ordered(a.as_i8x16(), 0) }
}
5239
/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
    // Inactive lanes are replaced with 0, the additive identity.
    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO), 0) }
}
5250
/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
    // Horizontal bitwise AND across all 16 lanes.
    unsafe { simd_reduce_and(a.as_i16x16()) }
}
5261
5262/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5263///
5264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi16)
5265#[inline]
5266#[target_feature(enable = "avx512bw,avx512vl")]
5267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5269pub const fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
5270    unsafe {
5271        simd_reduce_and(simd_select_bitmask(
5272            k,
5273            a.as_i16x16(),
5274            _mm256_set1_epi64x(-1).as_i16x16(),
5275        ))
5276    }
5277}
5278
/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
    // Horizontal bitwise AND across all 8 lanes.
    unsafe { simd_reduce_and(a.as_i16x8()) }
}
5289
5290/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5291///
5292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi16)
5293#[inline]
5294#[target_feature(enable = "avx512bw,avx512vl")]
5295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5297pub const fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
5298    unsafe {
5299        simd_reduce_and(simd_select_bitmask(
5300            k,
5301            a.as_i16x8(),
5302            _mm_set1_epi64x(-1).as_i16x8(),
5303        ))
5304    }
5305}
5306
/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
    // Horizontal bitwise AND across all 32 lanes.
    unsafe { simd_reduce_and(a.as_i8x32()) }
}
5317
5318/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5319///
5320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi8)
5321#[inline]
5322#[target_feature(enable = "avx512bw,avx512vl")]
5323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5324#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5325pub const fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
5326    unsafe {
5327        simd_reduce_and(simd_select_bitmask(
5328            k,
5329            a.as_i8x32(),
5330            _mm256_set1_epi64x(-1).as_i8x32(),
5331        ))
5332    }
5333}
5334
/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
    // Horizontal bitwise AND across all 16 lanes.
    unsafe { simd_reduce_and(a.as_i8x16()) }
}
5345
5346/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
5347///
5348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi8)
5349#[inline]
5350#[target_feature(enable = "avx512bw,avx512vl")]
5351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5352#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5353pub const fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
5354    unsafe {
5355        simd_reduce_and(simd_select_bitmask(
5356            k,
5357            a.as_i8x16(),
5358            _mm_set1_epi64x(-1).as_i8x16(),
5359        ))
5360    }
5361}
5362
/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
    // Horizontal signed maximum across all 16 lanes.
    unsafe { simd_reduce_max(a.as_i16x16()) }
}
5373
/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
    // Inactive lanes become i16::MIN (-32768), the identity for signed max.
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768))) }
}
5384
/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
    // Horizontal signed maximum across all 8 lanes.
    unsafe { simd_reduce_max(a.as_i16x8()) }
}
5395
/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
    // Inactive lanes become i16::MIN (-32768), the identity for signed max.
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768))) }
}
5406
/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
    // Horizontal signed maximum across all 32 lanes.
    unsafe { simd_reduce_max(a.as_i8x32()) }
}
5417
/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
    // Inactive lanes become i8::MIN (-128), the identity for signed max.
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128))) }
}
5428
/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
    // Horizontal signed maximum across all 16 lanes.
    unsafe { simd_reduce_max(a.as_i8x16()) }
}
5439
/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
    // Inactive lanes become i8::MIN (-128), the identity for signed max.
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128))) }
}
5450
/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
    // Horizontal unsigned maximum across all 16 lanes.
    unsafe { simd_reduce_max(a.as_u16x16()) }
}
5461
/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
    // Inactive lanes become 0, the identity for unsigned max.
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) }
}
5472
/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
    // Horizontal unsigned maximum across all 8 lanes.
    unsafe { simd_reduce_max(a.as_u16x8()) }
}
5483
/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
    // Inactive lanes become 0, the identity for unsigned max.
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) }
}
5494
/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
    // Horizontal unsigned maximum across all 32 lanes.
    unsafe { simd_reduce_max(a.as_u8x32()) }
}
5505
/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
    // Inactive lanes become 0, the identity for unsigned max.
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) }
}
5516
/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
    // Horizontal unsigned maximum across all 16 lanes.
    unsafe { simd_reduce_max(a.as_u8x16()) }
}
5527
/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
    // Inactive lanes become 0, the identity for unsigned max.
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) }
}
5538
/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
    // Horizontal signed minimum across all 16 lanes.
    unsafe { simd_reduce_min(a.as_i16x16()) }
}
5549
/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
    // Inactive lanes become i16::MAX (0x7fff), the identity for signed min.
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff))) }
}
5560
/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
    // Horizontal signed minimum across all 8 lanes.
    unsafe { simd_reduce_min(a.as_i16x8()) }
}
5571
/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
    // Inactive lanes become i16::MAX (0x7fff), the identity for signed min.
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff))) }
}
5582
/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
    // Horizontal signed minimum across all 32 lanes.
    unsafe { simd_reduce_min(a.as_i8x32()) }
}
5593
/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
    // Inactive lanes become i8::MAX (0x7f), the identity for signed min.
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f))) }
}
5604
/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
    // Horizontal signed minimum across all 16 lanes.
    unsafe { simd_reduce_min(a.as_i8x16()) }
}
5615
/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
    // Inactive lanes become i8::MAX (0x7f), the identity for signed min.
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f))) }
}
5626
/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
    // Horizontal unsigned minimum across all 16 lanes.
    unsafe { simd_reduce_min(a.as_u16x16()) }
}
5637
/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
    // Inactive lanes become u16::MAX (0xffff), the identity for unsigned min.
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff))) }
}
5648
/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
    // Horizontal unsigned minimum across all 8 lanes.
    unsafe { simd_reduce_min(a.as_u16x8()) }
}
5659
/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
    // Inactive lanes become u16::MAX (0xffff), the identity for unsigned min.
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff))) }
}
5670
/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
    // Horizontal unsigned minimum across all 32 lanes.
    unsafe { simd_reduce_min(a.as_u8x32()) }
}
5681
/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
    // Inactive lanes become u8::MAX (0xff), the identity for unsigned min.
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff))) }
}
5692
/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
    // Horizontal unsigned minimum across all 16 lanes.
    unsafe { simd_reduce_min(a.as_u8x16()) }
}
5703
/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
    // Inactive lanes become u8::MAX (0xff), the identity for unsigned min.
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff))) }
}
5714
/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
    // Ordered (left-to-right) reduction over all 16 lanes, accumulator starts at 1.
    unsafe { simd_reduce_mul_ordered(a.as_i16x16(), 1) }
}
5725
/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
    // Inactive lanes are replaced with 1, the multiplicative identity.
    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1)), 1) }
}
5736
/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
    // Ordered (left-to-right) reduction over all 8 lanes, accumulator starts at 1.
    unsafe { simd_reduce_mul_ordered(a.as_i16x8(), 1) }
}
5747
/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
    // Inactive lanes are replaced with 1, the multiplicative identity.
    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1)), 1) }
}
5758
5759/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
5760///
5761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi8)
5762#[inline]
5763#[target_feature(enable = "avx512bw,avx512vl")]
5764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5765#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5766pub const fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
5767    unsafe { simd_reduce_mul_ordered(a.as_i8x32(), 1) }
5768}
5769
5770/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5771///
5772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi8)
5773#[inline]
5774#[target_feature(enable = "avx512bw,avx512vl")]
5775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5776#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5777pub const fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
5778    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1)), 1) }
5779}
5780
5781/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
5782///
5783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi8)
5784#[inline]
5785#[target_feature(enable = "avx512bw,avx512vl")]
5786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5787#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5788pub const fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
5789    unsafe { simd_reduce_mul_ordered(a.as_i8x16(), 1) }
5790}
5791
5792/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5793///
5794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi8)
5795#[inline]
5796#[target_feature(enable = "avx512bw,avx512vl")]
5797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5798#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5799pub const fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
5800    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1)), 1) }
5801}
5802
/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
    // Bitwise OR across all 16 lanes.
    unsafe { simd_reduce_or(a.as_i16x16()) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
    // Masked-off lanes become 0 (the identity for OR) before the reduction.
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
    // Bitwise OR across all 8 lanes.
    unsafe { simd_reduce_or(a.as_i16x8()) }
}

/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
    // Masked-off lanes become 0 (the identity for OR) before the reduction.
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
    // Bitwise OR across all 32 lanes.
    unsafe { simd_reduce_or(a.as_i8x32()) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
    // Masked-off lanes become 0 (the identity for OR) before the reduction.
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
    // Bitwise OR across all 16 lanes.
    unsafe { simd_reduce_or(a.as_i8x16()) }
}

/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
    // Masked-off lanes become 0 (the identity for OR) before the reduction.
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
}
5890
/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
    // Unaligned 64-byte read; `mem_addr` must be valid for a 64-byte read.
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
    // Unaligned 32-byte read; `mem_addr` must be valid for a 32-byte read.
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
    // Unaligned 16-byte read; `mem_addr` must be valid for a 16-byte read.
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
    // Unaligned 64-byte read; `mem_addr` must be valid for a 64-byte read.
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
    // Unaligned 32-byte read; `mem_addr` must be valid for a 32-byte read.
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
    // Unaligned 16-byte read; `mem_addr` must be valid for a 16-byte read.
    ptr::read_unaligned(mem_addr as *const __m128i)
}
5962
/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
    // Unaligned 64-byte write; `mem_addr` must be valid for a 64-byte write.
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
    // Unaligned 32-byte write; `mem_addr` must be valid for a 32-byte write.
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
    // Unaligned 16-byte write; `mem_addr` must be valid for a 16-byte write.
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
    // Unaligned 64-byte write; `mem_addr` must be valid for a 64-byte write.
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
    // Unaligned 32-byte write; `mem_addr` must be valid for a 32-byte write.
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
    // Unaligned 16-byte write; `mem_addr` must be valid for a 16-byte write.
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}
6034
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_epi16(
    src: __m512i,
    k: __mmask32,
    mem_addr: *const i16,
) -> __m512i {
    // Expand the bitmask into a per-lane vector mask (!0 = load from memory,
    // 0 = keep the corresponding lane of `src`); the masked load only touches
    // memory for enabled lanes, with no alignment requirement.
    let mask = simd_select_bitmask(k, i16x32::splat(!0), i16x32::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x32()).as_m512i()
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    // Zero-masking is just the write-masked load with an all-zero `src`.
    _mm512_mask_loadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_epi8(
    src: __m512i,
    k: __mmask64,
    mem_addr: *const i8,
) -> __m512i {
    // !0 selects the lane from memory; 0 keeps the lane of `src`.
    let mask = simd_select_bitmask(k, i8x64::splat(!0), i8x64::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x64()).as_m512i()
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    // Zero-masking is just the write-masked load with an all-zero `src`.
    _mm512_mask_loadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}
6100
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_epi16(
    src: __m256i,
    k: __mmask16,
    mem_addr: *const i16,
) -> __m256i {
    // !0 selects the lane from memory; 0 keeps the lane of `src`. Only the
    // enabled lanes are read from memory; no alignment requirement.
    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x16()).as_m256i()
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    // Zero-masking is just the write-masked load with an all-zero `src`.
    _mm256_mask_loadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_epi8(
    src: __m256i,
    k: __mmask32,
    mem_addr: *const i8,
) -> __m256i {
    // !0 selects the lane from memory; 0 keeps the lane of `src`.
    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x32()).as_m256i()
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    // Zero-masking is just the write-masked load with an all-zero `src`.
    _mm256_mask_loadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}
6166
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_epi16(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i16,
) -> __m128i {
    // !0 selects the lane from memory; 0 keeps the lane of `src`. Only the
    // enabled lanes are read from memory; no alignment requirement.
    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x8()).as_m128i()
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    // Zero-masking is just the write-masked load with an all-zero `src`.
    _mm_mask_loadu_epi16(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_epi8(
    src: __m128i,
    k: __mmask16,
    mem_addr: *const i8,
) -> __m128i {
    // !0 selects the lane from memory; 0 keeps the lane of `src`.
    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x16()).as_m128i()
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    // Zero-masking is just the write-masked load with an all-zero `src`.
    _mm_mask_loadu_epi8(_mm_setzero_si128(), k, mem_addr)
}
6232
/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
    // Expand the bitmask into a per-lane vector mask; lanes with a clear
    // mask bit are not written to memory. No alignment requirement.
    let mask = simd_select_bitmask(mask, i16x32::splat(!0), i16x32::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x32());
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
    // Lanes with a clear mask bit are not written to memory.
    let mask = simd_select_bitmask(mask, i8x64::splat(!0), i8x64::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x64());
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
    // Lanes with a clear mask bit are not written to memory.
    let mask = simd_select_bitmask(mask, i16x16::splat(!0), i16x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x16());
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
    // Lanes with a clear mask bit are not written to memory.
    let mask = simd_select_bitmask(mask, i8x32::splat(!0), i8x32::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x32());
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
    // Lanes with a clear mask bit are not written to memory.
    let mask = simd_select_bitmask(mask, i16x8::splat(!0), i16x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x8());
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
    // Lanes with a clear mask bit are not written to memory.
    let mask = simd_select_bitmask(mask, i8x16::splat(!0), i8x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x16());
}
6316
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_madd_epi16&expand=3511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // Widen both operands to i32 lanes, so each 16x16-bit product is
        // computed exactly in 32 bits.
        let r: i32x32 = simd_mul(simd_cast(a.as_i16x32()), simd_cast(b.as_i16x32()));
        // Deinterleave the 32 products into even- and odd-indexed halves ...
        let even: i32x16 = simd_shuffle!(
            r,
            r,
            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
        );
        let odd: i32x16 = simd_shuffle!(
            r,
            r,
            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
        );
        // ... and sum adjacent pairs, yielding the 16 horizontal sums
        // that vpmaddwd produces.
        simd_add(even, odd).as_m512i()
    }
}
6341
6342/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6343///
6344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_madd_epi16&expand=3512)
6345#[inline]
6346#[target_feature(enable = "avx512bw")]
6347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6348#[cfg_attr(test, assert_instr(vpmaddwd))]
6349#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6350pub const fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
6351    unsafe {
6352        let madd = _mm512_madd_epi16(a, b).as_i32x16();
6353        transmute(simd_select_bitmask(k, madd, src.as_i32x16()))
6354    }
6355}
6356
6357/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6358///
6359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_madd_epi16&expand=3513)
6360#[inline]
6361#[target_feature(enable = "avx512bw")]
6362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6363#[cfg_attr(test, assert_instr(vpmaddwd))]
6364#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6365pub const fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
6366    unsafe {
6367        let madd = _mm512_madd_epi16(a, b).as_i32x16();
6368        transmute(simd_select_bitmask(k, madd, i32x16::ZERO))
6369    }
6370}
6371
6372/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6373///
6374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_madd_epi16&expand=3509)
6375#[inline]
6376#[target_feature(enable = "avx512bw,avx512vl")]
6377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6378#[cfg_attr(test, assert_instr(vpmaddwd))]
6379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6380pub const fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
6381    unsafe {
6382        let madd = _mm256_madd_epi16(a, b).as_i32x8();
6383        transmute(simd_select_bitmask(k, madd, src.as_i32x8()))
6384    }
6385}
6386
6387/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6388///
6389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_madd_epi16&expand=3510)
6390#[inline]
6391#[target_feature(enable = "avx512bw,avx512vl")]
6392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6393#[cfg_attr(test, assert_instr(vpmaddwd))]
6394#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6395pub const fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
6396    unsafe {
6397        let madd = _mm256_madd_epi16(a, b).as_i32x8();
6398        transmute(simd_select_bitmask(k, madd, i32x8::ZERO))
6399    }
6400}
6401
6402/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6403///
6404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_madd_epi16&expand=3506)
6405#[inline]
6406#[target_feature(enable = "avx512bw,avx512vl")]
6407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6408#[cfg_attr(test, assert_instr(vpmaddwd))]
6409#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6410pub const fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6411    unsafe {
6412        let madd = _mm_madd_epi16(a, b).as_i32x4();
6413        transmute(simd_select_bitmask(k, madd, src.as_i32x4()))
6414    }
6415}
6416
6417/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6418///
6419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_madd_epi16&expand=3507)
6420#[inline]
6421#[target_feature(enable = "avx512bw,avx512vl")]
6422#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6423#[cfg_attr(test, assert_instr(vpmaddwd))]
6424#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6425pub const fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6426    unsafe {
6427        let madd = _mm_madd_epi16(a, b).as_i32x4();
6428        transmute(simd_select_bitmask(k, madd, i32x4::ZERO))
6429    }
6430}
6431
/// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maddubs_epi16&expand=3539)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Delegates directly to the vpmaddubsw LLVM intrinsic (a is treated as
    // unsigned bytes, b as signed bytes); see the Intel docs above.
    unsafe { transmute(vpmaddubsw(a.as_u8x64(), b.as_i8x64())) }
}
6442
6443/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6444///
6445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_maddubs_epi16&expand=3540)
6446#[inline]
6447#[target_feature(enable = "avx512bw")]
6448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6449#[cfg_attr(test, assert_instr(vpmaddubsw))]
6450pub fn _mm512_mask_maddubs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6451    unsafe {
6452        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
6453        transmute(simd_select_bitmask(k, madd, src.as_i16x32()))
6454    }
6455}
6456
6457/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6458///
6459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_maddubs_epi16&expand=3541)
6460#[inline]
6461#[target_feature(enable = "avx512bw")]
6462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6463#[cfg_attr(test, assert_instr(vpmaddubsw))]
6464pub fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6465    unsafe {
6466        let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
6467        transmute(simd_select_bitmask(k, madd, i16x32::ZERO))
6468    }
6469}
6470
6471/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6472///
6473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_maddubs_epi16&expand=3537)
6474#[inline]
6475#[target_feature(enable = "avx512bw,avx512vl")]
6476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6477#[cfg_attr(test, assert_instr(vpmaddubsw))]
6478pub fn _mm256_mask_maddubs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6479    unsafe {
6480        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
6481        transmute(simd_select_bitmask(k, madd, src.as_i16x16()))
6482    }
6483}
6484
6485/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6486///
6487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_maddubs_epi16&expand=3538)
6488#[inline]
6489#[target_feature(enable = "avx512bw,avx512vl")]
6490#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6491#[cfg_attr(test, assert_instr(vpmaddubsw))]
6492pub fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6493    unsafe {
6494        let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
6495        transmute(simd_select_bitmask(k, madd, i16x16::ZERO))
6496    }
6497}
6498
6499/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6500///
6501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_maddubs_epi16&expand=3534)
6502#[inline]
6503#[target_feature(enable = "avx512bw,avx512vl")]
6504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6505#[cfg_attr(test, assert_instr(vpmaddubsw))]
6506pub fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6507    unsafe {
6508        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
6509        transmute(simd_select_bitmask(k, madd, src.as_i16x8()))
6510    }
6511}
6512
6513/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6514///
6515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_maddubs_epi16&expand=3535)
6516#[inline]
6517#[target_feature(enable = "avx512bw,avx512vl")]
6518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6519#[cfg_attr(test, assert_instr(vpmaddubsw))]
6520pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6521    unsafe {
6522        let madd = _mm_maddubs_epi16(a, b).as_i16x8();
6523        transmute(simd_select_bitmask(k, madd, i16x8::ZERO))
6524    }
6525}
6526
/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi32&expand=4091)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
    // Delegates directly to the vpackssdw LLVM intrinsic; see the Intel docs
    // above for the exact element ordering of the packed result.
    unsafe { transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) }
}
6537
6538/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6539///
6540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi32&expand=4089)
6541#[inline]
6542#[target_feature(enable = "avx512bw")]
6543#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6544#[cfg_attr(test, assert_instr(vpackssdw))]
6545pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6546    unsafe {
6547        let pack = _mm512_packs_epi32(a, b).as_i16x32();
6548        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
6549    }
6550}
6551
6552/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6553///
6554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi32&expand=4090)
6555#[inline]
6556#[target_feature(enable = "avx512bw")]
6557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6558#[cfg_attr(test, assert_instr(vpackssdw))]
6559pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6560    unsafe {
6561        let pack = _mm512_packs_epi32(a, b).as_i16x32();
6562        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
6563    }
6564}
6565
6566/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6567///
6568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi32&expand=4086)
6569#[inline]
6570#[target_feature(enable = "avx512bw,avx512vl")]
6571#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6572#[cfg_attr(test, assert_instr(vpackssdw))]
6573pub fn _mm256_mask_packs_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6574    unsafe {
6575        let pack = _mm256_packs_epi32(a, b).as_i16x16();
6576        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
6577    }
6578}
6579
6580/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6581///
6582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi32&expand=4087)
6583#[inline]
6584#[target_feature(enable = "avx512bw,avx512vl")]
6585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6586#[cfg_attr(test, assert_instr(vpackssdw))]
6587pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6588    unsafe {
6589        let pack = _mm256_packs_epi32(a, b).as_i16x16();
6590        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
6591    }
6592}
6593
6594/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6595///
6596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi32&expand=4083)
6597#[inline]
6598#[target_feature(enable = "avx512bw,avx512vl")]
6599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6600#[cfg_attr(test, assert_instr(vpackssdw))]
6601pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6602    unsafe {
6603        let pack = _mm_packs_epi32(a, b).as_i16x8();
6604        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
6605    }
6606}
6607
6608/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6609///
6610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi32&expand=4084)
6611#[inline]
6612#[target_feature(enable = "avx512bw,avx512vl")]
6613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6614#[cfg_attr(test, assert_instr(vpackssdw))]
6615pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6616    unsafe {
6617        let pack = _mm_packs_epi32(a, b).as_i16x8();
6618        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
6619    }
6620}
6621
/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi16&expand=4082)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Delegates directly to the vpacksswb LLVM intrinsic; see the Intel docs
    // above for the exact element ordering of the packed result.
    unsafe { transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) }
}
6632
6633/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6634///
6635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi16&expand=4080)
6636#[inline]
6637#[target_feature(enable = "avx512bw")]
6638#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6639#[cfg_attr(test, assert_instr(vpacksswb))]
6640pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6641    unsafe {
6642        let pack = _mm512_packs_epi16(a, b).as_i8x64();
6643        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
6644    }
6645}
6646
6647/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6648///
6649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi16&expand=4081)
6650#[inline]
6651#[target_feature(enable = "avx512bw")]
6652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6653#[cfg_attr(test, assert_instr(vpacksswb))]
6654pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6655    unsafe {
6656        let pack = _mm512_packs_epi16(a, b).as_i8x64();
6657        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
6658    }
6659}
6660
6661/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6662///
6663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi16&expand=4077)
6664#[inline]
6665#[target_feature(enable = "avx512bw,avx512vl")]
6666#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6667#[cfg_attr(test, assert_instr(vpacksswb))]
6668pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6669    unsafe {
6670        let pack = _mm256_packs_epi16(a, b).as_i8x32();
6671        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
6672    }
6673}
6674
6675/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6676///
6677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=#text=_mm256_maskz_packs_epi16&expand=4078)
6678#[inline]
6679#[target_feature(enable = "avx512bw,avx512vl")]
6680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6681#[cfg_attr(test, assert_instr(vpacksswb))]
6682pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6683    unsafe {
6684        let pack = _mm256_packs_epi16(a, b).as_i8x32();
6685        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
6686    }
6687}
6688
6689/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6690///
6691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi16&expand=4074)
6692#[inline]
6693#[target_feature(enable = "avx512bw,avx512vl")]
6694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6695#[cfg_attr(test, assert_instr(vpacksswb))]
6696pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6697    unsafe {
6698        let pack = _mm_packs_epi16(a, b).as_i8x16();
6699        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
6700    }
6701}
6702
6703/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6704///
6705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi16&expand=4075)
6706#[inline]
6707#[target_feature(enable = "avx512bw,avx512vl")]
6708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6709#[cfg_attr(test, assert_instr(vpacksswb))]
6710pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6711    unsafe {
6712        let pack = _mm_packs_epi16(a, b).as_i8x16();
6713        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
6714    }
6715}
6716
/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi32&expand=4130)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
    // Delegates directly to the vpackusdw LLVM intrinsic; see the Intel docs
    // above for the exact element ordering of the packed result.
    unsafe { transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) }
}
6727
6728/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6729///
6730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi32&expand=4128)
6731#[inline]
6732#[target_feature(enable = "avx512bw")]
6733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6734#[cfg_attr(test, assert_instr(vpackusdw))]
6735pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6736    unsafe {
6737        let pack = _mm512_packus_epi32(a, b).as_i16x32();
6738        transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
6739    }
6740}
6741
6742/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6743///
6744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi32&expand=4129)
6745#[inline]
6746#[target_feature(enable = "avx512bw")]
6747#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6748#[cfg_attr(test, assert_instr(vpackusdw))]
6749pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6750    unsafe {
6751        let pack = _mm512_packus_epi32(a, b).as_i16x32();
6752        transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
6753    }
6754}
6755
6756/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6757///
6758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi32&expand=4125)
6759#[inline]
6760#[target_feature(enable = "avx512bw,avx512vl")]
6761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6762#[cfg_attr(test, assert_instr(vpackusdw))]
6763pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6764    unsafe {
6765        let pack = _mm256_packus_epi32(a, b).as_i16x16();
6766        transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
6767    }
6768}
6769
6770/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6771///
6772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi32&expand=4126)
6773#[inline]
6774#[target_feature(enable = "avx512bw,avx512vl")]
6775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6776#[cfg_attr(test, assert_instr(vpackusdw))]
6777pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6778    unsafe {
6779        let pack = _mm256_packus_epi32(a, b).as_i16x16();
6780        transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
6781    }
6782}
6783
6784/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6785///
6786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi32&expand=4122)
6787#[inline]
6788#[target_feature(enable = "avx512bw,avx512vl")]
6789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6790#[cfg_attr(test, assert_instr(vpackusdw))]
6791pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6792    unsafe {
6793        let pack = _mm_packus_epi32(a, b).as_i16x8();
6794        transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
6795    }
6796}
6797
6798/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6799///
6800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi32&expand=4123)
6801#[inline]
6802#[target_feature(enable = "avx512bw,avx512vl")]
6803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6804#[cfg_attr(test, assert_instr(vpackusdw))]
6805pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6806    unsafe {
6807        let pack = _mm_packus_epi32(a, b).as_i16x8();
6808        transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
6809    }
6810}
6811
/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi16&expand=4121)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Delegates directly to the vpackuswb LLVM intrinsic; see the Intel docs
    // above for the exact element ordering of the packed result.
    unsafe { transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) }
}
6822
6823/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6824///
6825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi16&expand=4119)
6826#[inline]
6827#[target_feature(enable = "avx512bw")]
6828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6829#[cfg_attr(test, assert_instr(vpackuswb))]
6830pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6831    unsafe {
6832        let pack = _mm512_packus_epi16(a, b).as_i8x64();
6833        transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
6834    }
6835}
6836
6837/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6838///
6839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi16&expand=4120)
6840#[inline]
6841#[target_feature(enable = "avx512bw")]
6842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6843#[cfg_attr(test, assert_instr(vpackuswb))]
6844pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6845    unsafe {
6846        let pack = _mm512_packus_epi16(a, b).as_i8x64();
6847        transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
6848    }
6849}
6850
6851/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6852///
6853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi16&expand=4116)
6854#[inline]
6855#[target_feature(enable = "avx512bw,avx512vl")]
6856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6857#[cfg_attr(test, assert_instr(vpackuswb))]
6858pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6859    unsafe {
6860        let pack = _mm256_packus_epi16(a, b).as_i8x32();
6861        transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
6862    }
6863}
6864
6865/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6866///
6867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi16&expand=4117)
6868#[inline]
6869#[target_feature(enable = "avx512bw,avx512vl")]
6870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6871#[cfg_attr(test, assert_instr(vpackuswb))]
6872pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6873    unsafe {
6874        let pack = _mm256_packus_epi16(a, b).as_i8x32();
6875        transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
6876    }
6877}
6878
6879/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6880///
6881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi16&expand=4113)
6882#[inline]
6883#[target_feature(enable = "avx512bw,avx512vl")]
6884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6885#[cfg_attr(test, assert_instr(vpackuswb))]
6886pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6887    unsafe {
6888        let pack = _mm_packus_epi16(a, b).as_i8x16();
6889        transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
6890    }
6891}
6892
6893/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6894///
6895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi16&expand=4114)
6896#[inline]
6897#[target_feature(enable = "avx512bw,avx512vl")]
6898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6899#[cfg_attr(test, assert_instr(vpackuswb))]
6900pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6901    unsafe {
6902        let pack = _mm_packus_epi16(a, b).as_i8x16();
6903        transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
6904    }
6905}
6906
6907/// Average packed unsigned 16-bit integers in a and b, and store the results in dst.
6908///
6909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu16&expand=388)
6910#[inline]
6911#[target_feature(enable = "avx512bw")]
6912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6913#[cfg_attr(test, assert_instr(vpavgw))]
6914#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6915pub const fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
6916    unsafe {
6917        let a = simd_cast::<_, u32x32>(a.as_u16x32());
6918        let b = simd_cast::<_, u32x32>(b.as_u16x32());
6919        let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1));
6920        transmute(simd_cast::<_, u16x32>(r))
6921    }
6922}
6923
6924/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6925///
6926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu16&expand=389)
6927#[inline]
6928#[target_feature(enable = "avx512bw")]
6929#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6930#[cfg_attr(test, assert_instr(vpavgw))]
6931#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6932pub const fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6933    unsafe {
6934        let avg = _mm512_avg_epu16(a, b).as_u16x32();
6935        transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
6936    }
6937}
6938
6939/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6940///
6941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu16&expand=390)
6942#[inline]
6943#[target_feature(enable = "avx512bw")]
6944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6945#[cfg_attr(test, assert_instr(vpavgw))]
6946#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6947pub const fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6948    unsafe {
6949        let avg = _mm512_avg_epu16(a, b).as_u16x32();
6950        transmute(simd_select_bitmask(k, avg, u16x32::ZERO))
6951    }
6952}
6953
6954/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6955///
6956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu16&expand=386)
6957#[inline]
6958#[target_feature(enable = "avx512bw,avx512vl")]
6959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6960#[cfg_attr(test, assert_instr(vpavgw))]
6961#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6962pub const fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6963    unsafe {
6964        let avg = _mm256_avg_epu16(a, b).as_u16x16();
6965        transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
6966    }
6967}
6968
6969/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6970///
6971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu16&expand=387)
6972#[inline]
6973#[target_feature(enable = "avx512bw,avx512vl")]
6974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6975#[cfg_attr(test, assert_instr(vpavgw))]
6976#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6977pub const fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6978    unsafe {
6979        let avg = _mm256_avg_epu16(a, b).as_u16x16();
6980        transmute(simd_select_bitmask(k, avg, u16x16::ZERO))
6981    }
6982}
6983
6984/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6985///
6986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu16&expand=383)
6987#[inline]
6988#[target_feature(enable = "avx512bw,avx512vl")]
6989#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6990#[cfg_attr(test, assert_instr(vpavgw))]
6991#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6992pub const fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6993    unsafe {
6994        let avg = _mm_avg_epu16(a, b).as_u16x8();
6995        transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
6996    }
6997}
6998
6999/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7000///
7001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu16&expand=384)
7002#[inline]
7003#[target_feature(enable = "avx512bw,avx512vl")]
7004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7005#[cfg_attr(test, assert_instr(vpavgw))]
7006#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7007pub const fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
7008    unsafe {
7009        let avg = _mm_avg_epu16(a, b).as_u16x8();
7010        transmute(simd_select_bitmask(k, avg, u16x8::ZERO))
7011    }
7012}
7013
7014/// Average packed unsigned 8-bit integers in a and b, and store the results in dst.
7015///
7016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu8&expand=397)
7017#[inline]
7018#[target_feature(enable = "avx512bw")]
7019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7020#[cfg_attr(test, assert_instr(vpavgb))]
7021#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7022pub const fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
7023    unsafe {
7024        let a = simd_cast::<_, u16x64>(a.as_u8x64());
7025        let b = simd_cast::<_, u16x64>(b.as_u8x64());
7026        let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1));
7027        transmute(simd_cast::<_, u8x64>(r))
7028    }
7029}
7030
7031/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7032///
7033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu8&expand=398)
7034#[inline]
7035#[target_feature(enable = "avx512bw")]
7036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7037#[cfg_attr(test, assert_instr(vpavgb))]
7038#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7039pub const fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
7040    unsafe {
7041        let avg = _mm512_avg_epu8(a, b).as_u8x64();
7042        transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
7043    }
7044}
7045
7046/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7047///
7048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu8&expand=399)
7049#[inline]
7050#[target_feature(enable = "avx512bw")]
7051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7052#[cfg_attr(test, assert_instr(vpavgb))]
7053#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7054pub const fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
7055    unsafe {
7056        let avg = _mm512_avg_epu8(a, b).as_u8x64();
7057        transmute(simd_select_bitmask(k, avg, u8x64::ZERO))
7058    }
7059}
7060
7061/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7062///
7063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu8&expand=395)
7064#[inline]
7065#[target_feature(enable = "avx512bw,avx512vl")]
7066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7067#[cfg_attr(test, assert_instr(vpavgb))]
7068#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7069pub const fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
7070    unsafe {
7071        let avg = _mm256_avg_epu8(a, b).as_u8x32();
7072        transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
7073    }
7074}
7075
7076/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7077///
7078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu8&expand=396)
7079#[inline]
7080#[target_feature(enable = "avx512bw,avx512vl")]
7081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7082#[cfg_attr(test, assert_instr(vpavgb))]
7083#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7084pub const fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
7085    unsafe {
7086        let avg = _mm256_avg_epu8(a, b).as_u8x32();
7087        transmute(simd_select_bitmask(k, avg, u8x32::ZERO))
7088    }
7089}
7090
7091/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7092///
7093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu8&expand=392)
7094#[inline]
7095#[target_feature(enable = "avx512bw,avx512vl")]
7096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7097#[cfg_attr(test, assert_instr(vpavgb))]
7098#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7099pub const fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
7100    unsafe {
7101        let avg = _mm_avg_epu8(a, b).as_u8x16();
7102        transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
7103    }
7104}
7105
7106/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7107///
7108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu8&expand=393)
7109#[inline]
7110#[target_feature(enable = "avx512bw,avx512vl")]
7111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7112#[cfg_attr(test, assert_instr(vpavgb))]
7113#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7114pub const fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
7115    unsafe {
7116        let avg = _mm_avg_epu8(a, b).as_u8x16();
7117        transmute(simd_select_bitmask(k, avg, u8x16::ZERO))
7118    }
7119}
7120
7121/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst.
7122///
7123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi16&expand=5271)
7124#[inline]
7125#[target_feature(enable = "avx512bw")]
7126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7127#[cfg_attr(test, assert_instr(vpsllw))]
7128pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
7129    unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) }
7130}
7131
7132/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7133///
7134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi16&expand=5269)
7135#[inline]
7136#[target_feature(enable = "avx512bw")]
7137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7138#[cfg_attr(test, assert_instr(vpsllw))]
7139pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7140    unsafe {
7141        let shf = _mm512_sll_epi16(a, count).as_i16x32();
7142        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7143    }
7144}
7145
7146/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7147///
7148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi16&expand=5270)
7149#[inline]
7150#[target_feature(enable = "avx512bw")]
7151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7152#[cfg_attr(test, assert_instr(vpsllw))]
7153pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7154    unsafe {
7155        let shf = _mm512_sll_epi16(a, count).as_i16x32();
7156        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7157    }
7158}
7159
7160/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7161///
7162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi16&expand=5266)
7163#[inline]
7164#[target_feature(enable = "avx512bw,avx512vl")]
7165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7166#[cfg_attr(test, assert_instr(vpsllw))]
7167pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7168    unsafe {
7169        let shf = _mm256_sll_epi16(a, count).as_i16x16();
7170        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7171    }
7172}
7173
7174/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7175///
7176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi16&expand=5267)
7177#[inline]
7178#[target_feature(enable = "avx512bw,avx512vl")]
7179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7180#[cfg_attr(test, assert_instr(vpsllw))]
7181pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7182    unsafe {
7183        let shf = _mm256_sll_epi16(a, count).as_i16x16();
7184        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7185    }
7186}
7187
7188/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7189///
7190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi16&expand=5263)
7191#[inline]
7192#[target_feature(enable = "avx512bw,avx512vl")]
7193#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7194#[cfg_attr(test, assert_instr(vpsllw))]
7195pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7196    unsafe {
7197        let shf = _mm_sll_epi16(a, count).as_i16x8();
7198        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7199    }
7200}
7201
7202/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7203///
7204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi16&expand=5264)
7205#[inline]
7206#[target_feature(enable = "avx512bw,avx512vl")]
7207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7208#[cfg_attr(test, assert_instr(vpsllw))]
7209pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7210    unsafe {
7211        let shf = _mm_sll_epi16(a, count).as_i16x8();
7212        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7213    }
7214}
7215
7216/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
7217///
7218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi16&expand=5301)
7219#[inline]
7220#[target_feature(enable = "avx512bw")]
7221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7222#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
7223#[rustc_legacy_const_generics(1)]
7224#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7225pub const fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
7226    unsafe {
7227        static_assert_uimm_bits!(IMM8, 8);
7228        if IMM8 >= 16 {
7229            _mm512_setzero_si512()
7230        } else {
7231            transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
7232        }
7233    }
7234}
7235
7236/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7237///
7238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi16&expand=5299)
7239#[inline]
7240#[target_feature(enable = "avx512bw")]
7241#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7242#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
7243#[rustc_legacy_const_generics(3)]
7244#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7245pub const fn _mm512_mask_slli_epi16<const IMM8: u32>(
7246    src: __m512i,
7247    k: __mmask32,
7248    a: __m512i,
7249) -> __m512i {
7250    unsafe {
7251        static_assert_uimm_bits!(IMM8, 8);
7252        let shf = if IMM8 >= 16 {
7253            u16x32::ZERO
7254        } else {
7255            simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))
7256        };
7257        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
7258    }
7259}
7260
7261/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7262///
7263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi16&expand=5300)
7264#[inline]
7265#[target_feature(enable = "avx512bw")]
7266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7267#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
7268#[rustc_legacy_const_generics(2)]
7269#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7270pub const fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
7271    unsafe {
7272        static_assert_uimm_bits!(IMM8, 8);
7273        if IMM8 >= 16 {
7274            _mm512_setzero_si512()
7275        } else {
7276            let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16));
7277            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
7278        }
7279    }
7280}
7281
7282/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7283///
7284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi16&expand=5296)
7285#[inline]
7286#[target_feature(enable = "avx512bw,avx512vl")]
7287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7288#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
7289#[rustc_legacy_const_generics(3)]
7290#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7291pub const fn _mm256_mask_slli_epi16<const IMM8: u32>(
7292    src: __m256i,
7293    k: __mmask16,
7294    a: __m256i,
7295) -> __m256i {
7296    unsafe {
7297        static_assert_uimm_bits!(IMM8, 8);
7298        let shf = if IMM8 >= 16 {
7299            u16x16::ZERO
7300        } else {
7301            simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))
7302        };
7303        transmute(simd_select_bitmask(k, shf, src.as_u16x16()))
7304    }
7305}
7306
7307/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7308///
7309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi16&expand=5297)
7310#[inline]
7311#[target_feature(enable = "avx512bw,avx512vl")]
7312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7313#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
7314#[rustc_legacy_const_generics(2)]
7315#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7316pub const fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
7317    unsafe {
7318        static_assert_uimm_bits!(IMM8, 8);
7319        if IMM8 >= 16 {
7320            _mm256_setzero_si256()
7321        } else {
7322            let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16));
7323            transmute(simd_select_bitmask(k, shf, u16x16::ZERO))
7324        }
7325    }
7326}
7327
7328/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7329///
7330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi16&expand=5293)
7331#[inline]
7332#[target_feature(enable = "avx512bw,avx512vl")]
7333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7334#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
7335#[rustc_legacy_const_generics(3)]
7336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7337pub const fn _mm_mask_slli_epi16<const IMM8: u32>(
7338    src: __m128i,
7339    k: __mmask8,
7340    a: __m128i,
7341) -> __m128i {
7342    unsafe {
7343        static_assert_uimm_bits!(IMM8, 8);
7344        let shf = if IMM8 >= 16 {
7345            u16x8::ZERO
7346        } else {
7347            simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))
7348        };
7349        transmute(simd_select_bitmask(k, shf, src.as_u16x8()))
7350    }
7351}
7352
7353/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7354///
7355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi16&expand=5294)
7356#[inline]
7357#[target_feature(enable = "avx512bw,avx512vl")]
7358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7359#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
7360#[rustc_legacy_const_generics(2)]
7361#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7362pub const fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
7363    unsafe {
7364        static_assert_uimm_bits!(IMM8, 8);
7365        if IMM8 >= 16 {
7366            _mm_setzero_si128()
7367        } else {
7368            let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16));
7369            transmute(simd_select_bitmask(k, shf, u16x8::ZERO))
7370        }
7371    }
7372}
7373
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi16&expand=5333)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // Lanes whose shift amount is >= 16 must yield 0, but feeding such a
        // count to `simd_shl` is undefined behavior. So out-of-range counts
        // are first replaced by 0 (making the shift well-defined), and the
        // affected result lanes are zeroed by the second select afterwards.
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x32::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
    }
}
7390
7391/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7392///
7393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi16&expand=5331)
7394#[inline]
7395#[target_feature(enable = "avx512bw")]
7396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7397#[cfg_attr(test, assert_instr(vpsllvw))]
7398#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7399pub const fn _mm512_mask_sllv_epi16(
7400    src: __m512i,
7401    k: __mmask32,
7402    a: __m512i,
7403    count: __m512i,
7404) -> __m512i {
7405    unsafe {
7406        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
7407        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7408    }
7409}
7410
7411/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7412///
7413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi16&expand=5332)
7414#[inline]
7415#[target_feature(enable = "avx512bw")]
7416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7417#[cfg_attr(test, assert_instr(vpsllvw))]
7418#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7419pub const fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
7420    unsafe {
7421        let shf = _mm512_sllv_epi16(a, count).as_i16x32();
7422        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7423    }
7424}
7425
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi16&expand=5330)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // Lanes whose shift amount is >= 16 must yield 0, but feeding such a
        // count to `simd_shl` is undefined behavior. So out-of-range counts
        // are first replaced by 0 (making the shift well-defined), and the
        // affected result lanes are zeroed by the second select afterwards.
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x16::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
    }
}
7442
7443/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7444///
7445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi16&expand=5328)
7446#[inline]
7447#[target_feature(enable = "avx512bw,avx512vl")]
7448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7449#[cfg_attr(test, assert_instr(vpsllvw))]
7450#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7451pub const fn _mm256_mask_sllv_epi16(
7452    src: __m256i,
7453    k: __mmask16,
7454    a: __m256i,
7455    count: __m256i,
7456) -> __m256i {
7457    unsafe {
7458        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
7459        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7460    }
7461}
7462
7463/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7464///
7465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi16&expand=5329)
7466#[inline]
7467#[target_feature(enable = "avx512bw,avx512vl")]
7468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7469#[cfg_attr(test, assert_instr(vpsllvw))]
7470#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7471pub const fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
7472    unsafe {
7473        let shf = _mm256_sllv_epi16(a, count).as_i16x16();
7474        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7475    }
7476}
7477
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi16&expand=5327)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        // Lanes whose shift count is >= 16 must become 0 (the result selected
        // below), but `simd_shl` by >= the lane width is UB, so out-of-range
        // counts are first clamped to 0 before the shift is performed.
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x8::ZERO);
        // In-range lanes keep the shifted value; out-of-range lanes are zeroed.
        simd_select(no_overflow, simd_shl(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
    }
}
7494
7495/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7496///
7497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi16&expand=5325)
7498#[inline]
7499#[target_feature(enable = "avx512bw,avx512vl")]
7500#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7501#[cfg_attr(test, assert_instr(vpsllvw))]
7502#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7503pub const fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7504    unsafe {
7505        let shf = _mm_sllv_epi16(a, count).as_i16x8();
7506        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7507    }
7508}
7509
7510/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7511///
7512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi16&expand=5326)
7513#[inline]
7514#[target_feature(enable = "avx512bw,avx512vl")]
7515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7516#[cfg_attr(test, assert_instr(vpsllvw))]
7517#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7518pub const fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7519    unsafe {
7520        let shf = _mm_sllv_epi16(a, count).as_i16x8();
7521        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7522    }
7523}
7524
/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi16&expand=5483)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
    // Delegates to the `vpsrlw` LLVM intrinsic: every 16-bit lane of `a` is
    // shifted by the single count held in `count` (including the instruction's
    // own handling of out-of-range counts), not lane-per-lane like `srlv`.
    unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) }
}
7535
7536/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7537///
7538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi16&expand=5481)
7539#[inline]
7540#[target_feature(enable = "avx512bw")]
7541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7542#[cfg_attr(test, assert_instr(vpsrlw))]
7543pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7544    unsafe {
7545        let shf = _mm512_srl_epi16(a, count).as_i16x32();
7546        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7547    }
7548}
7549
7550/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7551///
7552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi16&expand=5482)
7553#[inline]
7554#[target_feature(enable = "avx512bw")]
7555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7556#[cfg_attr(test, assert_instr(vpsrlw))]
7557pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7558    unsafe {
7559        let shf = _mm512_srl_epi16(a, count).as_i16x32();
7560        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7561    }
7562}
7563
7564/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7565///
7566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi16&expand=5478)
7567#[inline]
7568#[target_feature(enable = "avx512bw,avx512vl")]
7569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7570#[cfg_attr(test, assert_instr(vpsrlw))]
7571pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7572    unsafe {
7573        let shf = _mm256_srl_epi16(a, count).as_i16x16();
7574        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7575    }
7576}
7577
7578/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7579///
7580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi16&expand=5479)
7581#[inline]
7582#[target_feature(enable = "avx512bw,avx512vl")]
7583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7584#[cfg_attr(test, assert_instr(vpsrlw))]
7585pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7586    unsafe {
7587        let shf = _mm256_srl_epi16(a, count).as_i16x16();
7588        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7589    }
7590}
7591
7592/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7593///
7594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi16&expand=5475)
7595#[inline]
7596#[target_feature(enable = "avx512bw,avx512vl")]
7597#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7598#[cfg_attr(test, assert_instr(vpsrlw))]
7599pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7600    unsafe {
7601        let shf = _mm_srl_epi16(a, count).as_i16x8();
7602        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7603    }
7604}
7605
7606/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7607///
7608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi16&expand=5476)
7609#[inline]
7610#[target_feature(enable = "avx512bw,avx512vl")]
7611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7612#[cfg_attr(test, assert_instr(vpsrlw))]
7613pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7614    unsafe {
7615        let shf = _mm_srl_epi16(a, count).as_i16x8();
7616        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7617    }
7618}
7619
7620/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
7621///
7622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi16&expand=5513)
7623#[inline]
7624#[target_feature(enable = "avx512bw")]
7625#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7626#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7627#[rustc_legacy_const_generics(1)]
7628#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7629pub const fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
7630    unsafe {
7631        static_assert_uimm_bits!(IMM8, 8);
7632        if IMM8 >= 16 {
7633            _mm512_setzero_si512()
7634        } else {
7635            transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
7636        }
7637    }
7638}
7639
7640/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7641///
7642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi16&expand=5511)
7643#[inline]
7644#[target_feature(enable = "avx512bw")]
7645#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7646#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7647#[rustc_legacy_const_generics(3)]
7648#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7649pub const fn _mm512_mask_srli_epi16<const IMM8: u32>(
7650    src: __m512i,
7651    k: __mmask32,
7652    a: __m512i,
7653) -> __m512i {
7654    unsafe {
7655        static_assert_uimm_bits!(IMM8, 8);
7656        let shf = if IMM8 >= 16 {
7657            u16x32::ZERO
7658        } else {
7659            simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))
7660        };
7661        transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
7662    }
7663}
7664
/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi16&expand=5512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // NOTE(review): `IMM8` is declared `i32` here while the sibling
        // `_mm512_srli_epi16`/`_mm512_mask_srli_epi16` use `u32`. The static
        // assert above still restricts it to 0..=255, and changing the
        // const-generic type now would break callers that spell it explicitly,
        // so the inconsistency is kept.
        if IMM8 >= 16 {
            // A shift of 16 or more zeroes every lane regardless of the mask.
            _mm512_setzero_si512()
        } else {
            let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16));
            transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
        }
    }
}
7686
7687/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7688///
7689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi16&expand=5508)
7690#[inline]
7691#[target_feature(enable = "avx512bw,avx512vl")]
7692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7693#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7694#[rustc_legacy_const_generics(3)]
7695#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7696pub const fn _mm256_mask_srli_epi16<const IMM8: i32>(
7697    src: __m256i,
7698    k: __mmask16,
7699    a: __m256i,
7700) -> __m256i {
7701    unsafe {
7702        static_assert_uimm_bits!(IMM8, 8);
7703        let shf = _mm256_srli_epi16::<IMM8>(a);
7704        transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
7705    }
7706}
7707
7708/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7709///
7710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi16&expand=5509)
7711#[inline]
7712#[target_feature(enable = "avx512bw,avx512vl")]
7713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7714#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7715#[rustc_legacy_const_generics(2)]
7716#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7717pub const fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
7718    unsafe {
7719        static_assert_uimm_bits!(IMM8, 8);
7720        let shf = _mm256_srli_epi16::<IMM8>(a);
7721        transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO))
7722    }
7723}
7724
7725/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7726///
7727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi16&expand=5505)
7728#[inline]
7729#[target_feature(enable = "avx512bw,avx512vl")]
7730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7731#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7732#[rustc_legacy_const_generics(3)]
7733#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7734pub const fn _mm_mask_srli_epi16<const IMM8: i32>(
7735    src: __m128i,
7736    k: __mmask8,
7737    a: __m128i,
7738) -> __m128i {
7739    unsafe {
7740        static_assert_uimm_bits!(IMM8, 8);
7741        let shf = _mm_srli_epi16::<IMM8>(a);
7742        transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
7743    }
7744}
7745
7746/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7747///
7748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi16&expand=5506)
7749#[inline]
7750#[target_feature(enable = "avx512bw,avx512vl")]
7751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7752#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
7753#[rustc_legacy_const_generics(2)]
7754#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7755pub const fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
7756    unsafe {
7757        static_assert_uimm_bits!(IMM8, 8);
7758        let shf = _mm_srli_epi16::<IMM8>(a);
7759        transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO))
7760    }
7761}
7762
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi16&expand=5545)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // Lanes whose shift count is >= 16 must become 0 (the result selected
        // below), but `simd_shr` by >= the lane width is UB, so out-of-range
        // counts are first clamped to 0 before the shift is performed.
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x32::ZERO);
        // In-range lanes keep the shifted value; out-of-range lanes are zeroed.
        simd_select(no_overflow, simd_shr(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
    }
}
7779
7780/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7781///
7782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi16&expand=5543)
7783#[inline]
7784#[target_feature(enable = "avx512bw")]
7785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7786#[cfg_attr(test, assert_instr(vpsrlvw))]
7787#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7788pub const fn _mm512_mask_srlv_epi16(
7789    src: __m512i,
7790    k: __mmask32,
7791    a: __m512i,
7792    count: __m512i,
7793) -> __m512i {
7794    unsafe {
7795        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
7796        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7797    }
7798}
7799
7800/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7801///
7802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi16&expand=5544)
7803#[inline]
7804#[target_feature(enable = "avx512bw")]
7805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7806#[cfg_attr(test, assert_instr(vpsrlvw))]
7807#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7808pub const fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
7809    unsafe {
7810        let shf = _mm512_srlv_epi16(a, count).as_i16x32();
7811        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7812    }
7813}
7814
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi16&expand=5542)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // Lanes whose shift count is >= 16 must become 0 (the result selected
        // below), but `simd_shr` by >= the lane width is UB, so out-of-range
        // counts are first clamped to 0 before the shift is performed.
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x16::ZERO);
        // In-range lanes keep the shifted value; out-of-range lanes are zeroed.
        simd_select(no_overflow, simd_shr(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
    }
}
7831
7832/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7833///
7834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi16&expand=5540)
7835#[inline]
7836#[target_feature(enable = "avx512bw,avx512vl")]
7837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7838#[cfg_attr(test, assert_instr(vpsrlvw))]
7839#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7840pub const fn _mm256_mask_srlv_epi16(
7841    src: __m256i,
7842    k: __mmask16,
7843    a: __m256i,
7844    count: __m256i,
7845) -> __m256i {
7846    unsafe {
7847        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
7848        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7849    }
7850}
7851
7852/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7853///
7854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi16&expand=5541)
7855#[inline]
7856#[target_feature(enable = "avx512bw,avx512vl")]
7857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7858#[cfg_attr(test, assert_instr(vpsrlvw))]
7859#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7860pub const fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
7861    unsafe {
7862        let shf = _mm256_srlv_epi16(a, count).as_i16x16();
7863        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7864    }
7865}
7866
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi16&expand=5539)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        // Lanes whose shift count is >= 16 must become 0 (the result selected
        // below), but `simd_shr` by >= the lane width is UB, so out-of-range
        // counts are first clamped to 0 before the shift is performed.
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x8::ZERO);
        // In-range lanes keep the shifted value; out-of-range lanes are zeroed.
        simd_select(no_overflow, simd_shr(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
    }
}
7883
7884/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7885///
7886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi16&expand=5537)
7887#[inline]
7888#[target_feature(enable = "avx512bw,avx512vl")]
7889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7890#[cfg_attr(test, assert_instr(vpsrlvw))]
7891#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7892pub const fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7893    unsafe {
7894        let shf = _mm_srlv_epi16(a, count).as_i16x8();
7895        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7896    }
7897}
7898
7899/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7900///
7901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi16&expand=5538)
7902#[inline]
7903#[target_feature(enable = "avx512bw,avx512vl")]
7904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7905#[cfg_attr(test, assert_instr(vpsrlvw))]
7906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
7907pub const fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7908    unsafe {
7909        let shf = _mm_srlv_epi16(a, count).as_i16x8();
7910        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
7911    }
7912}
7913
/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi16&expand=5398)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
    // Delegates to the `vpsraw` LLVM intrinsic: every 16-bit lane of `a` is
    // shifted arithmetically by the single count held in `count` (including
    // the instruction's own handling of out-of-range counts).
    unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) }
}
7924
7925/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7926///
7927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi16&expand=5396)
7928#[inline]
7929#[target_feature(enable = "avx512bw")]
7930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7931#[cfg_attr(test, assert_instr(vpsraw))]
7932pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7933    unsafe {
7934        let shf = _mm512_sra_epi16(a, count).as_i16x32();
7935        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
7936    }
7937}
7938
7939/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7940///
7941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi16&expand=5397)
7942#[inline]
7943#[target_feature(enable = "avx512bw")]
7944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7945#[cfg_attr(test, assert_instr(vpsraw))]
7946pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
7947    unsafe {
7948        let shf = _mm512_sra_epi16(a, count).as_i16x32();
7949        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
7950    }
7951}
7952
7953/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7954///
7955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi16&expand=5393)
7956#[inline]
7957#[target_feature(enable = "avx512bw,avx512vl")]
7958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7959#[cfg_attr(test, assert_instr(vpsraw))]
7960pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7961    unsafe {
7962        let shf = _mm256_sra_epi16(a, count).as_i16x16();
7963        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
7964    }
7965}
7966
7967/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7968///
7969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi16&expand=5394)
7970#[inline]
7971#[target_feature(enable = "avx512bw,avx512vl")]
7972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7973#[cfg_attr(test, assert_instr(vpsraw))]
7974pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
7975    unsafe {
7976        let shf = _mm256_sra_epi16(a, count).as_i16x16();
7977        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
7978    }
7979}
7980
7981/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7982///
7983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi16&expand=5390)
7984#[inline]
7985#[target_feature(enable = "avx512bw,avx512vl")]
7986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7987#[cfg_attr(test, assert_instr(vpsraw))]
7988pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
7989    unsafe {
7990        let shf = _mm_sra_epi16(a, count).as_i16x8();
7991        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
7992    }
7993}
7994
7995/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7996///
7997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi16&expand=5391)
7998#[inline]
7999#[target_feature(enable = "avx512bw,avx512vl")]
8000#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8001#[cfg_attr(test, assert_instr(vpsraw))]
8002pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8003    unsafe {
8004        let shf = _mm_sra_epi16(a, count).as_i16x8();
8005        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
8006    }
8007}
8008
8009/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
8010///
8011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi16&expand=5427)
8012#[inline]
8013#[target_feature(enable = "avx512bw")]
8014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8015#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8016#[rustc_legacy_const_generics(1)]
8017#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8018pub const fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
8019    unsafe {
8020        static_assert_uimm_bits!(IMM8, 8);
8021        transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)))
8022    }
8023}
8024
8025/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8026///
8027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi16&expand=5425)
8028#[inline]
8029#[target_feature(enable = "avx512bw")]
8030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8031#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8032#[rustc_legacy_const_generics(3)]
8033#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8034pub const fn _mm512_mask_srai_epi16<const IMM8: u32>(
8035    src: __m512i,
8036    k: __mmask32,
8037    a: __m512i,
8038) -> __m512i {
8039    unsafe {
8040        static_assert_uimm_bits!(IMM8, 8);
8041        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
8042        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
8043    }
8044}
8045
8046/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8047///
8048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi16&expand=5426)
8049#[inline]
8050#[target_feature(enable = "avx512bw")]
8051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8052#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8053#[rustc_legacy_const_generics(2)]
8054#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8055pub const fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
8056    unsafe {
8057        static_assert_uimm_bits!(IMM8, 8);
8058        let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
8059        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
8060    }
8061}
8062
8063/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8064///
8065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi16&expand=5422)
8066#[inline]
8067#[target_feature(enable = "avx512bw,avx512vl")]
8068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8069#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8070#[rustc_legacy_const_generics(3)]
8071#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8072pub const fn _mm256_mask_srai_epi16<const IMM8: u32>(
8073    src: __m256i,
8074    k: __mmask16,
8075    a: __m256i,
8076) -> __m256i {
8077    unsafe {
8078        static_assert_uimm_bits!(IMM8, 8);
8079        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
8080        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
8081    }
8082}
8083
8084/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8085///
8086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi16&expand=5423)
8087#[inline]
8088#[target_feature(enable = "avx512bw,avx512vl")]
8089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8090#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8091#[rustc_legacy_const_generics(2)]
8092#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8093pub const fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
8094    unsafe {
8095        static_assert_uimm_bits!(IMM8, 8);
8096        let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
8097        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
8098    }
8099}
8100
8101/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8102///
8103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi16&expand=5419)
8104#[inline]
8105#[target_feature(enable = "avx512bw,avx512vl")]
8106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8107#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8108#[rustc_legacy_const_generics(3)]
8109#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8110pub const fn _mm_mask_srai_epi16<const IMM8: u32>(
8111    src: __m128i,
8112    k: __mmask8,
8113    a: __m128i,
8114) -> __m128i {
8115    unsafe {
8116        static_assert_uimm_bits!(IMM8, 8);
8117        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
8118        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
8119    }
8120}
8121
8122/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8123///
8124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi16&expand=5420)
8125#[inline]
8126#[target_feature(enable = "avx512bw,avx512vl")]
8127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8128#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
8129#[rustc_legacy_const_generics(2)]
8130#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8131pub const fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
8132    unsafe {
8133        static_assert_uimm_bits!(IMM8, 8);
8134        let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
8135        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
8136    }
8137}
8138
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi16&expand=5456)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // Per-lane shift counts >= 16 would be UB for `simd_shr`, so they are
        // clamped to 15; an arithmetic shift of an i16 by 15 fills the whole
        // lane with the sign bit, matching the hardware's out-of-range result.
        // NOTE: this exact select-then-shift shape is what LLVM recognizes to
        // emit the single `vpsravw` instruction asserted above.
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x32::splat(15));
        simd_shr(a.as_i16x32(), count).as_m512i()
    }
}
8155
8156/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8157///
8158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi16&expand=5454)
8159#[inline]
8160#[target_feature(enable = "avx512bw")]
8161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8162#[cfg_attr(test, assert_instr(vpsravw))]
8163#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8164pub const fn _mm512_mask_srav_epi16(
8165    src: __m512i,
8166    k: __mmask32,
8167    a: __m512i,
8168    count: __m512i,
8169) -> __m512i {
8170    unsafe {
8171        let shf = _mm512_srav_epi16(a, count).as_i16x32();
8172        transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
8173    }
8174}
8175
8176/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8177///
8178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi16&expand=5455)
8179#[inline]
8180#[target_feature(enable = "avx512bw")]
8181#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8182#[cfg_attr(test, assert_instr(vpsravw))]
8183#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8184pub const fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
8185    unsafe {
8186        let shf = _mm512_srav_epi16(a, count).as_i16x32();
8187        transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
8188    }
8189}
8190
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi16&expand=5453)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // Per-lane shift counts >= 16 would be UB for `simd_shr`, so they are
        // clamped to 15; an arithmetic shift of an i16 by 15 sign-fills the
        // lane, matching the hardware's out-of-range result. This exact shape
        // is what LLVM recognizes to emit a single `vpsravw`.
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x16::splat(15));
        simd_shr(a.as_i16x16(), count).as_m256i()
    }
}
8207
8208/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8209///
8210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi16&expand=5451)
8211#[inline]
8212#[target_feature(enable = "avx512bw,avx512vl")]
8213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8214#[cfg_attr(test, assert_instr(vpsravw))]
8215#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8216pub const fn _mm256_mask_srav_epi16(
8217    src: __m256i,
8218    k: __mmask16,
8219    a: __m256i,
8220    count: __m256i,
8221) -> __m256i {
8222    unsafe {
8223        let shf = _mm256_srav_epi16(a, count).as_i16x16();
8224        transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
8225    }
8226}
8227
8228/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8229///
8230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi16&expand=5452)
8231#[inline]
8232#[target_feature(enable = "avx512bw,avx512vl")]
8233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8234#[cfg_attr(test, assert_instr(vpsravw))]
8235#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8236pub const fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
8237    unsafe {
8238        let shf = _mm256_srav_epi16(a, count).as_i16x16();
8239        transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
8240    }
8241}
8242
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi16&expand=5450)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        // Per-lane shift counts >= 16 would be UB for `simd_shr`, so they are
        // clamped to 15; an arithmetic shift of an i16 by 15 sign-fills the
        // lane, matching the hardware's out-of-range result. This exact shape
        // is what LLVM recognizes to emit a single `vpsravw`.
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, transmute(count), i16x8::splat(15));
        simd_shr(a.as_i16x8(), count).as_m128i()
    }
}
8259
8260/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8261///
8262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi16&expand=5448)
8263#[inline]
8264#[target_feature(enable = "avx512bw,avx512vl")]
8265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8266#[cfg_attr(test, assert_instr(vpsravw))]
8267#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8268pub const fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8269    unsafe {
8270        let shf = _mm_srav_epi16(a, count).as_i16x8();
8271        transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
8272    }
8273}
8274
8275/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8276///
8277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi16&expand=5449)
8278#[inline]
8279#[target_feature(enable = "avx512bw,avx512vl")]
8280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8281#[cfg_attr(test, assert_instr(vpsravw))]
8282#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8283pub const fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
8284    unsafe {
8285        let shf = _mm_srav_epi16(a, count).as_i16x8();
8286        transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
8287    }
8288}
8289
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi16&expand=4226)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    // Delegates to the LLVM `vpermi2w` intrinsic: each idx lane selects one
    // 16-bit element from the combined elements of `a` and `b`.
    unsafe { transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32())) }
}
8300
8301/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8302///
8303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi16&expand=4223)
8304#[inline]
8305#[target_feature(enable = "avx512bw")]
8306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8307#[cfg_attr(test, assert_instr(vpermt2w))]
8308pub fn _mm512_mask_permutex2var_epi16(
8309    a: __m512i,
8310    k: __mmask32,
8311    idx: __m512i,
8312    b: __m512i,
8313) -> __m512i {
8314    unsafe {
8315        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
8316        transmute(simd_select_bitmask(k, permute, a.as_i16x32()))
8317    }
8318}
8319
8320/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8321///
8322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi16&expand=4225)
8323#[inline]
8324#[target_feature(enable = "avx512bw")]
8325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8326#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8327pub fn _mm512_maskz_permutex2var_epi16(
8328    k: __mmask32,
8329    a: __m512i,
8330    idx: __m512i,
8331    b: __m512i,
8332) -> __m512i {
8333    unsafe {
8334        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
8335        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
8336    }
8337}
8338
8339/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8340///
8341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi16&expand=4224)
8342#[inline]
8343#[target_feature(enable = "avx512bw")]
8344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8345#[cfg_attr(test, assert_instr(vpermi2w))]
8346pub fn _mm512_mask2_permutex2var_epi16(
8347    a: __m512i,
8348    idx: __m512i,
8349    k: __mmask32,
8350    b: __m512i,
8351) -> __m512i {
8352    unsafe {
8353        let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
8354        transmute(simd_select_bitmask(k, permute, idx.as_i16x32()))
8355    }
8356}
8357
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi16&expand=4222)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    // Delegates to the 256-bit LLVM `vpermi2w` intrinsic: each idx lane selects
    // one 16-bit element from the combined elements of `a` and `b`.
    unsafe { transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16())) }
}
8368
8369/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8370///
8371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi16&expand=4219)
8372#[inline]
8373#[target_feature(enable = "avx512bw,avx512vl")]
8374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8375#[cfg_attr(test, assert_instr(vpermt2w))]
8376pub fn _mm256_mask_permutex2var_epi16(
8377    a: __m256i,
8378    k: __mmask16,
8379    idx: __m256i,
8380    b: __m256i,
8381) -> __m256i {
8382    unsafe {
8383        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
8384        transmute(simd_select_bitmask(k, permute, a.as_i16x16()))
8385    }
8386}
8387
8388/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8389///
8390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi16&expand=4221)
8391#[inline]
8392#[target_feature(enable = "avx512bw,avx512vl")]
8393#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8394#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8395pub fn _mm256_maskz_permutex2var_epi16(
8396    k: __mmask16,
8397    a: __m256i,
8398    idx: __m256i,
8399    b: __m256i,
8400) -> __m256i {
8401    unsafe {
8402        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
8403        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
8404    }
8405}
8406
8407/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8408///
8409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi16&expand=4220)
8410#[inline]
8411#[target_feature(enable = "avx512bw,avx512vl")]
8412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8413#[cfg_attr(test, assert_instr(vpermi2w))]
8414pub fn _mm256_mask2_permutex2var_epi16(
8415    a: __m256i,
8416    idx: __m256i,
8417    k: __mmask16,
8418    b: __m256i,
8419) -> __m256i {
8420    unsafe {
8421        let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
8422        transmute(simd_select_bitmask(k, permute, idx.as_i16x16()))
8423    }
8424}
8425
/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi16&expand=4218)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    // Delegates to the 128-bit LLVM `vpermi2w` intrinsic: each idx lane selects
    // one 16-bit element from the combined elements of `a` and `b`.
    unsafe { transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8())) }
}
8436
8437/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8438///
8439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi16&expand=4215)
8440#[inline]
8441#[target_feature(enable = "avx512bw,avx512vl")]
8442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8443#[cfg_attr(test, assert_instr(vpermt2w))]
8444pub fn _mm_mask_permutex2var_epi16(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
8445    unsafe {
8446        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
8447        transmute(simd_select_bitmask(k, permute, a.as_i16x8()))
8448    }
8449}
8450
8451/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8452///
8453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi16&expand=4217)
8454#[inline]
8455#[target_feature(enable = "avx512bw,avx512vl")]
8456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8457#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
8458pub fn _mm_maskz_permutex2var_epi16(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
8459    unsafe {
8460        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
8461        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
8462    }
8463}
8464
8465/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8466///
8467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi16&expand=4216)
8468#[inline]
8469#[target_feature(enable = "avx512bw,avx512vl")]
8470#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8471#[cfg_attr(test, assert_instr(vpermi2w))]
8472pub fn _mm_mask2_permutex2var_epi16(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
8473    unsafe {
8474        let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
8475        transmute(simd_select_bitmask(k, permute, idx.as_i16x8()))
8476    }
8477}
8478
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi16&expand=4295)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i {
    // Delegates to the LLVM `vpermw` intrinsic: each idx lane selects one
    // 16-bit element of `a`.
    unsafe { transmute(vpermw(a.as_i16x32(), idx.as_i16x32())) }
}
8489
8490/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8491///
8492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi16&expand=4293)
8493#[inline]
8494#[target_feature(enable = "avx512bw")]
8495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8496#[cfg_attr(test, assert_instr(vpermw))]
8497pub fn _mm512_mask_permutexvar_epi16(
8498    src: __m512i,
8499    k: __mmask32,
8500    idx: __m512i,
8501    a: __m512i,
8502) -> __m512i {
8503    unsafe {
8504        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
8505        transmute(simd_select_bitmask(k, permute, src.as_i16x32()))
8506    }
8507}
8508
8509/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8510///
8511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi16&expand=4294)
8512#[inline]
8513#[target_feature(enable = "avx512bw")]
8514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8515#[cfg_attr(test, assert_instr(vpermw))]
8516pub fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i {
8517    unsafe {
8518        let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
8519        transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
8520    }
8521}
8522
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi16&expand=4292)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i {
    // Delegates to the 256-bit LLVM `vpermw` intrinsic: each idx lane selects
    // one 16-bit element of `a`.
    unsafe { transmute(vpermw256(a.as_i16x16(), idx.as_i16x16())) }
}
8533
8534/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8535///
8536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi16&expand=4290)
8537#[inline]
8538#[target_feature(enable = "avx512bw,avx512vl")]
8539#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8540#[cfg_attr(test, assert_instr(vpermw))]
8541pub fn _mm256_mask_permutexvar_epi16(
8542    src: __m256i,
8543    k: __mmask16,
8544    idx: __m256i,
8545    a: __m256i,
8546) -> __m256i {
8547    unsafe {
8548        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
8549        transmute(simd_select_bitmask(k, permute, src.as_i16x16()))
8550    }
8551}
8552
8553/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8554///
8555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi16&expand=4291)
8556#[inline]
8557#[target_feature(enable = "avx512bw,avx512vl")]
8558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8559#[cfg_attr(test, assert_instr(vpermw))]
8560pub fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i {
8561    unsafe {
8562        let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
8563        transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
8564    }
8565}
8566
/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi16&expand=4289)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermw))]
pub fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i {
    // Delegates to the 128-bit LLVM `vpermw` intrinsic: each idx lane selects
    // one 16-bit element of `a`.
    unsafe { transmute(vpermw128(a.as_i16x8(), idx.as_i16x8())) }
}
8577
8578/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8579///
8580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi16&expand=4287)
8581#[inline]
8582#[target_feature(enable = "avx512bw,avx512vl")]
8583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8584#[cfg_attr(test, assert_instr(vpermw))]
8585pub fn _mm_mask_permutexvar_epi16(src: __m128i, k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
8586    unsafe {
8587        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
8588        transmute(simd_select_bitmask(k, permute, src.as_i16x8()))
8589    }
8590}
8591
8592/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8593///
8594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi16&expand=4288)
8595#[inline]
8596#[target_feature(enable = "avx512bw,avx512vl")]
8597#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8598#[cfg_attr(test, assert_instr(vpermw))]
8599pub fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
8600    unsafe {
8601        let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
8602        transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
8603    }
8604}
8605
8606/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
8607///
8608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi16&expand=430)
8609#[inline]
8610#[target_feature(enable = "avx512bw")]
8611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8612#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8613#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8614pub const fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8615    unsafe { transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) }
8616}
8617
8618/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
8619///
8620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi16&expand=429)
8621#[inline]
8622#[target_feature(enable = "avx512bw,avx512vl")]
8623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8624#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8625#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8626pub const fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
8627    unsafe { transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) }
8628}
8629
8630/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
8631///
8632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi16&expand=427)
8633#[inline]
8634#[target_feature(enable = "avx512bw,avx512vl")]
8635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8636#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
8637#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8638pub const fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
8639    unsafe { transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) }
8640}
8641
8642/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8643///
8644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi8&expand=441)
8645#[inline]
8646#[target_feature(enable = "avx512bw")]
8647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8648#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8649#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8650pub const fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
8651    unsafe { transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) }
8652}
8653
8654/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8655///
8656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi8&expand=440)
8657#[inline]
8658#[target_feature(enable = "avx512bw,avx512vl")]
8659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8660#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8661#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8662pub const fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
8663    unsafe { transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) }
8664}
8665
8666/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
8667///
8668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi8&expand=439)
8669#[inline]
8670#[target_feature(enable = "avx512bw,avx512vl")]
8671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8672#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
8673#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8674pub const fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
8675    unsafe { transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) }
8676}
8677
8678/// Broadcast the low packed 16-bit integer from a to all elements of dst.
8679///
8680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastw_epi16&expand=587)
8681#[inline]
8682#[target_feature(enable = "avx512bw")]
8683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8684#[cfg_attr(test, assert_instr(vpbroadcastw))]
8685#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8686pub const fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
8687    unsafe {
8688        let a = _mm512_castsi128_si512(a).as_i16x32();
8689        let ret: i16x32 = simd_shuffle!(
8690            a,
8691            a,
8692            [
8693                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8694                0, 0, 0, 0,
8695            ],
8696        );
8697        transmute(ret)
8698    }
8699}
8700
8701/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8702///
8703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastw_epi16&expand=588)
8704#[inline]
8705#[target_feature(enable = "avx512bw")]
8706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8707#[cfg_attr(test, assert_instr(vpbroadcastw))]
8708#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8709pub const fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
8710    unsafe {
8711        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
8712        transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
8713    }
8714}
8715
8716/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8717///
8718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastw_epi16&expand=589)
8719#[inline]
8720#[target_feature(enable = "avx512bw")]
8721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8722#[cfg_attr(test, assert_instr(vpbroadcastw))]
8723#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8724pub const fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
8725    unsafe {
8726        let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
8727        transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO))
8728    }
8729}
8730
8731/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8732///
8733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastw_epi16&expand=585)
8734#[inline]
8735#[target_feature(enable = "avx512bw,avx512vl")]
8736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8737#[cfg_attr(test, assert_instr(vpbroadcastw))]
8738#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8739pub const fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
8740    unsafe {
8741        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
8742        transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
8743    }
8744}
8745
8746/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8747///
8748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastw_epi16&expand=586)
8749#[inline]
8750#[target_feature(enable = "avx512bw,avx512vl")]
8751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8752#[cfg_attr(test, assert_instr(vpbroadcastw))]
8753#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8754pub const fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
8755    unsafe {
8756        let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
8757        transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO))
8758    }
8759}
8760
8761/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8762///
8763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastw_epi16&expand=582)
8764#[inline]
8765#[target_feature(enable = "avx512bw,avx512vl")]
8766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8767#[cfg_attr(test, assert_instr(vpbroadcastw))]
8768#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8769pub const fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
8770    unsafe {
8771        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
8772        transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
8773    }
8774}
8775
8776/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8777///
8778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastw_epi16&expand=583)
8779#[inline]
8780#[target_feature(enable = "avx512bw,avx512vl")]
8781#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8782#[cfg_attr(test, assert_instr(vpbroadcastw))]
8783#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8784pub const fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
8785    unsafe {
8786        let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
8787        transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO))
8788    }
8789}
8790
8791/// Broadcast the low packed 8-bit integer from a to all elements of dst.
8792///
8793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastb_epi8&expand=536)
8794#[inline]
8795#[target_feature(enable = "avx512bw")]
8796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8797#[cfg_attr(test, assert_instr(vpbroadcastb))]
8798#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8799pub const fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
8800    unsafe {
8801        let a = _mm512_castsi128_si512(a).as_i8x64();
8802        let ret: i8x64 = simd_shuffle!(
8803            a,
8804            a,
8805            [
8806                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8807                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8808                0, 0, 0, 0, 0, 0, 0, 0,
8809            ],
8810        );
8811        transmute(ret)
8812    }
8813}
8814
8815/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8816///
8817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastb_epi8&expand=537)
8818#[inline]
8819#[target_feature(enable = "avx512bw")]
8820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8821#[cfg_attr(test, assert_instr(vpbroadcastb))]
8822#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8823pub const fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
8824    unsafe {
8825        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
8826        transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
8827    }
8828}
8829
8830/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8831///
8832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastb_epi8&expand=538)
8833#[inline]
8834#[target_feature(enable = "avx512bw")]
8835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8836#[cfg_attr(test, assert_instr(vpbroadcastb))]
8837#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8838pub const fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
8839    unsafe {
8840        let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
8841        transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO))
8842    }
8843}
8844
8845/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8846///
8847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastb_epi8&expand=534)
8848#[inline]
8849#[target_feature(enable = "avx512bw,avx512vl")]
8850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8851#[cfg_attr(test, assert_instr(vpbroadcastb))]
8852#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8853pub const fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
8854    unsafe {
8855        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
8856        transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
8857    }
8858}
8859
8860/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8861///
8862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastb_epi8&expand=535)
8863#[inline]
8864#[target_feature(enable = "avx512bw,avx512vl")]
8865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8866#[cfg_attr(test, assert_instr(vpbroadcastb))]
8867#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8868pub const fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
8869    unsafe {
8870        let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
8871        transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO))
8872    }
8873}
8874
8875/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8876///
8877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastb_epi8&expand=531)
8878#[inline]
8879#[target_feature(enable = "avx512bw,avx512vl")]
8880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8881#[cfg_attr(test, assert_instr(vpbroadcastb))]
8882#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8883pub const fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
8884    unsafe {
8885        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
8886        transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
8887    }
8888}
8889
8890/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8891///
8892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastb_epi8&expand=532)
8893#[inline]
8894#[target_feature(enable = "avx512bw,avx512vl")]
8895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8896#[cfg_attr(test, assert_instr(vpbroadcastb))]
8897#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8898pub const fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
8899    unsafe {
8900        let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
8901        transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO))
8902    }
8903}
8904
/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi16&expand=6012)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // `simd_shuffle!` indexes the concatenation of `a` (0..=31) and `b`
        // (32..=63), so `32 + i` picks element `i` of `b`. Each row below
        // handles one 128-bit lane (8 i16 elements), interleaving a[j], b[j]
        // for j in the lane's high half (elements 4..=7 of that lane).
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
                4, 32 + 4, 5, 32 + 5,
                6, 32 + 6, 7, 32 + 7,
                12, 32 + 12, 13, 32 + 13,
                14, 32 + 14, 15, 32 + 15,
                20, 32 + 20, 21, 32 + 21,
                22, 32 + 22, 23, 32 + 23,
                28, 32 + 28, 29, 32 + 29,
                30, 32 + 30, 31, 32 + 31,
            ],
        );
        transmute(r)
    }
}
8935
8936/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8937///
8938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi16&expand=6010)
8939#[inline]
8940#[target_feature(enable = "avx512bw")]
8941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8942#[cfg_attr(test, assert_instr(vpunpckhwd))]
8943#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8944pub const fn _mm512_mask_unpackhi_epi16(
8945    src: __m512i,
8946    k: __mmask32,
8947    a: __m512i,
8948    b: __m512i,
8949) -> __m512i {
8950    unsafe {
8951        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
8952        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
8953    }
8954}
8955
8956/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8957///
8958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi16&expand=6011)
8959#[inline]
8960#[target_feature(enable = "avx512bw")]
8961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8962#[cfg_attr(test, assert_instr(vpunpckhwd))]
8963#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8964pub const fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
8965    unsafe {
8966        let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
8967        transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO))
8968    }
8969}
8970
8971/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8972///
8973/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi16&expand=6007)
8974#[inline]
8975#[target_feature(enable = "avx512bw,avx512vl")]
8976#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8977#[cfg_attr(test, assert_instr(vpunpckhwd))]
8978#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8979pub const fn _mm256_mask_unpackhi_epi16(
8980    src: __m256i,
8981    k: __mmask16,
8982    a: __m256i,
8983    b: __m256i,
8984) -> __m256i {
8985    unsafe {
8986        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
8987        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
8988    }
8989}
8990
8991/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8992///
8993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi16&expand=6008)
8994#[inline]
8995#[target_feature(enable = "avx512bw,avx512vl")]
8996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8997#[cfg_attr(test, assert_instr(vpunpckhwd))]
8998#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
8999pub const fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
9000    unsafe {
9001        let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
9002        transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO))
9003    }
9004}
9005
9006/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9007///
9008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi16&expand=6004)
9009#[inline]
9010#[target_feature(enable = "avx512bw,avx512vl")]
9011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9012#[cfg_attr(test, assert_instr(vpunpckhwd))]
9013#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9014pub const fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9015    unsafe {
9016        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
9017        transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
9018    }
9019}
9020
9021/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9022///
9023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi16&expand=6005)
9024#[inline]
9025#[target_feature(enable = "avx512bw,avx512vl")]
9026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9027#[cfg_attr(test, assert_instr(vpunpckhwd))]
9028#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9029pub const fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9030    unsafe {
9031        let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
9032        transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO))
9033    }
9034}
9035
/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi8&expand=6039)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // `simd_shuffle!` indexes the concatenation of `a` (0..=63) and `b`
        // (64..=127), so `64 + i` picks element `i` of `b`. Every four rows
        // below handle one 128-bit lane (16 i8 elements), interleaving
        // a[j], b[j] for j in the lane's high half (elements 8..=15 of that
        // lane).
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                8, 64 + 8, 9, 64 + 9,
                10, 64 + 10, 11, 64 + 11,
                12, 64 + 12, 13, 64 + 13,
                14, 64 + 14, 15, 64 + 15,
                24, 64 + 24, 25, 64 + 25,
                26, 64 + 26, 27, 64 + 27,
                28, 64 + 28, 29, 64 + 29,
                30, 64 + 30, 31, 64 + 31,
                40, 64 + 40, 41, 64 + 41,
                42, 64 + 42, 43, 64 + 43,
                44, 64 + 44, 45, 64 + 45,
                46, 64 + 46, 47, 64 + 47,
                56, 64 + 56, 57, 64 + 57,
                58, 64 + 58, 59, 64 + 59,
                60, 64 + 60, 61, 64 + 61,
                62, 64 + 62, 63, 64 + 63,
            ],
        );
        transmute(r)
    }
}
9074
9075/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9076///
9077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi8&expand=6037)
9078#[inline]
9079#[target_feature(enable = "avx512bw")]
9080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9081#[cfg_attr(test, assert_instr(vpunpckhbw))]
9082#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9083pub const fn _mm512_mask_unpackhi_epi8(
9084    src: __m512i,
9085    k: __mmask64,
9086    a: __m512i,
9087    b: __m512i,
9088) -> __m512i {
9089    unsafe {
9090        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
9091        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
9092    }
9093}
9094
9095/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9096///
9097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi8&expand=6038)
9098#[inline]
9099#[target_feature(enable = "avx512bw")]
9100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9101#[cfg_attr(test, assert_instr(vpunpckhbw))]
9102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9103pub const fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
9104    unsafe {
9105        let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
9106        transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO))
9107    }
9108}
9109
9110/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9111///
9112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi8&expand=6034)
9113#[inline]
9114#[target_feature(enable = "avx512bw,avx512vl")]
9115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9116#[cfg_attr(test, assert_instr(vpunpckhbw))]
9117#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9118pub const fn _mm256_mask_unpackhi_epi8(
9119    src: __m256i,
9120    k: __mmask32,
9121    a: __m256i,
9122    b: __m256i,
9123) -> __m256i {
9124    unsafe {
9125        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
9126        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
9127    }
9128}
9129
9130/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9131///
9132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi8&expand=6035)
9133#[inline]
9134#[target_feature(enable = "avx512bw,avx512vl")]
9135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9136#[cfg_attr(test, assert_instr(vpunpckhbw))]
9137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9138pub const fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
9139    unsafe {
9140        let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
9141        transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO))
9142    }
9143}
9144
9145/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9146///
9147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi8&expand=6031)
9148#[inline]
9149#[target_feature(enable = "avx512bw,avx512vl")]
9150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9151#[cfg_attr(test, assert_instr(vpunpckhbw))]
9152#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9153pub const fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9154    unsafe {
9155        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
9156        transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
9157    }
9158}
9159
9160/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9161///
9162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi8&expand=6032)
9163#[inline]
9164#[target_feature(enable = "avx512bw,avx512vl")]
9165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9166#[cfg_attr(test, assert_instr(vpunpckhbw))]
9167#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9168pub const fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9169    unsafe {
9170        let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
9171        transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO))
9172    }
9173}
9174
/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi16&expand=6069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i16x32();
        let b = b.as_i16x32();
        // `simd_shuffle!` indexes the concatenation of `a` (0..=31) and `b`
        // (32..=63), so `32+i` picks element `i` of `b`. Every two rows below
        // handle one 128-bit lane (8 i16 elements), interleaving a[j], b[j]
        // for j in the lane's low half (elements 0..=3 of that lane).
        #[rustfmt::skip]
        let r: i16x32 = simd_shuffle!(
            a,
            b,
            [
               0,  32+0,   1, 32+1,
               2,  32+2,   3, 32+3,
               8,  32+8,   9, 32+9,
               10, 32+10, 11, 32+11,
               16, 32+16, 17, 32+17,
               18, 32+18, 19, 32+19,
               24, 32+24, 25, 32+25,
               26, 32+26, 27, 32+27
            ],
        );
        transmute(r)
    }
}
9205
9206/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9207///
9208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi16&expand=6067)
9209#[inline]
9210#[target_feature(enable = "avx512bw")]
9211#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9212#[cfg_attr(test, assert_instr(vpunpcklwd))]
9213#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9214pub const fn _mm512_mask_unpacklo_epi16(
9215    src: __m512i,
9216    k: __mmask32,
9217    a: __m512i,
9218    b: __m512i,
9219) -> __m512i {
9220    unsafe {
9221        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
9222        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
9223    }
9224}
9225
9226/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9227///
9228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi16&expand=6068)
9229#[inline]
9230#[target_feature(enable = "avx512bw")]
9231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9232#[cfg_attr(test, assert_instr(vpunpcklwd))]
9233#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9234pub const fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
9235    unsafe {
9236        let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
9237        transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO))
9238    }
9239}
9240
9241/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9242///
9243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi16&expand=6064)
9244#[inline]
9245#[target_feature(enable = "avx512bw,avx512vl")]
9246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9247#[cfg_attr(test, assert_instr(vpunpcklwd))]
9248#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9249pub const fn _mm256_mask_unpacklo_epi16(
9250    src: __m256i,
9251    k: __mmask16,
9252    a: __m256i,
9253    b: __m256i,
9254) -> __m256i {
9255    unsafe {
9256        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
9257        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
9258    }
9259}
9260
9261/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9262///
9263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi16&expand=6065)
9264#[inline]
9265#[target_feature(enable = "avx512bw,avx512vl")]
9266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9267#[cfg_attr(test, assert_instr(vpunpcklwd))]
9268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9269pub const fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
9270    unsafe {
9271        let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
9272        transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO))
9273    }
9274}
9275
9276/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9277///
9278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi16&expand=6061)
9279#[inline]
9280#[target_feature(enable = "avx512bw,avx512vl")]
9281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9282#[cfg_attr(test, assert_instr(vpunpcklwd))]
9283#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9284pub const fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9285    unsafe {
9286        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
9287        transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
9288    }
9289}
9290
9291/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9292///
9293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi16&expand=6062)
9294#[inline]
9295#[target_feature(enable = "avx512bw,avx512vl")]
9296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9297#[cfg_attr(test, assert_instr(vpunpcklwd))]
9298#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9299pub const fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
9300    unsafe {
9301        let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
9302        transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO))
9303    }
9304}
9305
/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi8&expand=6096)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i8x64();
        let b = b.as_i8x64();
        // Shuffle index convention: 0..=63 selects from `a`, `64 + i` selects
        // `b[i]`. Each group of 16 indices interleaves the low 8 bytes of one
        // 128-bit lane of `a` with the low 8 bytes of the same lane of `b`;
        // the four lanes start at byte offsets 0, 16, 32 and 48.
        #[rustfmt::skip]
        let r: i8x64 = simd_shuffle!(
            a,
            b,
            [
                0,  64+0,   1, 64+1,
                2,  64+2,   3, 64+3,
                4,  64+4,   5, 64+5,
                6,  64+6,   7, 64+7,
                16, 64+16, 17, 64+17,
                18, 64+18, 19, 64+19,
                20, 64+20, 21, 64+21,
                22, 64+22, 23, 64+23,
                32, 64+32, 33, 64+33,
                34, 64+34, 35, 64+35,
                36, 64+36, 37, 64+37,
                38, 64+38, 39, 64+39,
                48, 64+48, 49, 64+49,
                50, 64+50, 51, 64+51,
                52, 64+52, 53, 64+53,
                54, 64+54, 55, 64+55,
            ],
        );
        transmute(r)
    }
}
9344
9345/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9346///
9347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi8&expand=6094)
9348#[inline]
9349#[target_feature(enable = "avx512bw")]
9350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9351#[cfg_attr(test, assert_instr(vpunpcklbw))]
9352#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9353pub const fn _mm512_mask_unpacklo_epi8(
9354    src: __m512i,
9355    k: __mmask64,
9356    a: __m512i,
9357    b: __m512i,
9358) -> __m512i {
9359    unsafe {
9360        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
9361        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
9362    }
9363}
9364
9365/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9366///
9367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi8&expand=6095)
9368#[inline]
9369#[target_feature(enable = "avx512bw")]
9370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9371#[cfg_attr(test, assert_instr(vpunpcklbw))]
9372#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9373pub const fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
9374    unsafe {
9375        let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
9376        transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO))
9377    }
9378}
9379
9380/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9381///
9382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi8&expand=6091)
9383#[inline]
9384#[target_feature(enable = "avx512bw,avx512vl")]
9385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9386#[cfg_attr(test, assert_instr(vpunpcklbw))]
9387#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9388pub const fn _mm256_mask_unpacklo_epi8(
9389    src: __m256i,
9390    k: __mmask32,
9391    a: __m256i,
9392    b: __m256i,
9393) -> __m256i {
9394    unsafe {
9395        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
9396        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
9397    }
9398}
9399
9400/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9401///
9402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi8&expand=6092)
9403#[inline]
9404#[target_feature(enable = "avx512bw,avx512vl")]
9405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9406#[cfg_attr(test, assert_instr(vpunpcklbw))]
9407#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9408pub const fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
9409    unsafe {
9410        let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
9411        transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO))
9412    }
9413}
9414
9415/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9416///
9417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi8&expand=6088)
9418#[inline]
9419#[target_feature(enable = "avx512bw,avx512vl")]
9420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9421#[cfg_attr(test, assert_instr(vpunpcklbw))]
9422#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9423pub const fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9424    unsafe {
9425        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
9426        transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
9427    }
9428}
9429
9430/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9431///
9432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi8&expand=6089)
9433#[inline]
9434#[target_feature(enable = "avx512bw,avx512vl")]
9435#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9436#[cfg_attr(test, assert_instr(vpunpcklbw))]
9437#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9438pub const fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
9439    unsafe {
9440        let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
9441        transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO))
9442    }
9443}
9444
9445/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9446///
9447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi16&expand=3795)
9448#[inline]
9449#[target_feature(enable = "avx512bw")]
9450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9451#[cfg_attr(test, assert_instr(vmovdqu16))]
9452#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9453pub const fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
9454    unsafe {
9455        let mov = a.as_i16x32();
9456        transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
9457    }
9458}
9459
9460/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9461///
9462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi16&expand=3796)
9463#[inline]
9464#[target_feature(enable = "avx512bw")]
9465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9466#[cfg_attr(test, assert_instr(vmovdqu16))]
9467#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9468pub const fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
9469    unsafe {
9470        let mov = a.as_i16x32();
9471        transmute(simd_select_bitmask(k, mov, i16x32::ZERO))
9472    }
9473}
9474
9475/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9476///
9477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi16&expand=3793)
9478#[inline]
9479#[target_feature(enable = "avx512bw,avx512vl")]
9480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9481#[cfg_attr(test, assert_instr(vmovdqu16))]
9482#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9483pub const fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
9484    unsafe {
9485        let mov = a.as_i16x16();
9486        transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
9487    }
9488}
9489
9490/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9491///
9492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi16&expand=3794)
9493#[inline]
9494#[target_feature(enable = "avx512bw,avx512vl")]
9495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9496#[cfg_attr(test, assert_instr(vmovdqu16))]
9497#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9498pub const fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
9499    unsafe {
9500        let mov = a.as_i16x16();
9501        transmute(simd_select_bitmask(k, mov, i16x16::ZERO))
9502    }
9503}
9504
9505/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9506///
9507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi16&expand=3791)
9508#[inline]
9509#[target_feature(enable = "avx512bw,avx512vl")]
9510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9511#[cfg_attr(test, assert_instr(vmovdqu16))]
9512#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9513pub const fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
9514    unsafe {
9515        let mov = a.as_i16x8();
9516        transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
9517    }
9518}
9519
9520/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9521///
9522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi16&expand=3792)
9523#[inline]
9524#[target_feature(enable = "avx512bw,avx512vl")]
9525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9526#[cfg_attr(test, assert_instr(vmovdqu16))]
9527#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9528pub const fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
9529    unsafe {
9530        let mov = a.as_i16x8();
9531        transmute(simd_select_bitmask(k, mov, i16x8::ZERO))
9532    }
9533}
9534
9535/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9536///
9537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi8&expand=3813)
9538#[inline]
9539#[target_feature(enable = "avx512bw")]
9540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9541#[cfg_attr(test, assert_instr(vmovdqu8))]
9542#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9543pub const fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
9544    unsafe {
9545        let mov = a.as_i8x64();
9546        transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
9547    }
9548}
9549
9550/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9551///
9552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi8&expand=3814)
9553#[inline]
9554#[target_feature(enable = "avx512bw")]
9555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9556#[cfg_attr(test, assert_instr(vmovdqu8))]
9557#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9558pub const fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
9559    unsafe {
9560        let mov = a.as_i8x64();
9561        transmute(simd_select_bitmask(k, mov, i8x64::ZERO))
9562    }
9563}
9564
9565/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9566///
9567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi8&expand=3811)
9568#[inline]
9569#[target_feature(enable = "avx512bw,avx512vl")]
9570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9571#[cfg_attr(test, assert_instr(vmovdqu8))]
9572#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9573pub const fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
9574    unsafe {
9575        let mov = a.as_i8x32();
9576        transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
9577    }
9578}
9579
9580/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9581///
9582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi8&expand=3812)
9583#[inline]
9584#[target_feature(enable = "avx512bw,avx512vl")]
9585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9586#[cfg_attr(test, assert_instr(vmovdqu8))]
9587#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9588pub const fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
9589    unsafe {
9590        let mov = a.as_i8x32();
9591        transmute(simd_select_bitmask(k, mov, i8x32::ZERO))
9592    }
9593}
9594
9595/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9596///
9597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi8&expand=3809)
9598#[inline]
9599#[target_feature(enable = "avx512bw,avx512vl")]
9600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9601#[cfg_attr(test, assert_instr(vmovdqu8))]
9602#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9603pub const fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
9604    unsafe {
9605        let mov = a.as_i8x16();
9606        transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
9607    }
9608}
9609
9610/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9611///
9612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi8&expand=3810)
9613#[inline]
9614#[target_feature(enable = "avx512bw,avx512vl")]
9615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9616#[cfg_attr(test, assert_instr(vmovdqu8))]
9617#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9618pub const fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
9619    unsafe {
9620        let mov = a.as_i8x16();
9621        transmute(simd_select_bitmask(k, mov, i8x16::ZERO))
9622    }
9623}
9624
9625/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9626///
9627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi16&expand=4942)
9628#[inline]
9629#[target_feature(enable = "avx512bw")]
9630#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9631#[cfg_attr(test, assert_instr(vpbroadcastw))]
9632#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9633pub const fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
9634    unsafe {
9635        let r = _mm512_set1_epi16(a).as_i16x32();
9636        transmute(simd_select_bitmask(k, r, src.as_i16x32()))
9637    }
9638}
9639
9640/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9641///
9642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi16&expand=4943)
9643#[inline]
9644#[target_feature(enable = "avx512bw")]
9645#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9646#[cfg_attr(test, assert_instr(vpbroadcastw))]
9647#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9648pub const fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
9649    unsafe {
9650        let r = _mm512_set1_epi16(a).as_i16x32();
9651        transmute(simd_select_bitmask(k, r, i16x32::ZERO))
9652    }
9653}
9654
9655/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9656///
9657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi16&expand=4939)
9658#[inline]
9659#[target_feature(enable = "avx512bw,avx512vl")]
9660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9661#[cfg_attr(test, assert_instr(vpbroadcastw))]
9662#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9663pub const fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
9664    unsafe {
9665        let r = _mm256_set1_epi16(a).as_i16x16();
9666        transmute(simd_select_bitmask(k, r, src.as_i16x16()))
9667    }
9668}
9669
9670/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9671///
9672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi16&expand=4940)
9673#[inline]
9674#[target_feature(enable = "avx512bw,avx512vl")]
9675#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9676#[cfg_attr(test, assert_instr(vpbroadcastw))]
9677#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9678pub const fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
9679    unsafe {
9680        let r = _mm256_set1_epi16(a).as_i16x16();
9681        transmute(simd_select_bitmask(k, r, i16x16::ZERO))
9682    }
9683}
9684
9685/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9686///
9687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi16&expand=4936)
9688#[inline]
9689#[target_feature(enable = "avx512bw,avx512vl")]
9690#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9691#[cfg_attr(test, assert_instr(vpbroadcastw))]
9692#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9693pub const fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
9694    unsafe {
9695        let r = _mm_set1_epi16(a).as_i16x8();
9696        transmute(simd_select_bitmask(k, r, src.as_i16x8()))
9697    }
9698}
9699
9700/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9701///
9702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi16&expand=4937)
9703#[inline]
9704#[target_feature(enable = "avx512bw,avx512vl")]
9705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9706#[cfg_attr(test, assert_instr(vpbroadcastw))]
9707#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9708pub const fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
9709    unsafe {
9710        let r = _mm_set1_epi16(a).as_i16x8();
9711        transmute(simd_select_bitmask(k, r, i16x8::ZERO))
9712    }
9713}
9714
9715/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9716///
9717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi8&expand=4970)
9718#[inline]
9719#[target_feature(enable = "avx512bw")]
9720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9721#[cfg_attr(test, assert_instr(vpbroadcast))]
9722#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9723pub const fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
9724    unsafe {
9725        let r = _mm512_set1_epi8(a).as_i8x64();
9726        transmute(simd_select_bitmask(k, r, src.as_i8x64()))
9727    }
9728}
9729
9730/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9731///
9732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi8&expand=4971)
9733#[inline]
9734#[target_feature(enable = "avx512bw")]
9735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9736#[cfg_attr(test, assert_instr(vpbroadcast))]
9737#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9738pub const fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
9739    unsafe {
9740        let r = _mm512_set1_epi8(a).as_i8x64();
9741        transmute(simd_select_bitmask(k, r, i8x64::ZERO))
9742    }
9743}
9744
9745/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9746///
9747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi8&expand=4967)
9748#[inline]
9749#[target_feature(enable = "avx512bw,avx512vl")]
9750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9751#[cfg_attr(test, assert_instr(vpbroadcast))]
9752#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9753pub const fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
9754    unsafe {
9755        let r = _mm256_set1_epi8(a).as_i8x32();
9756        transmute(simd_select_bitmask(k, r, src.as_i8x32()))
9757    }
9758}
9759
9760/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9761///
9762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi8&expand=4968)
9763#[inline]
9764#[target_feature(enable = "avx512bw,avx512vl")]
9765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9766#[cfg_attr(test, assert_instr(vpbroadcast))]
9767#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9768pub const fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
9769    unsafe {
9770        let r = _mm256_set1_epi8(a).as_i8x32();
9771        transmute(simd_select_bitmask(k, r, i8x32::ZERO))
9772    }
9773}
9774
9775/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9776///
9777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi8&expand=4964)
9778#[inline]
9779#[target_feature(enable = "avx512bw,avx512vl")]
9780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9781#[cfg_attr(test, assert_instr(vpbroadcast))]
9782#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9783pub const fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
9784    unsafe {
9785        let r = _mm_set1_epi8(a).as_i8x16();
9786        transmute(simd_select_bitmask(k, r, src.as_i8x16()))
9787    }
9788}
9789
9790/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9791///
9792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi8&expand=4965)
9793#[inline]
9794#[target_feature(enable = "avx512bw,avx512vl")]
9795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9796#[cfg_attr(test, assert_instr(vpbroadcast))]
9797#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9798pub const fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
9799    unsafe {
9800        let r = _mm_set1_epi8(a).as_i8x16();
9801        transmute(simd_select_bitmask(k, r, i8x16::ZERO))
9802    }
9803}
9804
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflelo_epi16&expand=5221)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        // IMM8 holds four 2-bit selectors (bits 1:0, 3:2, 5:4, 7:6), one per
        // output word in the low four words of each 128-bit lane. Each lane
        // spans 8 words, so the lanes begin at word offsets 0, 8, 16 and 24;
        // the same IMM8 pattern is applied to every lane, shifted by the lane
        // base, while the high four words of each lane pass through unchanged.
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                IMM8 as u32 & 0b11,
                (IMM8 as u32 >> 2) & 0b11,
                (IMM8 as u32 >> 4) & 0b11,
                (IMM8 as u32 >> 6) & 0b11,
                4,
                5,
                6,
                7,
                (IMM8 as u32 & 0b11) + 8,
                ((IMM8 as u32 >> 2) & 0b11) + 8,
                ((IMM8 as u32 >> 4) & 0b11) + 8,
                ((IMM8 as u32 >> 6) & 0b11) + 8,
                12,
                13,
                14,
                15,
                (IMM8 as u32 & 0b11) + 16,
                ((IMM8 as u32 >> 2) & 0b11) + 16,
                ((IMM8 as u32 >> 4) & 0b11) + 16,
                ((IMM8 as u32 >> 6) & 0b11) + 16,
                20,
                21,
                22,
                23,
                (IMM8 as u32 & 0b11) + 24,
                ((IMM8 as u32 >> 2) & 0b11) + 24,
                ((IMM8 as u32 >> 4) & 0b11) + 24,
                ((IMM8 as u32 >> 6) & 0b11) + 24,
                28,
                29,
                30,
                31,
            ],
        );
        transmute(r)
    }
}
9859
9860/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
9861///
9862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflelo_epi16&expand=5219)
9863#[inline]
9864#[target_feature(enable = "avx512bw")]
9865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9866#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
9867#[rustc_legacy_const_generics(3)]
9868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
9869pub const fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
9870    src: __m512i,
9871    k: __mmask32,
9872    a: __m512i,
9873) -> __m512i {
9874    unsafe {
9875        static_assert_uimm_bits!(IMM8, 8);
9876        let r = _mm512_shufflelo_epi16::<IMM8>(a);
9877        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
9878    }
9879}
9880
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflelo_epi16&expand=5220)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflelo_epi16::<IMM8>(a);
        // Per-element zeromask: mask bit set -> shuffled element, clear -> 0.
        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
    }
}
9897
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflelo_epi16&expand=5216)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
        // Per-element writemask: mask bit set -> shuffled element, clear -> element from `src`.
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
    }
}
9918
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflelo_epi16&expand=5217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
        // Per-element zeromask: mask bit set -> shuffled element, clear -> 0.
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
    }
}
9935
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflelo_epi16&expand=5213)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
        // Per-element writemask: mask bit set -> shuffled element, clear -> element from `src`.
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
    }
}
9956
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflelo_epi16&expand=5214)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
        // Per-element zeromask: mask bit set -> shuffled element, clear -> 0.
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
    }
}
9973
/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflehi_epi16&expand=5212)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i16x32();
        // Same shuffle pattern repeated in each of the four 128-bit lanes
        // (base offsets 0, 8, 16, 24): the four low words of a lane pass
        // through unchanged, while each 2-bit field of IMM8 picks which of
        // the lane's four high words lands in each high-word slot.
        let r: i16x32 = simd_shuffle!(
            a,
            a,
            [
                0,
                1,
                2,
                3,
                (IMM8 as u32 & 0b11) + 4,
                ((IMM8 as u32 >> 2) & 0b11) + 4,
                ((IMM8 as u32 >> 4) & 0b11) + 4,
                ((IMM8 as u32 >> 6) & 0b11) + 4,
                8,
                9,
                10,
                11,
                (IMM8 as u32 & 0b11) + 12,
                ((IMM8 as u32 >> 2) & 0b11) + 12,
                ((IMM8 as u32 >> 4) & 0b11) + 12,
                ((IMM8 as u32 >> 6) & 0b11) + 12,
                16,
                17,
                18,
                19,
                (IMM8 as u32 & 0b11) + 20,
                ((IMM8 as u32 >> 2) & 0b11) + 20,
                ((IMM8 as u32 >> 4) & 0b11) + 20,
                ((IMM8 as u32 >> 6) & 0b11) + 20,
                24,
                25,
                26,
                27,
                (IMM8 as u32 & 0b11) + 28,
                ((IMM8 as u32 >> 2) & 0b11) + 28,
                ((IMM8 as u32 >> 4) & 0b11) + 28,
                ((IMM8 as u32 >> 6) & 0b11) + 28,
            ],
        );
        transmute(r)
    }
}
10028
/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflehi_epi16&expand=5210)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflehi_epi16::<IMM8>(a);
        // Per-element writemask: mask bit set -> shuffled element, clear -> element from `src`.
        transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
    }
}
10049
/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflehi_epi16&expand=5211)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_shufflehi_epi16::<IMM8>(a);
        // Per-element zeromask: mask bit set -> shuffled element, clear -> 0.
        transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
    }
}
10066
/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflehi_epi16&expand=5207)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
        // Per-element writemask: mask bit set -> shuffled element, clear -> element from `src`.
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
    }
}
10087
/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflehi_epi16&expand=5208)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
        // Per-element zeromask: mask bit set -> shuffled element, clear -> 0.
        transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
    }
}
10104
/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflehi_epi16&expand=5204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
        // Per-element writemask: mask bit set -> shuffled element, clear -> element from `src`.
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
    }
}
10125
/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflehi_epi16&expand=5205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
        // Per-element zeromask: mask bit set -> shuffled element, clear -> 0.
        transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
    }
}
10142
/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi8&expand=5159)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i {
    // Thin wrapper over the vpshufb LLVM intrinsic; shuffling never crosses
    // a 128-bit lane boundary.
    unsafe { transmute(vpshufb(a.as_i8x64(), b.as_i8x64())) }
}
10153
/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi8&expand=5157)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_mask_shuffle_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
        // Per-element writemask: mask bit set -> shuffled byte, clear -> byte from `src`.
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x64()))
    }
}
10167
/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi8&expand=5158)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
        // Per-element zeromask: mask bit set -> shuffled byte, clear -> 0.
        transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO))
    }
}
10181
/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi8&expand=5154)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm256_mask_shuffle_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
        // Per-element writemask: mask bit set -> shuffled byte, clear -> byte from `src`.
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x32()))
    }
}
10195
/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi8&expand=5155)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
        // Per-element zeromask: mask bit set -> shuffled byte, clear -> 0.
        transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO))
    }
}
10209
/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi8&expand=5151)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
        // Per-element writemask: mask bit set -> shuffled byte, clear -> byte from `src`.
        transmute(simd_select_bitmask(k, shuffle, src.as_i8x16()))
    }
}
10223
/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi8&expand=5152)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
        // Per-element zeromask: mask bit set -> shuffled byte, clear -> 0.
        transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO))
    }
}
10237
/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi16_mask&expand=5884)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Mask bit i = ((a & b) element i != 0); fuses to a single vptestmw.
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi16_mask(and, zero)
}
10251
/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi16_mask&expand=5883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Mask bit i = k[i] && ((a & b) element i != 0); fuses to a masked vptestmw.
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi16_mask(k, and, zero)
}
10265
/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi16_mask&expand=5882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Mask bit i = ((a & b) element i != 0); fuses to a single vptestmw.
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi16_mask(and, zero)
}
10279
/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi16_mask&expand=5881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // Mask bit i = k[i] && ((a & b) element i != 0); fuses to a masked vptestmw.
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi16_mask(k, and, zero)
}
10293
/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi16_mask&expand=5880)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // Mask bit i = ((a & b) element i != 0); fuses to a single vptestmw.
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi16_mask(and, zero)
}
10307
/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi16_mask&expand=5879)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // Mask bit i = k[i] && ((a & b) element i != 0); fuses to a masked vptestmw.
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi16_mask(k, and, zero)
}
10321
/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi8_mask&expand=5902)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // Mask bit i = ((a & b) byte i != 0); fuses to a single vptestmb.
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi8_mask(and, zero)
}
10335
/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi8_mask&expand=5901)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // Mask bit i = k[i] && ((a & b) byte i != 0); fuses to a masked vptestmb.
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi8_mask(k, and, zero)
}
10349
/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi8_mask&expand=5900)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // Mask bit i = ((a & b) byte i != 0); fuses to a single vptestmb.
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi8_mask(and, zero)
}
10363
/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi8_mask&expand=5899)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // Mask bit i = k[i] && ((a & b) byte i != 0); fuses to a masked vptestmb.
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi8_mask(k, and, zero)
}
10377
/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi8_mask&expand=5898)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // Mask bit i = ((a & b) byte i != 0); fuses to a single vptestmb.
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi8_mask(and, zero)
}
10391
/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi8_mask&expand=5897)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // Mask bit i = k[i] && ((a & b) byte i != 0); fuses to a masked vptestmb.
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi8_mask(k, and, zero)
}
10405
/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi16_mask&expand=5915)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // Mask bit i = ((a & b) element i == 0); fuses to a single vptestnmw.
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi16_mask(and, zero)
}
10419
/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi16_mask&expand=5914)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // Mask bit i = k[i] && ((a & b) element i == 0); fuses to a masked vptestnmw.
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi16_mask(k, and, zero)
}
10433
/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi16_mask&expand=5913)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // Mask bit i = ((a & b) element i == 0); fuses to a single vptestnmw.
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi16_mask(and, zero)
}
10447
10448/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10449///
10450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi16_mask&expand=5912)
10451#[inline]
10452#[target_feature(enable = "avx512bw,avx512vl")]
10453#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10454#[cfg_attr(test, assert_instr(vptestnmw))]
10455#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10456pub const fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
10457    let and = _mm256_and_si256(a, b);
10458    let zero = _mm256_setzero_si256();
10459    _mm256_mask_cmpeq_epi16_mask(k, and, zero)
10460}
10461
10462/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10463///
10464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi16_mask&expand=5911)
10465#[inline]
10466#[target_feature(enable = "avx512bw,avx512vl")]
10467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10468#[cfg_attr(test, assert_instr(vptestnmw))]
10469#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10470pub const fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
10471    let and = _mm_and_si128(a, b);
10472    let zero = _mm_setzero_si128();
10473    _mm_cmpeq_epi16_mask(and, zero)
10474}
10475
10476/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10477///
10478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi16_mask&expand=5910)
10479#[inline]
10480#[target_feature(enable = "avx512bw,avx512vl")]
10481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10482#[cfg_attr(test, assert_instr(vptestnmw))]
10483#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10484pub const fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
10485    let and = _mm_and_si128(a, b);
10486    let zero = _mm_setzero_si128();
10487    _mm_mask_cmpeq_epi16_mask(k, and, zero)
10488}
10489
10490/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10491///
10492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi8_mask&expand=5933)
10493#[inline]
10494#[target_feature(enable = "avx512bw")]
10495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10496#[cfg_attr(test, assert_instr(vptestnmb))]
10497#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10498pub const fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
10499    let and = _mm512_and_si512(a, b);
10500    let zero = _mm512_setzero_si512();
10501    _mm512_cmpeq_epi8_mask(and, zero)
10502}
10503
10504/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10505///
10506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi8_mask&expand=5932)
10507#[inline]
10508#[target_feature(enable = "avx512bw")]
10509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10510#[cfg_attr(test, assert_instr(vptestnmb))]
10511#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10512pub const fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
10513    let and = _mm512_and_si512(a, b);
10514    let zero = _mm512_setzero_si512();
10515    _mm512_mask_cmpeq_epi8_mask(k, and, zero)
10516}
10517
10518/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10519///
10520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi8_mask&expand=5931)
10521#[inline]
10522#[target_feature(enable = "avx512bw,avx512vl")]
10523#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10524#[cfg_attr(test, assert_instr(vptestnmb))]
10525#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10526pub const fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
10527    let and = _mm256_and_si256(a, b);
10528    let zero = _mm256_setzero_si256();
10529    _mm256_cmpeq_epi8_mask(and, zero)
10530}
10531
10532/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10533///
10534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi8_mask&expand=5930)
10535#[inline]
10536#[target_feature(enable = "avx512bw,avx512vl")]
10537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10538#[cfg_attr(test, assert_instr(vptestnmb))]
10539#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10540pub const fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
10541    let and = _mm256_and_si256(a, b);
10542    let zero = _mm256_setzero_si256();
10543    _mm256_mask_cmpeq_epi8_mask(k, and, zero)
10544}
10545
10546/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
10547///
10548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi8_mask&expand=5929)
10549#[inline]
10550#[target_feature(enable = "avx512bw,avx512vl")]
10551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10552#[cfg_attr(test, assert_instr(vptestnmb))]
10553#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10554pub const fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
10555    let and = _mm_and_si128(a, b);
10556    let zero = _mm_setzero_si128();
10557    _mm_cmpeq_epi8_mask(and, zero)
10558}
10559
10560/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
10561///
10562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi8_mask&expand=5928)
10563#[inline]
10564#[target_feature(enable = "avx512bw,avx512vl")]
10565#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10566#[cfg_attr(test, assert_instr(vptestnmb))]
10567#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10568pub const fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
10569    let and = _mm_and_si128(a, b);
10570    let zero = _mm_setzero_si128();
10571    _mm_mask_cmpeq_epi8_mask(k, and, zero)
10572}
10573
10574/// Store 64-bit mask from a into memory.
10575///
10576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask64&expand=5578)
10577#[inline]
10578#[target_feature(enable = "avx512bw")]
10579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10580#[cfg_attr(test, assert_instr(mov))] //should be kmovq
10581#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10582pub const unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
10583    ptr::write(mem_addr as *mut __mmask64, a);
10584}
10585
10586/// Store 32-bit mask from a into memory.
10587///
10588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask32&expand=5577)
10589#[inline]
10590#[target_feature(enable = "avx512bw")]
10591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10592#[cfg_attr(test, assert_instr(mov))] //should be kmovd
10593#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10594pub const unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
10595    ptr::write(mem_addr as *mut __mmask32, a);
10596}
10597
10598/// Load 64-bit mask from memory into k.
10599///
10600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask64&expand=3318)
10601#[inline]
10602#[target_feature(enable = "avx512bw")]
10603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10604#[cfg_attr(test, assert_instr(mov))] //should be kmovq
10605#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10606pub const unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
10607    ptr::read(mem_addr as *const __mmask64)
10608}
10609
10610/// Load 32-bit mask from memory into k.
10611///
10612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask32&expand=3317)
10613#[inline]
10614#[target_feature(enable = "avx512bw")]
10615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10616#[cfg_attr(test, assert_instr(mov))] //should be kmovd
10617#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10618pub const unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
10619    ptr::read(mem_addr as *const __mmask32)
10620}
10621
10622/// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst.
10623///
10624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sad_epu8&expand=4855)
10625#[inline]
10626#[target_feature(enable = "avx512bw")]
10627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10628#[cfg_attr(test, assert_instr(vpsadbw))]
10629pub fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i {
10630    unsafe { transmute(vpsadbw(a.as_u8x64(), b.as_u8x64())) }
10631}
10632
10633/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10634///
10635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dbsad_epu8&expand=2114)
10636#[inline]
10637#[target_feature(enable = "avx512bw")]
10638#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10639#[rustc_legacy_const_generics(2)]
10640#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10641pub fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
10642    unsafe {
10643        static_assert_uimm_bits!(IMM8, 8);
10644        let a = a.as_u8x64();
10645        let b = b.as_u8x64();
10646        let r = vdbpsadbw(a, b, IMM8);
10647        transmute(r)
10648    }
10649}
10650
10651/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10652///
10653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dbsad_epu8&expand=2115)
10654#[inline]
10655#[target_feature(enable = "avx512bw")]
10656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10657#[rustc_legacy_const_generics(4)]
10658#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10659pub fn _mm512_mask_dbsad_epu8<const IMM8: i32>(
10660    src: __m512i,
10661    k: __mmask32,
10662    a: __m512i,
10663    b: __m512i,
10664) -> __m512i {
10665    unsafe {
10666        static_assert_uimm_bits!(IMM8, 8);
10667        let a = a.as_u8x64();
10668        let b = b.as_u8x64();
10669        let r = vdbpsadbw(a, b, IMM8);
10670        transmute(simd_select_bitmask(k, r, src.as_u16x32()))
10671    }
10672}
10673
10674/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10675///
10676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dbsad_epu8&expand=2116)
10677#[inline]
10678#[target_feature(enable = "avx512bw")]
10679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10680#[rustc_legacy_const_generics(3)]
10681#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10682pub fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
10683    unsafe {
10684        static_assert_uimm_bits!(IMM8, 8);
10685        let a = a.as_u8x64();
10686        let b = b.as_u8x64();
10687        let r = vdbpsadbw(a, b, IMM8);
10688        transmute(simd_select_bitmask(k, r, u16x32::ZERO))
10689    }
10690}
10691
10692/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10693///
10694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dbsad_epu8&expand=2111)
10695#[inline]
10696#[target_feature(enable = "avx512bw,avx512vl")]
10697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10698#[rustc_legacy_const_generics(2)]
10699#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10700pub fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
10701    unsafe {
10702        static_assert_uimm_bits!(IMM8, 8);
10703        let a = a.as_u8x32();
10704        let b = b.as_u8x32();
10705        let r = vdbpsadbw256(a, b, IMM8);
10706        transmute(r)
10707    }
10708}
10709
10710/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10711///
10712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dbsad_epu8&expand=2112)
10713#[inline]
10714#[target_feature(enable = "avx512bw,avx512vl")]
10715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10716#[rustc_legacy_const_generics(4)]
10717#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10718pub fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
10719    src: __m256i,
10720    k: __mmask16,
10721    a: __m256i,
10722    b: __m256i,
10723) -> __m256i {
10724    unsafe {
10725        static_assert_uimm_bits!(IMM8, 8);
10726        let a = a.as_u8x32();
10727        let b = b.as_u8x32();
10728        let r = vdbpsadbw256(a, b, IMM8);
10729        transmute(simd_select_bitmask(k, r, src.as_u16x16()))
10730    }
10731}
10732
10733/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10734///
10735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dbsad_epu8&expand=2113)
10736#[inline]
10737#[target_feature(enable = "avx512bw,avx512vl")]
10738#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10739#[rustc_legacy_const_generics(3)]
10740#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10741pub fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
10742    unsafe {
10743        static_assert_uimm_bits!(IMM8, 8);
10744        let a = a.as_u8x32();
10745        let b = b.as_u8x32();
10746        let r = vdbpsadbw256(a, b, IMM8);
10747        transmute(simd_select_bitmask(k, r, u16x16::ZERO))
10748    }
10749}
10750
10751/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10752///
10753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dbsad_epu8&expand=2108)
10754#[inline]
10755#[target_feature(enable = "avx512bw,avx512vl")]
10756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10757#[rustc_legacy_const_generics(2)]
10758#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10759pub fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
10760    unsafe {
10761        static_assert_uimm_bits!(IMM8, 8);
10762        let a = a.as_u8x16();
10763        let b = b.as_u8x16();
10764        let r = vdbpsadbw128(a, b, IMM8);
10765        transmute(r)
10766    }
10767}
10768
10769/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10770///
10771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dbsad_epu8&expand=2109)
10772#[inline]
10773#[target_feature(enable = "avx512bw,avx512vl")]
10774#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10775#[rustc_legacy_const_generics(4)]
10776#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10777pub fn _mm_mask_dbsad_epu8<const IMM8: i32>(
10778    src: __m128i,
10779    k: __mmask8,
10780    a: __m128i,
10781    b: __m128i,
10782) -> __m128i {
10783    unsafe {
10784        static_assert_uimm_bits!(IMM8, 8);
10785        let a = a.as_u8x16();
10786        let b = b.as_u8x16();
10787        let r = vdbpsadbw128(a, b, IMM8);
10788        transmute(simd_select_bitmask(k, r, src.as_u16x8()))
10789    }
10790}
10791
10792/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
10793///
10794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dbsad_epu8&expand=2110)
10795#[inline]
10796#[target_feature(enable = "avx512bw,avx512vl")]
10797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10798#[rustc_legacy_const_generics(3)]
10799#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
10800pub fn _mm_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
10801    unsafe {
10802        static_assert_uimm_bits!(IMM8, 8);
10803        let a = a.as_u8x16();
10804        let b = b.as_u8x16();
10805        let r = vdbpsadbw128(a, b, IMM8);
10806        transmute(simd_select_bitmask(k, r, u16x8::ZERO))
10807    }
10808}
10809
10810/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
10811///
10812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi16_mask&expand=3873)
10813#[inline]
10814#[target_feature(enable = "avx512bw")]
10815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10816#[cfg_attr(test, assert_instr(vpmovw2m))]
10817#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10818pub const fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
10819    let filter = _mm512_set1_epi16(1 << 15);
10820    let a = _mm512_and_si512(a, filter);
10821    _mm512_cmpeq_epi16_mask(a, filter)
10822}
10823
10824/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
10825///
10826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi16_mask&expand=3872)
10827#[inline]
10828#[target_feature(enable = "avx512bw,avx512vl")]
10829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10830#[cfg_attr(test, assert_instr(vpmovw2m))]
10831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10832pub const fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
10833    let filter = _mm256_set1_epi16(1 << 15);
10834    let a = _mm256_and_si256(a, filter);
10835    _mm256_cmpeq_epi16_mask(a, filter)
10836}
10837
10838/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
10839///
10840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi16_mask&expand=3871)
10841#[inline]
10842#[target_feature(enable = "avx512bw,avx512vl")]
10843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10844#[cfg_attr(test, assert_instr(vpmovw2m))]
10845#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10846pub const fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
10847    let filter = _mm_set1_epi16(1 << 15);
10848    let a = _mm_and_si128(a, filter);
10849    _mm_cmpeq_epi16_mask(a, filter)
10850}
10851
10852/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
10853///
10854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi8_mask&expand=3883)
10855#[inline]
10856#[target_feature(enable = "avx512bw")]
10857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10858#[cfg_attr(test, assert_instr(vpmovb2m))]
10859#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10860pub const fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
10861    let filter = _mm512_set1_epi8(1 << 7);
10862    let a = _mm512_and_si512(a, filter);
10863    _mm512_cmpeq_epi8_mask(a, filter)
10864}
10865
10866/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
10867///
10868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi8_mask&expand=3882)
10869#[inline]
10870#[target_feature(enable = "avx512bw,avx512vl")]
10871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10872#[cfg_attr(test, assert_instr(vpmovmskb))]
10873// should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than
10874// using vpmovb2m plus converting the mask register to a standard register.
10875#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10876pub const fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
10877    let filter = _mm256_set1_epi8(1 << 7);
10878    let a = _mm256_and_si256(a, filter);
10879    _mm256_cmpeq_epi8_mask(a, filter)
10880}
10881
10882/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
10883///
10884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi8_mask&expand=3881)
10885#[inline]
10886#[target_feature(enable = "avx512bw,avx512vl")]
10887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10888#[cfg_attr(test, assert_instr(vpmovmskb))]
10889// should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than
10890// using vpmovb2m plus converting the mask register to a standard register.
10891#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10892pub const fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
10893    let filter = _mm_set1_epi8(1 << 7);
10894    let a = _mm_and_si128(a, filter);
10895    _mm_cmpeq_epi8_mask(a, filter)
10896}
10897
10898/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10899///
10900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi16&expand=3886)
10901#[inline]
10902#[target_feature(enable = "avx512bw")]
10903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10904#[cfg_attr(test, assert_instr(vpmovm2w))]
10905#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10906pub const fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
10907    unsafe {
10908        let one = _mm512_set1_epi16(
10909            1 << 15
10910                | 1 << 14
10911                | 1 << 13
10912                | 1 << 12
10913                | 1 << 11
10914                | 1 << 10
10915                | 1 << 9
10916                | 1 << 8
10917                | 1 << 7
10918                | 1 << 6
10919                | 1 << 5
10920                | 1 << 4
10921                | 1 << 3
10922                | 1 << 2
10923                | 1 << 1
10924                | 1 << 0,
10925        )
10926        .as_i16x32();
10927        transmute(simd_select_bitmask(k, one, i16x32::ZERO))
10928    }
10929}
10930
10931/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10932///
10933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi16&expand=3885)
10934#[inline]
10935#[target_feature(enable = "avx512bw,avx512vl")]
10936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10937#[cfg_attr(test, assert_instr(vpmovm2w))]
10938#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10939pub const fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
10940    unsafe {
10941        let one = _mm256_set1_epi16(
10942            1 << 15
10943                | 1 << 14
10944                | 1 << 13
10945                | 1 << 12
10946                | 1 << 11
10947                | 1 << 10
10948                | 1 << 9
10949                | 1 << 8
10950                | 1 << 7
10951                | 1 << 6
10952                | 1 << 5
10953                | 1 << 4
10954                | 1 << 3
10955                | 1 << 2
10956                | 1 << 1
10957                | 1 << 0,
10958        )
10959        .as_i16x16();
10960        transmute(simd_select_bitmask(k, one, i16x16::ZERO))
10961    }
10962}
10963
10964/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
10965///
10966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi16&expand=3884)
10967#[inline]
10968#[target_feature(enable = "avx512bw,avx512vl")]
10969#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10970#[cfg_attr(test, assert_instr(vpmovm2w))]
10971#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
10972pub const fn _mm_movm_epi16(k: __mmask8) -> __m128i {
10973    unsafe {
10974        let one = _mm_set1_epi16(
10975            1 << 15
10976                | 1 << 14
10977                | 1 << 13
10978                | 1 << 12
10979                | 1 << 11
10980                | 1 << 10
10981                | 1 << 9
10982                | 1 << 8
10983                | 1 << 7
10984                | 1 << 6
10985                | 1 << 5
10986                | 1 << 4
10987                | 1 << 3
10988                | 1 << 2
10989                | 1 << 1
10990                | 1 << 0,
10991        )
10992        .as_i16x8();
10993        transmute(simd_select_bitmask(k, one, i16x8::ZERO))
10994    }
10995}
10996
/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi8&expand=3895)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
    unsafe {
        // ORing all 8 bit positions yields an i8 with every bit set (-1), so each
        // lane selected by `k` becomes all ones; unselected lanes become zero.
        let one =
            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
                .as_i8x64();
        transmute(simd_select_bitmask(k, one, i8x64::ZERO))
    }
}
11013
/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi8&expand=3894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
    unsafe {
        // ORing all 8 bit positions yields an i8 with every bit set (-1), so each
        // lane selected by `k` becomes all ones; unselected lanes become zero.
        let one =
            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
                .as_i8x32();
        transmute(simd_select_bitmask(k, one, i8x32::ZERO))
    }
}
11030
/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi8&expand=3893)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_movm_epi8(k: __mmask16) -> __m128i {
    unsafe {
        // ORing all 8 bit positions yields an i8 with every bit set (-1), so each
        // lane selected by `k` becomes all ones; unselected lanes become zero.
        let one =
            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
                .as_i8x16();
        transmute(simd_select_bitmask(k, one, i8x16::ZERO))
    }
}
11047
/// Convert 32-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_cvtmask32_u32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtmask32_u32(a: __mmask32) -> u32 {
    // `__mmask32` is an alias for `u32`, so the conversion is the identity.
    a
}
11058
/// Convert integer value a into an 32-bit mask, and store the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtu32_mask32(a: u32) -> __mmask32 {
    // `__mmask32` is an alias for `u32`, so the conversion is the identity.
    a
}
11069
/// Add 32-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask32&expand=3207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // Wrapping addition: any carry out of the top bit is discarded, never panics.
    a.wrapping_add(b)
}
11080
/// Add 64-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask64&expand=3208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // Wrapping addition: any carry out of the top bit is discarded, never panics.
    a.wrapping_add(b)
}
11091
/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask32&expand=3213)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // Masks are plain integers, so the scalar AND operator suffices.
    a & b
}
11102
/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask64&expand=3214)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // Masks are plain integers, so the scalar AND operator suffices.
    a & b
}
11113
/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _knot_mask32(a: __mmask32) -> __mmask32 {
    // Scalar bitwise complement of the whole mask.
    !a
}
11124
/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _knot_mask64(a: __mmask64) -> __mmask64 {
    // Scalar bitwise complement of the whole mask.
    !a
}
11135
11136/// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k.
11137///
11138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219)
11139#[inline]
11140#[target_feature(enable = "avx512bw")]
11141#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11142#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11143pub const fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
11144    _knot_mask32(a) & b
11145}
11146
11147/// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k.
11148///
11149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220)
11150#[inline]
11151#[target_feature(enable = "avx512bw")]
11152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11153#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11154pub const fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
11155    _knot_mask64(a) & b
11156}
11157
/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // Masks are plain integers, so the scalar OR operator suffices.
    a | b
}
11168
/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // Masks are plain integers, so the scalar OR operator suffices.
    a | b
}
11179
/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // Masks are plain integers, so the scalar XOR operator suffices.
    a ^ b
}
11190
/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // Masks are plain integers, so the scalar XOR operator suffices.
    a ^ b
}
11201
11202/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
11203///
11204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286)
11205#[inline]
11206#[target_feature(enable = "avx512bw")]
11207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11208#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11209pub const fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
11210    _knot_mask32(a ^ b)
11211}
11212
11213/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
11214///
11215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287)
11216#[inline]
11217#[target_feature(enable = "avx512bw")]
11218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11219#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11220pub const fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
11221    _knot_mask64(a ^ b)
11222}
11223
/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask32(a, b);
    // Raw-pointer write: the caller must guarantee `all_ones` is valid for a
    // one-byte write (this is the reason the function is `unsafe`).
    *all_ones = (tmp == 0xffffffff) as u8;
    (tmp == 0) as u8
}
11237
/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask64(a, b);
    // Raw-pointer write: the caller must guarantee `all_ones` is valid for a
    // one-byte write (this is the reason the function is `unsafe`).
    *all_ones = (tmp == 0xffffffff_ffffffff) as u8;
    (tmp == 0) as u8
}
11251
11252/// Compute the bitwise OR of 32-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
11253/// store 0 in dst.
11254///
11255/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask32_u8)
11256#[inline]
11257#[target_feature(enable = "avx512bw")]
11258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11259#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11260pub const fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
11261    (_kor_mask32(a, b) == 0xffffffff) as u8
11262}
11263
11264/// Compute the bitwise OR of 64-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
11265/// store 0 in dst.
11266///
11267/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask64_u8)
11268#[inline]
11269#[target_feature(enable = "avx512bw")]
11270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11271#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11272pub const fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
11273    (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8
11274}
11275
11276/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
11277/// store 0 in dst.
11278///
11279/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask32_u8)
11280#[inline]
11281#[target_feature(enable = "avx512bw")]
11282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11283#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11284pub const fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
11285    (_kor_mask32(a, b) == 0) as u8
11286}
11287
11288/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
11289/// store 0 in dst.
11290///
11291/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask64_u8)
11292#[inline]
11293#[target_feature(enable = "avx512bw")]
11294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11295#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11296pub const fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
11297    (_kor_mask64(a, b) == 0) as u8
11298}
11299
/// Shift the bits of 32-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
    // `unbounded_shl` yields 0 for COUNT >= 32 instead of panicking/UB.
    a.unbounded_shl(COUNT)
}
11311
/// Shift the bits of 64-bit mask a left by count while shifting in zeros, and store the least significant 64 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask64)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
    // `unbounded_shl` yields 0 for COUNT >= 64 instead of panicking/UB.
    a.unbounded_shl(COUNT)
}
11323
/// Shift the bits of 32-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
    // `unbounded_shr` yields 0 for COUNT >= 32 instead of panicking/UB.
    a.unbounded_shr(COUNT)
}
11335
/// Shift the bits of 64-bit mask a right by count while shifting in zeros, and store the least significant 64 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask64)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
    // `unbounded_shr` yields 0 for COUNT >= 64 instead of panicking/UB.
    a.unbounded_shr(COUNT)
}
11347
/// Compute the bitwise AND of 32-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
    // Raw-pointer write: the caller must guarantee `and_not` is valid for a
    // one-byte write (this is the reason the function is `unsafe`).
    *and_not = (_kandn_mask32(a, b) == 0) as u8;
    (_kand_mask32(a, b) == 0) as u8
}
11361
/// Compute the bitwise AND of 64-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 {
    // Raw-pointer write: the caller must guarantee `and_not` is valid for a
    // one-byte write (this is the reason the function is `unsafe`).
    *and_not = (_kandn_mask64(a, b) == 0) as u8;
    (_kand_mask64(a, b) == 0) as u8
}
11375
11376/// Compute the bitwise NOT of 32-bit mask a and then AND with 16-bit mask b, if the result is all
11377/// zeros, store 1 in dst, otherwise store 0 in dst.
11378///
11379/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask32_u8)
11380#[inline]
11381#[target_feature(enable = "avx512bw")]
11382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11383#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11384pub const fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
11385    (_kandn_mask32(a, b) == 0) as u8
11386}
11387
11388/// Compute the bitwise NOT of 64-bit mask a and then AND with 8-bit mask b, if the result is all
11389/// zeros, store 1 in dst, otherwise store 0 in dst.
11390///
11391/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask64_u8)
11392#[inline]
11393#[target_feature(enable = "avx512bw")]
11394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11395#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11396pub const fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
11397    (_kandn_mask64(a, b) == 0) as u8
11398}
11399
11400/// Compute the bitwise AND of 32-bit masks a and  b, if the result is all zeros, store 1 in dst, otherwise
11401/// store 0 in dst.
11402///
11403/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask32_u8)
11404#[inline]
11405#[target_feature(enable = "avx512bw")]
11406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11407#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11408pub const fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
11409    (_kand_mask32(a, b) == 0) as u8
11410}
11411
11412/// Compute the bitwise AND of 64-bit masks a and  b, if the result is all zeros, store 1 in dst, otherwise
11413/// store 0 in dst.
11414///
11415/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask64_u8)
11416#[inline]
11417#[target_feature(enable = "avx512bw")]
11418#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11419#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11420pub const fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
11421    (_kand_mask64(a, b) == 0) as u8
11422}
11423
/// Unpack and interleave 16 bits from masks a and b, and store the 32-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackw)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckwd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
    // Low 16 bits of `a` become the high half; low 16 bits of `b` the low half.
    ((a & 0xffff) << 16) | (b & 0xffff)
}
11435
/// Unpack and interleave 32 bits from masks a and b, and store the 64-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackd)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckdq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
    // Low 32 bits of `a` become the high half; low 32 bits of `b` the low half.
    ((a & 0xffffffff) << 32) | (b & 0xffffffff)
}
11447
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi8&expand=1407)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        let a = a.as_i16x32();
        // Lane-wise integer cast i16 -> i8 keeps the low 8 bits of each lane.
        transmute::<i8x32, _>(simd_cast(a))
    }
}
11462
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi8&expand=1408)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        // Per-bit select: converted lane where the mask bit is set, else `src`.
        transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
    }
}
11477
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi8&expand=1409)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
        // Per-bit select: converted lane where the mask bit is set, else zero.
        transmute(simd_select_bitmask(k, convert, i8x32::ZERO))
    }
}
11492
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi8&expand=1404)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        let a = a.as_i16x16();
        // Lane-wise integer cast i16 -> i8 keeps the low 8 bits of each lane.
        transmute::<i8x16, _>(simd_cast(a))
    }
}
11507
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi8&expand=1405)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
        // Per-bit select: converted lane where the mask bit is set, else `src`.
        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
    }
}
11522
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi8&expand=1406)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
        // Per-bit select: converted lane where the mask bit is set, else zero.
        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
    }
}
11537
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi8&expand=1401)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i16x8();
        // Widen the 8 source lanes to 16 by appending lanes from the ZERO vector
        // (shuffle indices >= 8 select the second operand), so after the
        // truncating cast the upper 8 bytes of the 128-bit result are zero.
        let v256: i16x16 = simd_shuffle!(
            a,
            i16x8::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
        );
        transmute::<i8x16, _>(simd_cast(v256))
    }
}
11557
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi8&expand=1402)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let a = _mm_cvtepi16_epi8(a).as_i8x16();
        // Zero the upper 8 bytes of `src` (indices >= 16 pull from the ZERO
        // vector) so the upper half of the result stays zero either way.
        let src = simd_shuffle!(
            src.as_i8x16(),
            i8x16::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16]
        );
        // Zero-extend `k` to 16 bits: the upper select bits are 0 and pick the
        // (already zeroed) upper `src` lanes.
        simd_select_bitmask(k as u16, a, src).as_m128i()
    }
}
11577
/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi8&expand=1403)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovwb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    // Zero-masking is the write-masking variant with an all-zero `src`.
    _mm_mask_cvtepi16_epi8(_mm_setzero_si128(), k, a)
}
11589
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi16_epi8&expand=1807)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        // Signed saturation: clamp each lane into [i8::MIN, i8::MAX] while still
        // in 16-bit lanes, then the truncating cast is lossless.
        simd_cast::<_, i8x32>(simd_imax(
            simd_imin(a.as_i16x32(), i16x32::splat(i8::MAX as _)),
            i16x32::splat(i8::MIN as _),
        ))
        .as_m256i()
    }
}
11607
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_epi8&expand=1808)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    unsafe {
        // Per-bit select: saturated lane where the mask bit is set, else `src`.
        simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), src.as_i8x32()).as_m256i()
    }
}
11621
11622/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11623///
11624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi16_epi8&expand=1809)
11625#[inline]
11626#[target_feature(enable = "avx512bw")]
11627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11628#[cfg_attr(test, assert_instr(vpmovswb))]
11629#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11630pub const fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
11631    unsafe { simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), i8x32::ZERO).as_m256i() }
11632}
11633
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi16_epi8&expand=1804)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        // Clamp every 16-bit lane into [i8::MIN, i8::MAX] before the
        // narrowing cast; this min/max-then-truncate pattern is lowered to
        // the saturating `vpmovswb` (checked by `assert_instr` above).
        simd_cast::<_, i8x16>(simd_imax(
            simd_imin(a.as_i16x16(), i16x16::splat(i8::MAX as _)),
            i16x16::splat(i8::MIN as _),
        ))
        .as_m128i()
    }
}
11651
11652/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11653///
11654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_epi8&expand=1805)
11655#[inline]
11656#[target_feature(enable = "avx512bw,avx512vl")]
11657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11658#[cfg_attr(test, assert_instr(vpmovswb))]
11659#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11660pub const fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
11661    unsafe {
11662        simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), src.as_i8x16()).as_m128i()
11663    }
11664}
11665
11666/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11667///
11668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi16_epi8&expand=1806)
11669#[inline]
11670#[target_feature(enable = "avx512bw,avx512vl")]
11671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11672#[cfg_attr(test, assert_instr(vpmovswb))]
11673#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11674pub const fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
11675    unsafe { simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), i8x16::ZERO).as_m128i() }
11676}
11677
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi16_epi8&expand=1801)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i {
    // Delegates to the `vpmovswb` LLVM intrinsic. The all-ones 8-bit mask
    // selects every result lane, so the zero pass-through vector is unused
    // for the low 8 bytes.
    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111)) }
}
11688
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_epi8&expand=1802)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // Delegates to the `vpmovswb` LLVM intrinsic; result lanes whose mask
    // bit is clear are taken from `src`.
    unsafe { transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k)) }
}
11699
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi16_epi8&expand=1803)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    // Delegates to the `vpmovswb` LLVM intrinsic with an all-zero
    // pass-through vector, so masked-off lanes come out as 0.
    unsafe { transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k)) }
}
11710
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi16_epi8&expand=2042)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
    unsafe {
        // Unsigned saturation only needs an upper clamp at u8::MAX before the
        // narrowing cast; this pattern is lowered to the saturating
        // `vpmovuswb` (checked by `assert_instr` above).
        simd_cast::<_, u8x32>(simd_imin(a.as_u16x32(), u16x32::splat(u8::MAX as _))).as_m256i()
    }
}
11724
11725/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11726///
11727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_epi8&expand=2043)
11728#[inline]
11729#[target_feature(enable = "avx512bw")]
11730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11731#[cfg_attr(test, assert_instr(vpmovuswb))]
11732#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11733pub const fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
11734    unsafe {
11735        simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), src.as_u8x32()).as_m256i()
11736    }
11737}
11738
11739/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11740///
11741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi16_epi8&expand=2044)
11742#[inline]
11743#[target_feature(enable = "avx512bw")]
11744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11745#[cfg_attr(test, assert_instr(vpmovuswb))]
11746#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11747pub const fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
11748    unsafe { simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), u8x32::ZERO).as_m256i() }
11749}
11750
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi16_epi8&expand=2039)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
pub const fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
    unsafe {
        // Unsigned saturation only needs an upper clamp at u8::MAX before the
        // narrowing cast; this pattern is lowered to the saturating
        // `vpmovuswb` (checked by `assert_instr` above).
        simd_cast::<_, u8x16>(simd_imin(a.as_u16x16(), u16x16::splat(u8::MAX as _))).as_m128i()
    }
}
11764
11765/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11766///
11767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_epi8&expand=2040)
11768#[inline]
11769#[target_feature(enable = "avx512bw,avx512vl")]
11770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11771#[cfg_attr(test, assert_instr(vpmovuswb))]
11772#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11773pub const fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
11774    unsafe {
11775        simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), src.as_u8x16()).as_m128i()
11776    }
11777}
11778
11779/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11780///
11781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi16_epi8&expand=2041)
11782#[inline]
11783#[target_feature(enable = "avx512bw,avx512vl")]
11784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11785#[cfg_attr(test, assert_instr(vpmovuswb))]
11786#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
11787pub const fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
11788    unsafe { simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), u8x16::ZERO).as_m128i() }
11789}
11790
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi16_epi8&expand=2036)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i {
    // Delegates to the `vpmovuswb` LLVM intrinsic. The all-ones 8-bit mask
    // selects every result lane, so the zero pass-through vector is unused
    // for the low 8 bytes.
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111)) }
}
11801
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_epi8&expand=2037)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // Delegates to the `vpmovuswb` LLVM intrinsic; result lanes whose mask
    // bit is clear are taken from `src`.
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k)) }
}
11812
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi16_epi8&expand=2038)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    // Delegates to the `vpmovuswb` LLVM intrinsic with an all-zero
    // pass-through vector, so masked-off lanes come out as 0.
    unsafe { transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k)) }
}
11823
/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi16&expand=1526)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_i8x32();
        // The source lanes are signed, so this lane-wise widening cast
        // sign-extends (vpmovsxbw).
        transmute::<i16x32, _>(simd_cast(a))
    }
}
11838
11839/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11840///
11841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi16&expand=1527)
11842#[inline]
11843#[target_feature(enable = "avx512bw")]
11844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11845#[cfg_attr(test, assert_instr(vpmovsxbw))]
11846#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11847pub const fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
11848    unsafe {
11849        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
11850        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
11851    }
11852}
11853
11854/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11855///
11856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi16&expand=1528)
11857#[inline]
11858#[target_feature(enable = "avx512bw")]
11859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11860#[cfg_attr(test, assert_instr(vpmovsxbw))]
11861#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11862pub const fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
11863    unsafe {
11864        let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
11865        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
11866    }
11867}
11868
11869/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11870///
11871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi16&expand=1524)
11872#[inline]
11873#[target_feature(enable = "avx512bw,avx512vl")]
11874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11875#[cfg_attr(test, assert_instr(vpmovsxbw))]
11876#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11877pub const fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
11878    unsafe {
11879        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
11880        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
11881    }
11882}
11883
11884/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11885///
11886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi16&expand=1525)
11887#[inline]
11888#[target_feature(enable = "avx512bw,avx512vl")]
11889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11890#[cfg_attr(test, assert_instr(vpmovsxbw))]
11891#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11892pub const fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
11893    unsafe {
11894        let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
11895        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
11896    }
11897}
11898
11899/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11900///
11901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi16&expand=1521)
11902#[inline]
11903#[target_feature(enable = "avx512bw,avx512vl")]
11904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11905#[cfg_attr(test, assert_instr(vpmovsxbw))]
11906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11907pub const fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11908    unsafe {
11909        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
11910        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
11911    }
11912}
11913
11914/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11915///
11916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi16&expand=1522)
11917#[inline]
11918#[target_feature(enable = "avx512bw,avx512vl")]
11919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11920#[cfg_attr(test, assert_instr(vpmovsxbw))]
11921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11922pub const fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
11923    unsafe {
11924        let convert = _mm_cvtepi8_epi16(a).as_i16x8();
11925        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
11926    }
11927}
11928
/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi16&expand=1612)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_u8x32();
        // The source lanes are unsigned, so this lane-wise widening cast
        // zero-extends (vpmovzxbw); the result is then reinterpreted as
        // signed 16-bit lanes.
        transmute::<i16x32, _>(simd_cast(a))
    }
}
11943
11944/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11945///
11946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi16&expand=1613)
11947#[inline]
11948#[target_feature(enable = "avx512bw")]
11949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11950#[cfg_attr(test, assert_instr(vpmovzxbw))]
11951#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11952pub const fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
11953    unsafe {
11954        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
11955        transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
11956    }
11957}
11958
11959/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11960///
11961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi16&expand=1614)
11962#[inline]
11963#[target_feature(enable = "avx512bw")]
11964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11965#[cfg_attr(test, assert_instr(vpmovzxbw))]
11966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11967pub const fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
11968    unsafe {
11969        let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
11970        transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
11971    }
11972}
11973
11974/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11975///
11976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi16&expand=1610)
11977#[inline]
11978#[target_feature(enable = "avx512bw,avx512vl")]
11979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11980#[cfg_attr(test, assert_instr(vpmovzxbw))]
11981#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11982pub const fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
11983    unsafe {
11984        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
11985        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
11986    }
11987}
11988
11989/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11990///
11991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi16&expand=1611)
11992#[inline]
11993#[target_feature(enable = "avx512bw,avx512vl")]
11994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11995#[cfg_attr(test, assert_instr(vpmovzxbw))]
11996#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
11997pub const fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
11998    unsafe {
11999        let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
12000        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
12001    }
12002}
12003
12004/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12005///
12006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi16&expand=1607)
12007#[inline]
12008#[target_feature(enable = "avx512bw,avx512vl")]
12009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12010#[cfg_attr(test, assert_instr(vpmovzxbw))]
12011#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12012pub const fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12013    unsafe {
12014        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
12015        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
12016    }
12017}
12018
12019/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12020///
12021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi16&expand=1608)
12022#[inline]
12023#[target_feature(enable = "avx512bw,avx512vl")]
12024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12025#[cfg_attr(test, assert_instr(vpmovzxbw))]
12026#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12027pub const fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
12028    unsafe {
12029        let convert = _mm_cvtepu8_epi16(a).as_i16x8();
12030        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
12031    }
12032}
12033
/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bslli_epi128&expand=591)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Computes the shuffle index for output byte `i`.
        // `simd_shuffle!(zero, a, ...)` addresses `zero` with indices
        // 0..=63 and `a` with 64..=127, so returning 0 produces a zero
        // byte and `64 + (i - shift)` selects `a[i - shift]`.
        const fn mask(shift: i32, i: u32) -> u32 {
            // Only the low 8 bits of the immediate are meaningful.
            let shift = shift as u32 & 0xff;
            // A shift of 16+ clears the whole 16-byte lane; within a lane,
            // the low `shift` bytes are filled with zeros.
            if shift > 15 || i % 16 < shift {
                0
            } else {
                64 + (i - shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        // Byte shuffle expressing a per-128-bit-lane left byte shift
        // (vpslldq, checked by `assert_instr` above).
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}
12129
/// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bsrli_epi128&expand=594)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        // Computes the shuffle index for output byte `i`.
        // `simd_shuffle!(zero, a, ...)` addresses `zero` with indices
        // 0..=63 and `a` with 64..=127, so returning 0 produces a zero
        // byte and `64 + (i + shift)` selects `a[i + shift]`.
        const fn mask(shift: i32, i: u32) -> u32 {
            // Only the low 8 bits of the immediate are meaningful.
            let shift = shift as u32 & 0xff;
            // A shift of 16+ clears the whole 16-byte lane; within a lane,
            // a source index past the lane's top byte is filled with zeros.
            if shift > 15 || (15 - (i % 16)) < shift {
                0
            } else {
                64 + (i + shift)
            }
        }
        let a = a.as_i8x64();
        let zero = i8x64::ZERO;
        // Byte shuffle expressing a per-128-bit-lane right byte shift
        // (vpsrldq, checked by `assert_instr` above).
        let r: i8x64 = simd_shuffle!(
            zero,
            a,
            [
                mask(IMM8, 0),
                mask(IMM8, 1),
                mask(IMM8, 2),
                mask(IMM8, 3),
                mask(IMM8, 4),
                mask(IMM8, 5),
                mask(IMM8, 6),
                mask(IMM8, 7),
                mask(IMM8, 8),
                mask(IMM8, 9),
                mask(IMM8, 10),
                mask(IMM8, 11),
                mask(IMM8, 12),
                mask(IMM8, 13),
                mask(IMM8, 14),
                mask(IMM8, 15),
                mask(IMM8, 16),
                mask(IMM8, 17),
                mask(IMM8, 18),
                mask(IMM8, 19),
                mask(IMM8, 20),
                mask(IMM8, 21),
                mask(IMM8, 22),
                mask(IMM8, 23),
                mask(IMM8, 24),
                mask(IMM8, 25),
                mask(IMM8, 26),
                mask(IMM8, 27),
                mask(IMM8, 28),
                mask(IMM8, 29),
                mask(IMM8, 30),
                mask(IMM8, 31),
                mask(IMM8, 32),
                mask(IMM8, 33),
                mask(IMM8, 34),
                mask(IMM8, 35),
                mask(IMM8, 36),
                mask(IMM8, 37),
                mask(IMM8, 38),
                mask(IMM8, 39),
                mask(IMM8, 40),
                mask(IMM8, 41),
                mask(IMM8, 42),
                mask(IMM8, 43),
                mask(IMM8, 44),
                mask(IMM8, 45),
                mask(IMM8, 46),
                mask(IMM8, 47),
                mask(IMM8, 48),
                mask(IMM8, 49),
                mask(IMM8, 50),
                mask(IMM8, 51),
                mask(IMM8, 52),
                mask(IMM8, 53),
                mask(IMM8, 54),
                mask(IMM8, 55),
                mask(IMM8, 56),
                mask(IMM8, 57),
                mask(IMM8, 58),
                mask(IMM8, 59),
                mask(IMM8, 60),
                mask(IMM8, 61),
                mask(IMM8, 62),
                mask(IMM8, 63),
            ],
        );
        transmute(r)
    }
}
12225
/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst.
/// Unlike [`_mm_alignr_epi8`], [`_mm256_alignr_epi8`] functions, where the entire input vectors are concatenated to the temporary result,
/// this concatenation happens in 4 steps, where each step builds 32-byte temporary result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi8&expand=263)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    // Maps output byte `i` to its source index for the `simd_shuffle!` below.
    // The shuffle numbers bytes of `b` (first operand) as 0..=63 and bytes of
    // `a` (second operand) as 64..=127. Within each 16-byte lane, the low
    // `16 - shift` output bytes come from `b`; the rest wrap into the matching
    // lane of `a`, which is what the `+ 48` offset selects (the `i % 16`
    // remainder plus 48 + shift lands past 64, i.e. in `a`'s numbering).
    const fn mask(shift: u32, i: u32) -> u32 {
        let shift = shift % 16;
        let mod_i = i % 16;
        if mod_i < (16 - shift) {
            i + shift
        } else {
            i + 48 + shift
        }
    }

    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 >= 32 {
        return _mm512_setzero_si512();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm512_setzero_si512(), a)
    } else {
        (a, b)
    };
    unsafe {
        // Shifting by exactly one full lane yields `a` unchanged (after the
        // possible operand swap above).
        if IMM8 == 16 {
            return transmute(a);
        }

        let r: i8x64 = simd_shuffle!(
            b.as_i8x64(),
            a.as_i8x64(),
            [
                mask(IMM8 as u32, 0),
                mask(IMM8 as u32, 1),
                mask(IMM8 as u32, 2),
                mask(IMM8 as u32, 3),
                mask(IMM8 as u32, 4),
                mask(IMM8 as u32, 5),
                mask(IMM8 as u32, 6),
                mask(IMM8 as u32, 7),
                mask(IMM8 as u32, 8),
                mask(IMM8 as u32, 9),
                mask(IMM8 as u32, 10),
                mask(IMM8 as u32, 11),
                mask(IMM8 as u32, 12),
                mask(IMM8 as u32, 13),
                mask(IMM8 as u32, 14),
                mask(IMM8 as u32, 15),
                mask(IMM8 as u32, 16),
                mask(IMM8 as u32, 17),
                mask(IMM8 as u32, 18),
                mask(IMM8 as u32, 19),
                mask(IMM8 as u32, 20),
                mask(IMM8 as u32, 21),
                mask(IMM8 as u32, 22),
                mask(IMM8 as u32, 23),
                mask(IMM8 as u32, 24),
                mask(IMM8 as u32, 25),
                mask(IMM8 as u32, 26),
                mask(IMM8 as u32, 27),
                mask(IMM8 as u32, 28),
                mask(IMM8 as u32, 29),
                mask(IMM8 as u32, 30),
                mask(IMM8 as u32, 31),
                mask(IMM8 as u32, 32),
                mask(IMM8 as u32, 33),
                mask(IMM8 as u32, 34),
                mask(IMM8 as u32, 35),
                mask(IMM8 as u32, 36),
                mask(IMM8 as u32, 37),
                mask(IMM8 as u32, 38),
                mask(IMM8 as u32, 39),
                mask(IMM8 as u32, 40),
                mask(IMM8 as u32, 41),
                mask(IMM8 as u32, 42),
                mask(IMM8 as u32, 43),
                mask(IMM8 as u32, 44),
                mask(IMM8 as u32, 45),
                mask(IMM8 as u32, 46),
                mask(IMM8 as u32, 47),
                mask(IMM8 as u32, 48),
                mask(IMM8 as u32, 49),
                mask(IMM8 as u32, 50),
                mask(IMM8 as u32, 51),
                mask(IMM8 as u32, 52),
                mask(IMM8 as u32, 53),
                mask(IMM8 as u32, 54),
                mask(IMM8 as u32, 55),
                mask(IMM8 as u32, 56),
                mask(IMM8 as u32, 57),
                mask(IMM8 as u32, 58),
                mask(IMM8 as u32, 59),
                mask(IMM8 as u32, 60),
                mask(IMM8 as u32, 61),
                mask(IMM8 as u32, 62),
                mask(IMM8 as u32, 63),
            ],
        );
        transmute(r)
    }
}
12338
12339/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12340///
12341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi8&expand=264)
12342#[inline]
12343#[target_feature(enable = "avx512bw")]
12344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12345#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
12346#[rustc_legacy_const_generics(4)]
12347#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12348pub const fn _mm512_mask_alignr_epi8<const IMM8: i32>(
12349    src: __m512i,
12350    k: __mmask64,
12351    a: __m512i,
12352    b: __m512i,
12353) -> __m512i {
12354    unsafe {
12355        static_assert_uimm_bits!(IMM8, 8);
12356        let r = _mm512_alignr_epi8::<IMM8>(a, b);
12357        transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64()))
12358    }
12359}
12360
12361/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12362///
12363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi8&expand=265)
12364#[inline]
12365#[target_feature(enable = "avx512bw")]
12366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12367#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
12368#[rustc_legacy_const_generics(3)]
12369#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12370pub const fn _mm512_maskz_alignr_epi8<const IMM8: i32>(
12371    k: __mmask64,
12372    a: __m512i,
12373    b: __m512i,
12374) -> __m512i {
12375    unsafe {
12376        static_assert_uimm_bits!(IMM8, 8);
12377        let r = _mm512_alignr_epi8::<IMM8>(a, b);
12378        transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO))
12379    }
12380}
12381
12382/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12383///
12384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi8&expand=261)
12385#[inline]
12386#[target_feature(enable = "avx512bw,avx512vl")]
12387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12388#[rustc_legacy_const_generics(4)]
12389#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12390#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12391pub const fn _mm256_mask_alignr_epi8<const IMM8: i32>(
12392    src: __m256i,
12393    k: __mmask32,
12394    a: __m256i,
12395    b: __m256i,
12396) -> __m256i {
12397    unsafe {
12398        static_assert_uimm_bits!(IMM8, 8);
12399        let r = _mm256_alignr_epi8::<IMM8>(a, b);
12400        transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32()))
12401    }
12402}
12403
12404/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12405///
12406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi8&expand=262)
12407#[inline]
12408#[target_feature(enable = "avx512bw,avx512vl")]
12409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12410#[rustc_legacy_const_generics(3)]
12411#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12412#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12413pub const fn _mm256_maskz_alignr_epi8<const IMM8: i32>(
12414    k: __mmask32,
12415    a: __m256i,
12416    b: __m256i,
12417) -> __m256i {
12418    unsafe {
12419        static_assert_uimm_bits!(IMM8, 8);
12420        let r = _mm256_alignr_epi8::<IMM8>(a, b);
12421        transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO))
12422    }
12423}
12424
12425/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12426///
12427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi8&expand=258)
12428#[inline]
12429#[target_feature(enable = "avx512bw,avx512vl")]
12430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12431#[rustc_legacy_const_generics(4)]
12432#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12433#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12434pub const fn _mm_mask_alignr_epi8<const IMM8: i32>(
12435    src: __m128i,
12436    k: __mmask16,
12437    a: __m128i,
12438    b: __m128i,
12439) -> __m128i {
12440    unsafe {
12441        static_assert_uimm_bits!(IMM8, 8);
12442        let r = _mm_alignr_epi8::<IMM8>(a, b);
12443        transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
12444    }
12445}
12446
12447/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12448///
12449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi8&expand=259)
12450#[inline]
12451#[target_feature(enable = "avx512bw,avx512vl")]
12452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12453#[rustc_legacy_const_generics(3)]
12454#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
12455#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12456pub const fn _mm_maskz_alignr_epi8<const IMM8: i32>(
12457    k: __mmask16,
12458    a: __m128i,
12459    b: __m128i,
12460) -> __m128i {
12461    unsafe {
12462        static_assert_uimm_bits!(IMM8, 8);
12463        let r = _mm_alignr_epi8::<IMM8>(a, b);
12464        transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO))
12465    }
12466}
12467
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    // Forwards directly to the LLVM masked truncate-store intrinsic; only
    // bytes whose bit in `k` is set are written to memory.
    vpmovswbmem(mem_addr, a.as_i16x32(), k);
}
12478
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    // Forwards directly to the LLVM masked truncate-store intrinsic; only
    // bytes whose bit in `k` is set are written to memory.
    vpmovswbmem256(mem_addr, a.as_i16x16(), k);
}
12489
/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    // Forwards directly to the LLVM masked truncate-store intrinsic; only
    // bytes whose bit in `k` is set are written to memory.
    vpmovswbmem128(mem_addr, a.as_i16x8(), k);
}
12500
12501/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12502///
12503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
12504#[inline]
12505#[target_feature(enable = "avx512bw")]
12506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12507#[cfg_attr(test, assert_instr(vpmovwb))]
12508#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
12509pub const unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
12510    let result = _mm512_cvtepi16_epi8(a).as_i8x32();
12511    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
12512    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
12513}
12514
12515/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12516///
12517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
12518#[inline]
12519#[target_feature(enable = "avx512bw,avx512vl")]
12520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12521#[cfg_attr(test, assert_instr(vpmovwb))]
12522#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
12523pub const unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
12524    let result = _mm256_cvtepi16_epi8(a).as_i8x16();
12525    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
12526    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
12527}
12528
12529/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
12530///
12531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
12532#[inline]
12533#[target_feature(enable = "avx512bw,avx512vl")]
12534#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12535#[cfg_attr(test, assert_instr(vpmovwb))]
12536#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
12537pub const unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
12538    let result: i8x8 = simd_shuffle!(
12539        _mm_cvtepi16_epi8(a).as_i8x16(),
12540        i8x16::ZERO,
12541        [0, 1, 2, 3, 4, 5, 6, 7]
12542    );
12543    let mask = simd_select_bitmask(k, i8x8::splat(!0), i8x8::ZERO);
12544    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
12545}
12546
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
#[inline]
#[target_feature(enable = "avx512bw")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    // Forwards directly to the LLVM masked truncate-store intrinsic; only
    // bytes whose bit in `k` is set are written to memory.
    vpmovuswbmem(mem_addr, a.as_i16x32(), k);
}
12557
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    // Forwards directly to the LLVM masked truncate-store intrinsic; only
    // bytes whose bit in `k` is set are written to memory.
    vpmovuswbmem256(mem_addr, a.as_i16x16(), k);
}
12568
/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    // Forwards directly to the LLVM masked truncate-store intrinsic; only
    // bytes whose bit in `k` is set are written to memory.
    vpmovuswbmem128(mem_addr, a.as_i16x8(), k);
}
12579
// Raw LLVM intrinsic bindings backing the AVX-512BW intrinsics above. Each
// `link_name` maps to the corresponding `llvm.x86.avx512.*` intrinsic; the
// Rust-visible wrappers are responsible for all masking/transmute plumbing.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // vpmulhrsw / vpmaddubsw — 16-bit multiply helpers.
    #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
    fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;

    #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
    fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;

    // vpack* family — pack wider elements into narrower ones.
    #[link_name = "llvm.x86.avx512.packssdw.512"]
    fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
    #[link_name = "llvm.x86.avx512.packsswb.512"]
    fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
    #[link_name = "llvm.x86.avx512.packusdw.512"]
    fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
    #[link_name = "llvm.x86.avx512.packuswb.512"]
    fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;

    // Full-vector 16-bit shifts with the count in an xmm operand.
    #[link_name = "llvm.x86.avx512.psll.w.512"]
    fn vpsllw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrl.w.512"]
    fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psra.w.512"]
    fn vpsraw(a: i16x32, count: i16x8) -> i16x32;

    // vpermi2w — two-source 16-bit permutes (512/256/128-bit).
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
    fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
    fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"]
    fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8;

    // vpermw — single-source 16-bit permutes (512/256/128-bit).
    #[link_name = "llvm.x86.avx512.permvar.hi.512"]
    fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.permvar.hi.256"]
    fn vpermw256(a: i16x16, idx: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.permvar.hi.128"]
    fn vpermw128(a: i16x8, idx: i16x8) -> i16x8;

    // vpshufb — byte shuffle.
    #[link_name = "llvm.x86.avx512.pshuf.b.512"]
    fn vpshufb(a: i8x64, b: i8x64) -> i8x64;

    // vpsadbw / vdbpsadbw — sum-of-absolute-differences operations.
    #[link_name = "llvm.x86.avx512.psad.bw.512"]
    fn vpsadbw(a: u8x64, b: u8x64) -> u64x8;

    #[link_name = "llvm.x86.avx512.dbpsadbw.512"]
    fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32;
    #[link_name = "llvm.x86.avx512.dbpsadbw.256"]
    fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
    fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;

    // vpmovswb / vpmovuswb — saturating 16-to-8-bit down-converts that
    // return a vector (register form).
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
    fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
    fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;

    // Memory forms: masked saturating down-convert stores.
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
    fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
    fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
    fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
    fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
    fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
    fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
}
12653
12654#[cfg(test)]
12655mod tests {
12656    use crate::core_arch::assert_eq_const as assert_eq;
12657
12658    use stdarch_test::simd_test;
12659
12660    use crate::core_arch::x86::*;
12661    use crate::hint::black_box;
12662    use crate::mem::{self};
12663
12664    #[simd_test(enable = "avx512bw")]
12665    const unsafe fn test_mm512_abs_epi16() {
12666        let a = _mm512_set1_epi16(-1);
12667        let r = _mm512_abs_epi16(a);
12668        let e = _mm512_set1_epi16(1);
12669        assert_eq_m512i(r, e);
12670    }
12671
12672    #[simd_test(enable = "avx512bw")]
12673    const unsafe fn test_mm512_mask_abs_epi16() {
12674        let a = _mm512_set1_epi16(-1);
12675        let r = _mm512_mask_abs_epi16(a, 0, a);
12676        assert_eq_m512i(r, a);
12677        let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a);
12678        #[rustfmt::skip]
12679        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12680                                 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12681        assert_eq_m512i(r, e);
12682    }
12683
12684    #[simd_test(enable = "avx512bw")]
12685    const unsafe fn test_mm512_maskz_abs_epi16() {
12686        let a = _mm512_set1_epi16(-1);
12687        let r = _mm512_maskz_abs_epi16(0, a);
12688        assert_eq_m512i(r, _mm512_setzero_si512());
12689        let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a);
12690        #[rustfmt::skip]
12691        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12692                                  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12693        assert_eq_m512i(r, e);
12694    }
12695
12696    #[simd_test(enable = "avx512bw,avx512vl")]
12697    const unsafe fn test_mm256_mask_abs_epi16() {
12698        let a = _mm256_set1_epi16(-1);
12699        let r = _mm256_mask_abs_epi16(a, 0, a);
12700        assert_eq_m256i(r, a);
12701        let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a);
12702        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12703        assert_eq_m256i(r, e);
12704    }
12705
12706    #[simd_test(enable = "avx512bw,avx512vl")]
12707    const unsafe fn test_mm256_maskz_abs_epi16() {
12708        let a = _mm256_set1_epi16(-1);
12709        let r = _mm256_maskz_abs_epi16(0, a);
12710        assert_eq_m256i(r, _mm256_setzero_si256());
12711        let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a);
12712        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12713        assert_eq_m256i(r, e);
12714    }
12715
12716    #[simd_test(enable = "avx512bw,avx512vl")]
12717    const unsafe fn test_mm_mask_abs_epi16() {
12718        let a = _mm_set1_epi16(-1);
12719        let r = _mm_mask_abs_epi16(a, 0, a);
12720        assert_eq_m128i(r, a);
12721        let r = _mm_mask_abs_epi16(a, 0b00001111, a);
12722        let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1);
12723        assert_eq_m128i(r, e);
12724    }
12725
12726    #[simd_test(enable = "avx512bw,avx512vl")]
12727    const unsafe fn test_mm_maskz_abs_epi16() {
12728        let a = _mm_set1_epi16(-1);
12729        let r = _mm_maskz_abs_epi16(0, a);
12730        assert_eq_m128i(r, _mm_setzero_si128());
12731        let r = _mm_maskz_abs_epi16(0b00001111, a);
12732        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
12733        assert_eq_m128i(r, e);
12734    }
12735
12736    #[simd_test(enable = "avx512bw")]
12737    const unsafe fn test_mm512_abs_epi8() {
12738        let a = _mm512_set1_epi8(-1);
12739        let r = _mm512_abs_epi8(a);
12740        let e = _mm512_set1_epi8(1);
12741        assert_eq_m512i(r, e);
12742    }
12743
12744    #[simd_test(enable = "avx512bw")]
12745    const unsafe fn test_mm512_mask_abs_epi8() {
12746        let a = _mm512_set1_epi8(-1);
12747        let r = _mm512_mask_abs_epi8(a, 0, a);
12748        assert_eq_m512i(r, a);
12749        let r = _mm512_mask_abs_epi8(
12750            a,
12751            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12752            a,
12753        );
12754        #[rustfmt::skip]
12755        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12756                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12757                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12758                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12759        assert_eq_m512i(r, e);
12760    }
12761
12762    #[simd_test(enable = "avx512bw")]
12763    const unsafe fn test_mm512_maskz_abs_epi8() {
12764        let a = _mm512_set1_epi8(-1);
12765        let r = _mm512_maskz_abs_epi8(0, a);
12766        assert_eq_m512i(r, _mm512_setzero_si512());
12767        let r = _mm512_maskz_abs_epi8(
12768            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12769            a,
12770        );
12771        #[rustfmt::skip]
12772        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12773                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12774                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12775                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12776        assert_eq_m512i(r, e);
12777    }
12778
12779    #[simd_test(enable = "avx512bw,avx512vl")]
12780    const unsafe fn test_mm256_mask_abs_epi8() {
12781        let a = _mm256_set1_epi8(-1);
12782        let r = _mm256_mask_abs_epi8(a, 0, a);
12783        assert_eq_m256i(r, a);
12784        let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a);
12785        #[rustfmt::skip]
12786        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
12787                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12788        assert_eq_m256i(r, e);
12789    }
12790
12791    #[simd_test(enable = "avx512bw,avx512vl")]
12792    const unsafe fn test_mm256_maskz_abs_epi8() {
12793        let a = _mm256_set1_epi8(-1);
12794        let r = _mm256_maskz_abs_epi8(0, a);
12795        assert_eq_m256i(r, _mm256_setzero_si256());
12796        let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a);
12797        #[rustfmt::skip]
12798        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
12799                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12800        assert_eq_m256i(r, e);
12801    }
12802
12803    #[simd_test(enable = "avx512bw,avx512vl")]
12804    const unsafe fn test_mm_mask_abs_epi8() {
12805        let a = _mm_set1_epi8(-1);
12806        let r = _mm_mask_abs_epi8(a, 0, a);
12807        assert_eq_m128i(r, a);
12808        let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a);
12809        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
12810        assert_eq_m128i(r, e);
12811    }
12812
12813    #[simd_test(enable = "avx512bw,avx512vl")]
12814    const unsafe fn test_mm_maskz_abs_epi8() {
12815        let a = _mm_set1_epi8(-1);
12816        let r = _mm_maskz_abs_epi8(0, a);
12817        assert_eq_m128i(r, _mm_setzero_si128());
12818        let r = _mm_maskz_abs_epi8(0b00000000_11111111, a);
12819        #[rustfmt::skip]
12820        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
12821        assert_eq_m128i(r, e);
12822    }
12823
12824    #[simd_test(enable = "avx512bw")]
12825    const unsafe fn test_mm512_add_epi16() {
12826        let a = _mm512_set1_epi16(1);
12827        let b = _mm512_set1_epi16(2);
12828        let r = _mm512_add_epi16(a, b);
12829        let e = _mm512_set1_epi16(3);
12830        assert_eq_m512i(r, e);
12831    }
12832
12833    #[simd_test(enable = "avx512bw")]
12834    const unsafe fn test_mm512_mask_add_epi16() {
12835        let a = _mm512_set1_epi16(1);
12836        let b = _mm512_set1_epi16(2);
12837        let r = _mm512_mask_add_epi16(a, 0, a, b);
12838        assert_eq_m512i(r, a);
12839        let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
12840        #[rustfmt::skip]
12841        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
12842                                 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
12843        assert_eq_m512i(r, e);
12844    }
12845
12846    #[simd_test(enable = "avx512bw")]
12847    const unsafe fn test_mm512_maskz_add_epi16() {
12848        let a = _mm512_set1_epi16(1);
12849        let b = _mm512_set1_epi16(2);
12850        let r = _mm512_maskz_add_epi16(0, a, b);
12851        assert_eq_m512i(r, _mm512_setzero_si512());
12852        let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b);
12853        #[rustfmt::skip]
12854        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
12855                                 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
12856        assert_eq_m512i(r, e);
12857    }
12858
12859    #[simd_test(enable = "avx512bw,avx512vl")]
12860    const unsafe fn test_mm256_mask_add_epi16() {
12861        let a = _mm256_set1_epi16(1);
12862        let b = _mm256_set1_epi16(2);
12863        let r = _mm256_mask_add_epi16(a, 0, a, b);
12864        assert_eq_m256i(r, a);
12865        let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b);
12866        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
12867        assert_eq_m256i(r, e);
12868    }
12869
    // _mm256_maskz_add_epi16: set mask bits take a + b (= 3); cleared bits zero the lane,
    // so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_add_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }
12880
    // _mm_mask_add_epi16: set mask bits take a + b (= 3); cleared bits copy `src`
    // (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_add_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }
12891
    // _mm_maskz_add_epi16: set mask bits take a + b (= 3); cleared bits zero the lane,
    // so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_add_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }
12902
12903    #[simd_test(enable = "avx512bw")]
12904    const unsafe fn test_mm512_add_epi8() {
12905        let a = _mm512_set1_epi8(1);
12906        let b = _mm512_set1_epi8(2);
12907        let r = _mm512_add_epi8(a, b);
12908        let e = _mm512_set1_epi8(3);
12909        assert_eq_m512i(r, e);
12910    }
12911
    // _mm512_mask_add_epi8: set mask bits take a + b (= 3); cleared bits copy `src`
    // (here `a` = 1), so a zero mask returns `a` unchanged. `set_epi8` lists lanes
    // high-to-low, so the low mask bits map to the rightmost expected arguments.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_add_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }
12931
    // _mm512_maskz_add_epi8: set mask bits take a + b (= 3); cleared bits zero the lane,
    // so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_add_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }
12950
    // _mm256_mask_add_epi8: set mask bits take a + b (= 3); cleared bits copy `src`
    // (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_add_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }
12963
    // _mm256_maskz_add_epi8: set mask bits take a + b (= 3); cleared bits zero the lane,
    // so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_add_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }
12976
    // _mm_mask_add_epi8: set mask bits take a + b (= 3); cleared bits copy `src`
    // (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_add_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }
12987
    // _mm_maskz_add_epi8: set mask bits take a + b (= 3); cleared bits zero the lane,
    // so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_add_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }
12998
12999    #[simd_test(enable = "avx512bw")]
13000    const unsafe fn test_mm512_adds_epu16() {
13001        let a = _mm512_set1_epi16(1);
13002        let b = _mm512_set1_epi16(u16::MAX as i16);
13003        let r = _mm512_adds_epu16(a, b);
13004        let e = _mm512_set1_epi16(u16::MAX as i16);
13005        assert_eq_m512i(r, e);
13006    }
13007
    // _mm512_mask_adds_epu16: set mask bits take the saturated sum 1 + 0xFFFF (= u16::MAX);
    // cleared bits copy `src` (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_adds_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }
13020
    // _mm512_maskz_adds_epu16: set mask bits take the saturated sum 1 + 0xFFFF (= u16::MAX);
    // cleared bits zero the lane, so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_adds_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }
13033
    // _mm256_mask_adds_epu16: set mask bits take the saturated sum (= u16::MAX);
    // cleared bits copy `src` (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_mask_adds_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }
13045
    // _mm256_maskz_adds_epu16: set mask bits take the saturated sum (= u16::MAX);
    // cleared bits zero the lane, so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_maskz_adds_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }
13057
    // _mm_mask_adds_epu16: set mask bits take the saturated sum (= u16::MAX);
    // cleared bits copy `src` (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_mask_adds_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu16(a, 0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }
13069
    // _mm_maskz_adds_epu16: set mask bits take the saturated sum (= u16::MAX);
    // cleared bits zero the lane, so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_maskz_adds_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu16(0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }
13081
13082    #[simd_test(enable = "avx512bw")]
13083    const unsafe fn test_mm512_adds_epu8() {
13084        let a = _mm512_set1_epi8(1);
13085        let b = _mm512_set1_epi8(u8::MAX as i8);
13086        let r = _mm512_adds_epu8(a, b);
13087        let e = _mm512_set1_epi8(u8::MAX as i8);
13088        assert_eq_m512i(r, e);
13089    }
13090
    // _mm512_mask_adds_epu8: set mask bits take the saturated sum 1 + 0xFF (= u8::MAX);
    // cleared bits copy `src` (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_mask_adds_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }
13110
    // _mm512_maskz_adds_epu8: set mask bits take the saturated sum 1 + 0xFF (= u8::MAX);
    // cleared bits zero the lane, so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_maskz_adds_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }
13129
    // _mm256_mask_adds_epu8: set mask bits take the saturated sum (= u8::MAX);
    // cleared bits copy `src` (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_mask_adds_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }
13142
    // _mm256_maskz_adds_epu8: set mask bits take the saturated sum (= u8::MAX);
    // cleared bits zero the lane, so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_maskz_adds_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }
13155
    // _mm_mask_adds_epu8: set mask bits take the saturated sum (= u8::MAX);
    // cleared bits copy `src` (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_mask_adds_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
13167
    // _mm_maskz_adds_epu8: set mask bits take the saturated sum (= u8::MAX);
    // cleared bits zero the lane, so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_maskz_adds_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }
13179
13180    #[simd_test(enable = "avx512bw")]
13181    const unsafe fn test_mm512_adds_epi16() {
13182        let a = _mm512_set1_epi16(1);
13183        let b = _mm512_set1_epi16(i16::MAX);
13184        let r = _mm512_adds_epi16(a, b);
13185        let e = _mm512_set1_epi16(i16::MAX);
13186        assert_eq_m512i(r, e);
13187    }
13188
    // _mm512_mask_adds_epi16: set mask bits take the saturated sum 1 + i16::MAX (= i16::MAX);
    // cleared bits copy `src` (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_adds_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }
13201
    // _mm512_maskz_adds_epi16: set mask bits take the saturated sum (= i16::MAX);
    // cleared bits zero the lane, so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_adds_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }
13214
    // _mm256_mask_adds_epi16: set mask bits take the saturated sum (= i16::MAX);
    // cleared bits copy `src` (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_adds_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }
13226
    // _mm256_maskz_adds_epi16: set mask bits take the saturated sum (= i16::MAX);
    // cleared bits zero the lane, so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_adds_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }
13238
    // _mm_mask_adds_epi16: set mask bits take the saturated sum (= i16::MAX);
    // cleared bits copy `src` (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_adds_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
13249
    // _mm_maskz_adds_epi16: set mask bits take the saturated sum (= i16::MAX);
    // cleared bits zero the lane, so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_adds_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
13260
13261    #[simd_test(enable = "avx512bw")]
13262    const unsafe fn test_mm512_adds_epi8() {
13263        let a = _mm512_set1_epi8(1);
13264        let b = _mm512_set1_epi8(i8::MAX);
13265        let r = _mm512_adds_epi8(a, b);
13266        let e = _mm512_set1_epi8(i8::MAX);
13267        assert_eq_m512i(r, e);
13268    }
13269
    // _mm512_mask_adds_epi8: set mask bits take the saturated sum 1 + i8::MAX (= i8::MAX);
    // cleared bits copy `src` (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_mask_adds_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }
13289
    // _mm512_maskz_adds_epi8: set mask bits take the saturated sum (= i8::MAX);
    // cleared bits zero the lane, so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_maskz_adds_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }
13308
    // _mm256_mask_adds_epi8: set mask bits take the saturated sum (= i8::MAX);
    // cleared bits copy `src` (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_mask_adds_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }
13321
    // _mm256_maskz_adds_epi8: set mask bits take the saturated sum (= i8::MAX);
    // cleared bits zero the lane, so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_maskz_adds_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }
13334
    // _mm_mask_adds_epi8: set mask bits take the saturated sum (= i8::MAX);
    // cleared bits copy `src` (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_mask_adds_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
13346
    // _mm_maskz_adds_epi8: set mask bits take the saturated sum (= i8::MAX);
    // cleared bits zero the lane, so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_maskz_adds_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
13358
13359    #[simd_test(enable = "avx512bw")]
13360    const unsafe fn test_mm512_sub_epi16() {
13361        let a = _mm512_set1_epi16(1);
13362        let b = _mm512_set1_epi16(2);
13363        let r = _mm512_sub_epi16(a, b);
13364        let e = _mm512_set1_epi16(-1);
13365        assert_eq_m512i(r, e);
13366    }
13367
    // _mm512_mask_sub_epi16: set mask bits take a - b (= -1); cleared bits copy `src`
    // (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_sub_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }
13380
    // _mm512_maskz_sub_epi16: set mask bits take a - b (= -1); cleared bits zero the lane,
    // so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_sub_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }
13393
    // _mm256_mask_sub_epi16: set mask bits take a - b (= -1); cleared bits copy `src`
    // (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_sub_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }
13404
    // _mm256_maskz_sub_epi16: set mask bits take a - b (= -1); cleared bits zero the lane,
    // so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_sub_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }
13415
    // _mm_mask_sub_epi16: set mask bits take a - b (= -1); cleared bits copy `src`
    // (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_sub_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
13426
    // _mm_maskz_sub_epi16: set mask bits take a - b (= -1); cleared bits zero the lane,
    // so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_sub_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
13437
13438    #[simd_test(enable = "avx512bw")]
13439    const unsafe fn test_mm512_sub_epi8() {
13440        let a = _mm512_set1_epi8(1);
13441        let b = _mm512_set1_epi8(2);
13442        let r = _mm512_sub_epi8(a, b);
13443        let e = _mm512_set1_epi8(-1);
13444        assert_eq_m512i(r, e);
13445    }
13446
    // _mm512_mask_sub_epi8: set mask bits take a - b (= -1); cleared bits copy `src`
    // (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_sub_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }
13466
    // _mm512_maskz_sub_epi8: set mask bits take a - b (= -1); cleared bits zero the lane,
    // so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_sub_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }
13485
    // _mm256_mask_sub_epi8: set mask bits take a - b (= -1); cleared bits copy `src`
    // (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_sub_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }
13498
    // _mm256_maskz_sub_epi8: set mask bits take a - b (= -1); cleared bits zero the lane,
    // so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_sub_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }
13511
    // _mm_mask_sub_epi8: set mask bits take a - b (= -1); cleared bits copy `src`
    // (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_sub_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
13522
    // _mm_maskz_sub_epi8: set mask bits take a - b (= -1); cleared bits zero the lane,
    // so a zero mask yields an all-zero vector.
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_sub_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
13533
13534    #[simd_test(enable = "avx512bw")]
13535    const unsafe fn test_mm512_subs_epu16() {
13536        let a = _mm512_set1_epi16(1);
13537        let b = _mm512_set1_epi16(u16::MAX as i16);
13538        let r = _mm512_subs_epu16(a, b);
13539        let e = _mm512_set1_epi16(0);
13540        assert_eq_m512i(r, e);
13541    }
13542
    // _mm512_mask_subs_epu16: set mask bits take the saturated difference 1 - 0xFFFF (= 0);
    // cleared bits copy `src` (here `a` = 1), so a zero mask returns `a` unchanged.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_subs_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
13555
    // _mm512_maskz_subs_epu16: set mask bits take the saturated difference 1 - 0xFFFF (= 0)
    // and cleared bits are zeroed, so both halves of this test expect all-zero vectors.
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_subs_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
13568
13569    #[simd_test(enable = "avx512bw,avx512vl")]
13570    const unsafe fn test_mm256_mask_subs_epu16() {
13571        let a = _mm256_set1_epi16(1);
13572        let b = _mm256_set1_epi16(u16::MAX as i16);
13573        let r = _mm256_mask_subs_epu16(a, 0, a, b);
13574        assert_eq_m256i(r, a);
13575        let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b);
13576        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13577        assert_eq_m256i(r, e);
13578    }
13579
13580    #[simd_test(enable = "avx512bw,avx512vl")]
13581    const unsafe fn test_mm256_maskz_subs_epu16() {
13582        let a = _mm256_set1_epi16(1);
13583        let b = _mm256_set1_epi16(u16::MAX as i16);
13584        let r = _mm256_maskz_subs_epu16(0, a, b);
13585        assert_eq_m256i(r, _mm256_setzero_si256());
13586        let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b);
13587        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13588        assert_eq_m256i(r, e);
13589    }
13590
13591    #[simd_test(enable = "avx512bw,avx512vl")]
13592    const unsafe fn test_mm_mask_subs_epu16() {
13593        let a = _mm_set1_epi16(1);
13594        let b = _mm_set1_epi16(u16::MAX as i16);
13595        let r = _mm_mask_subs_epu16(a, 0, a, b);
13596        assert_eq_m128i(r, a);
13597        let r = _mm_mask_subs_epu16(a, 0b00001111, a, b);
13598        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
13599        assert_eq_m128i(r, e);
13600    }
13601
13602    #[simd_test(enable = "avx512bw,avx512vl")]
13603    const unsafe fn test_mm_maskz_subs_epu16() {
13604        let a = _mm_set1_epi16(1);
13605        let b = _mm_set1_epi16(u16::MAX as i16);
13606        let r = _mm_maskz_subs_epu16(0, a, b);
13607        assert_eq_m128i(r, _mm_setzero_si128());
13608        let r = _mm_maskz_subs_epu16(0b00001111, a, b);
13609        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
13610        assert_eq_m128i(r, e);
13611    }
13612
13613    #[simd_test(enable = "avx512bw")]
13614    const unsafe fn test_mm512_subs_epu8() {
13615        let a = _mm512_set1_epi8(1);
13616        let b = _mm512_set1_epi8(u8::MAX as i8);
13617        let r = _mm512_subs_epu8(a, b);
13618        let e = _mm512_set1_epi8(0);
13619        assert_eq_m512i(r, e);
13620    }
13621
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_subs_epu8() {
        // 1 - u8::MAX saturates to 0 in selected lanes; unselected lanes copy src (a).
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        // Zero mask: every lane must come from src.
        let r = _mm512_mask_subs_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low four mask bits select lanes 0-3, the last four arguments of
        // _mm512_set_epi8 below.
        let r = _mm512_mask_subs_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
13641
13642    #[simd_test(enable = "avx512bw")]
13643    const unsafe fn test_mm512_maskz_subs_epu8() {
13644        let a = _mm512_set1_epi8(1);
13645        let b = _mm512_set1_epi8(u8::MAX as i8);
13646        let r = _mm512_maskz_subs_epu8(0, a, b);
13647        assert_eq_m512i(r, _mm512_setzero_si512());
13648        let r = _mm512_maskz_subs_epu8(
13649            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
13650            a,
13651            b,
13652        );
13653        #[rustfmt::skip]
13654        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13655                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13656                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13657                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13658        assert_eq_m512i(r, e);
13659    }
13660
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_subs_epu8() {
        // 1 - u8::MAX saturates to 0 in selected lanes; unselected lanes copy src (a).
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        // Zero mask: result equals src.
        let r = _mm256_mask_subs_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low four mask bits select lanes 0-3 (last four set_epi8 arguments).
        let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
13673
13674    #[simd_test(enable = "avx512bw,avx512vl")]
13675    const unsafe fn test_mm256_maskz_subs_epu8() {
13676        let a = _mm256_set1_epi8(1);
13677        let b = _mm256_set1_epi8(u8::MAX as i8);
13678        let r = _mm256_maskz_subs_epu8(0, a, b);
13679        assert_eq_m256i(r, _mm256_setzero_si256());
13680        let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b);
13681        #[rustfmt::skip]
13682        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13683                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13684        assert_eq_m256i(r, e);
13685    }
13686
13687    #[simd_test(enable = "avx512bw,avx512vl")]
13688    const unsafe fn test_mm_mask_subs_epu8() {
13689        let a = _mm_set1_epi8(1);
13690        let b = _mm_set1_epi8(u8::MAX as i8);
13691        let r = _mm_mask_subs_epu8(a, 0, a, b);
13692        assert_eq_m128i(r, a);
13693        let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b);
13694        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13695        assert_eq_m128i(r, e);
13696    }
13697
13698    #[simd_test(enable = "avx512bw,avx512vl")]
13699    const unsafe fn test_mm_maskz_subs_epu8() {
13700        let a = _mm_set1_epi8(1);
13701        let b = _mm_set1_epi8(u8::MAX as i8);
13702        let r = _mm_maskz_subs_epu8(0, a, b);
13703        assert_eq_m128i(r, _mm_setzero_si128());
13704        let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b);
13705        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13706        assert_eq_m128i(r, e);
13707    }
13708
13709    #[simd_test(enable = "avx512bw")]
13710    const unsafe fn test_mm512_subs_epi16() {
13711        let a = _mm512_set1_epi16(-1);
13712        let b = _mm512_set1_epi16(i16::MAX);
13713        let r = _mm512_subs_epi16(a, b);
13714        let e = _mm512_set1_epi16(i16::MIN);
13715        assert_eq_m512i(r, e);
13716    }
13717
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_subs_epi16() {
        // -1 - i16::MAX = i16::MIN in selected lanes; unselected lanes copy src (a).
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        // Zero mask: result equals src.
        let r = _mm512_mask_subs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low four mask bits select lanes 0-3 (last four set_epi16 arguments).
        let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m512i(r, e);
    }
13730
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_subs_epi16() {
        // -1 - i16::MAX = i16::MIN in selected lanes; unselected lanes are zeroed.
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        // Zero mask: everything is zeroed.
        let r = _mm512_maskz_subs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low four mask bits select lanes 0-3 (last four set_epi16 arguments).
        let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m512i(r, e);
    }
13743
13744    #[simd_test(enable = "avx512bw,avx512vl")]
13745    const unsafe fn test_mm256_mask_subs_epi16() {
13746        let a = _mm256_set1_epi16(-1);
13747        let b = _mm256_set1_epi16(i16::MAX);
13748        let r = _mm256_mask_subs_epi16(a, 0, a, b);
13749        assert_eq_m256i(r, a);
13750        let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b);
13751        #[rustfmt::skip]
13752        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13753        assert_eq_m256i(r, e);
13754    }
13755
13756    #[simd_test(enable = "avx512bw,avx512vl")]
13757    const unsafe fn test_mm256_maskz_subs_epi16() {
13758        let a = _mm256_set1_epi16(-1);
13759        let b = _mm256_set1_epi16(i16::MAX);
13760        let r = _mm256_maskz_subs_epi16(0, a, b);
13761        assert_eq_m256i(r, _mm256_setzero_si256());
13762        let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b);
13763        #[rustfmt::skip]
13764        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13765        assert_eq_m256i(r, e);
13766    }
13767
13768    #[simd_test(enable = "avx512bw,avx512vl")]
13769    const unsafe fn test_mm_mask_subs_epi16() {
13770        let a = _mm_set1_epi16(-1);
13771        let b = _mm_set1_epi16(i16::MAX);
13772        let r = _mm_mask_subs_epi16(a, 0, a, b);
13773        assert_eq_m128i(r, a);
13774        let r = _mm_mask_subs_epi16(a, 0b00001111, a, b);
13775        let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13776        assert_eq_m128i(r, e);
13777    }
13778
13779    #[simd_test(enable = "avx512bw,avx512vl")]
13780    const unsafe fn test_mm_maskz_subs_epi16() {
13781        let a = _mm_set1_epi16(-1);
13782        let b = _mm_set1_epi16(i16::MAX);
13783        let r = _mm_maskz_subs_epi16(0, a, b);
13784        assert_eq_m128i(r, _mm_setzero_si128());
13785        let r = _mm_maskz_subs_epi16(0b00001111, a, b);
13786        let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
13787        assert_eq_m128i(r, e);
13788    }
13789
13790    #[simd_test(enable = "avx512bw")]
13791    const unsafe fn test_mm512_subs_epi8() {
13792        let a = _mm512_set1_epi8(-1);
13793        let b = _mm512_set1_epi8(i8::MAX);
13794        let r = _mm512_subs_epi8(a, b);
13795        let e = _mm512_set1_epi8(i8::MIN);
13796        assert_eq_m512i(r, e);
13797    }
13798
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_subs_epi8() {
        // -1 - i8::MAX = i8::MIN in selected lanes; unselected lanes copy src (a).
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        // Zero mask: result equals src.
        let r = _mm512_mask_subs_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low four mask bits select lanes 0-3 (last four set_epi8 arguments).
        let r = _mm512_mask_subs_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m512i(r, e);
    }
13818
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_subs_epi8() {
        // -1 - i8::MAX = i8::MIN in selected lanes; unselected lanes are zeroed.
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        // Zero mask: everything is zeroed.
        let r = _mm512_maskz_subs_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low four mask bits select lanes 0-3 (last four set_epi8 arguments).
        let r = _mm512_maskz_subs_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m512i(r, e);
    }
13837
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_subs_epi8() {
        // -1 - i8::MAX = i8::MIN in selected lanes; unselected lanes copy src (a).
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(i8::MAX);
        // Zero mask: result equals src.
        let r = _mm256_mask_subs_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low four mask bits select lanes 0-3 (last four set_epi8 arguments).
        let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m256i(r, e);
    }
13850
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_subs_epi8() {
        // -1 - i8::MAX = i8::MIN in selected lanes; unselected lanes are zeroed.
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(i8::MAX);
        // Zero mask: everything is zeroed.
        let r = _mm256_maskz_subs_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Low four mask bits select lanes 0-3 (last four set_epi8 arguments).
        let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m256i(r, e);
    }
13863
13864    #[simd_test(enable = "avx512bw,avx512vl")]
13865    const unsafe fn test_mm_mask_subs_epi8() {
13866        let a = _mm_set1_epi8(-1);
13867        let b = _mm_set1_epi8(i8::MAX);
13868        let r = _mm_mask_subs_epi8(a, 0, a, b);
13869        assert_eq_m128i(r, a);
13870        let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b);
13871        #[rustfmt::skip]
13872        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13873        assert_eq_m128i(r, e);
13874    }
13875
13876    #[simd_test(enable = "avx512bw,avx512vl")]
13877    const unsafe fn test_mm_maskz_subs_epi8() {
13878        let a = _mm_set1_epi8(-1);
13879        let b = _mm_set1_epi8(i8::MAX);
13880        let r = _mm_maskz_subs_epi8(0, a, b);
13881        assert_eq_m128i(r, _mm_setzero_si128());
13882        let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b);
13883        #[rustfmt::skip]
13884        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
13885        assert_eq_m128i(r, e);
13886    }
13887
13888    #[simd_test(enable = "avx512bw")]
13889    const unsafe fn test_mm512_mulhi_epu16() {
13890        let a = _mm512_set1_epi16(1);
13891        let b = _mm512_set1_epi16(1);
13892        let r = _mm512_mulhi_epu16(a, b);
13893        let e = _mm512_set1_epi16(0);
13894        assert_eq_m512i(r, e);
13895    }
13896
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_mulhi_epu16() {
        // High half of 1 * 1 is 0 in selected lanes; unselected lanes copy src (a = 1).
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        // Zero mask: result equals src.
        let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low four mask bits select lanes 0-3 (last four set_epi16 arguments).
        let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
13909
13910    #[simd_test(enable = "avx512bw")]
13911    const unsafe fn test_mm512_maskz_mulhi_epu16() {
13912        let a = _mm512_set1_epi16(1);
13913        let b = _mm512_set1_epi16(1);
13914        let r = _mm512_maskz_mulhi_epu16(0, a, b);
13915        assert_eq_m512i(r, _mm512_setzero_si512());
13916        let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b);
13917        #[rustfmt::skip]
13918        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13919                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13920        assert_eq_m512i(r, e);
13921    }
13922
13923    #[simd_test(enable = "avx512bw,avx512vl")]
13924    const unsafe fn test_mm256_mask_mulhi_epu16() {
13925        let a = _mm256_set1_epi16(1);
13926        let b = _mm256_set1_epi16(1);
13927        let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
13928        assert_eq_m256i(r, a);
13929        let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b);
13930        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
13931        assert_eq_m256i(r, e);
13932    }
13933
13934    #[simd_test(enable = "avx512bw,avx512vl")]
13935    const unsafe fn test_mm256_maskz_mulhi_epu16() {
13936        let a = _mm256_set1_epi16(1);
13937        let b = _mm256_set1_epi16(1);
13938        let r = _mm256_maskz_mulhi_epu16(0, a, b);
13939        assert_eq_m256i(r, _mm256_setzero_si256());
13940        let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b);
13941        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13942        assert_eq_m256i(r, e);
13943    }
13944
13945    #[simd_test(enable = "avx512bw,avx512vl")]
13946    const unsafe fn test_mm_mask_mulhi_epu16() {
13947        let a = _mm_set1_epi16(1);
13948        let b = _mm_set1_epi16(1);
13949        let r = _mm_mask_mulhi_epu16(a, 0, a, b);
13950        assert_eq_m128i(r, a);
13951        let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b);
13952        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
13953        assert_eq_m128i(r, e);
13954    }
13955
13956    #[simd_test(enable = "avx512bw,avx512vl")]
13957    const unsafe fn test_mm_maskz_mulhi_epu16() {
13958        let a = _mm_set1_epi16(1);
13959        let b = _mm_set1_epi16(1);
13960        let r = _mm_maskz_mulhi_epu16(0, a, b);
13961        assert_eq_m128i(r, _mm_setzero_si128());
13962        let r = _mm_maskz_mulhi_epu16(0b00001111, a, b);
13963        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
13964        assert_eq_m128i(r, e);
13965    }
13966
13967    #[simd_test(enable = "avx512bw")]
13968    const unsafe fn test_mm512_mulhi_epi16() {
13969        let a = _mm512_set1_epi16(1);
13970        let b = _mm512_set1_epi16(1);
13971        let r = _mm512_mulhi_epi16(a, b);
13972        let e = _mm512_set1_epi16(0);
13973        assert_eq_m512i(r, e);
13974    }
13975
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_mulhi_epi16() {
        // Signed high half of 1 * 1 is 0 in selected lanes; unselected lanes copy src (a = 1).
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        // Zero mask: result equals src.
        let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low four mask bits select lanes 0-3 (last four set_epi16 arguments).
        let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
13988
13989    #[simd_test(enable = "avx512bw")]
13990    const unsafe fn test_mm512_maskz_mulhi_epi16() {
13991        let a = _mm512_set1_epi16(1);
13992        let b = _mm512_set1_epi16(1);
13993        let r = _mm512_maskz_mulhi_epi16(0, a, b);
13994        assert_eq_m512i(r, _mm512_setzero_si512());
13995        let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b);
13996        #[rustfmt::skip]
13997        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13998                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
13999        assert_eq_m512i(r, e);
14000    }
14001
14002    #[simd_test(enable = "avx512bw,avx512vl")]
14003    const unsafe fn test_mm256_mask_mulhi_epi16() {
14004        let a = _mm256_set1_epi16(1);
14005        let b = _mm256_set1_epi16(1);
14006        let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
14007        assert_eq_m256i(r, a);
14008        let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b);
14009        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14010        assert_eq_m256i(r, e);
14011    }
14012
14013    #[simd_test(enable = "avx512bw,avx512vl")]
14014    const unsafe fn test_mm256_maskz_mulhi_epi16() {
14015        let a = _mm256_set1_epi16(1);
14016        let b = _mm256_set1_epi16(1);
14017        let r = _mm256_maskz_mulhi_epi16(0, a, b);
14018        assert_eq_m256i(r, _mm256_setzero_si256());
14019        let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b);
14020        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14021        assert_eq_m256i(r, e);
14022    }
14023
14024    #[simd_test(enable = "avx512bw,avx512vl")]
14025    const unsafe fn test_mm_mask_mulhi_epi16() {
14026        let a = _mm_set1_epi16(1);
14027        let b = _mm_set1_epi16(1);
14028        let r = _mm_mask_mulhi_epi16(a, 0, a, b);
14029        assert_eq_m128i(r, a);
14030        let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b);
14031        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
14032        assert_eq_m128i(r, e);
14033    }
14034
14035    #[simd_test(enable = "avx512bw,avx512vl")]
14036    const unsafe fn test_mm_maskz_mulhi_epi16() {
14037        let a = _mm_set1_epi16(1);
14038        let b = _mm_set1_epi16(1);
14039        let r = _mm_maskz_mulhi_epi16(0, a, b);
14040        assert_eq_m128i(r, _mm_setzero_si128());
14041        let r = _mm_maskz_mulhi_epi16(0b00001111, a, b);
14042        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
14043        assert_eq_m128i(r, e);
14044    }
14045
14046    #[simd_test(enable = "avx512bw")]
14047    unsafe fn test_mm512_mulhrs_epi16() {
14048        let a = _mm512_set1_epi16(1);
14049        let b = _mm512_set1_epi16(1);
14050        let r = _mm512_mulhrs_epi16(a, b);
14051        let e = _mm512_set1_epi16(0);
14052        assert_eq_m512i(r, e);
14053    }
14054
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhrs_epi16() {
        // Rounded high product of 1 * 1 is 0 in selected lanes; unselected lanes copy src (a = 1).
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        // Zero mask: result equals src.
        let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low four mask bits select lanes 0-3 (last four set_epi16 arguments).
        let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
14067
14068    #[simd_test(enable = "avx512bw")]
14069    unsafe fn test_mm512_maskz_mulhrs_epi16() {
14070        let a = _mm512_set1_epi16(1);
14071        let b = _mm512_set1_epi16(1);
14072        let r = _mm512_maskz_mulhrs_epi16(0, a, b);
14073        assert_eq_m512i(r, _mm512_setzero_si512());
14074        let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b);
14075        #[rustfmt::skip]
14076        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14077                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14078        assert_eq_m512i(r, e);
14079    }
14080
14081    #[simd_test(enable = "avx512bw,avx512vl")]
14082    unsafe fn test_mm256_mask_mulhrs_epi16() {
14083        let a = _mm256_set1_epi16(1);
14084        let b = _mm256_set1_epi16(1);
14085        let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
14086        assert_eq_m256i(r, a);
14087        let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b);
14088        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14089        assert_eq_m256i(r, e);
14090    }
14091
14092    #[simd_test(enable = "avx512bw,avx512vl")]
14093    unsafe fn test_mm256_maskz_mulhrs_epi16() {
14094        let a = _mm256_set1_epi16(1);
14095        let b = _mm256_set1_epi16(1);
14096        let r = _mm256_maskz_mulhrs_epi16(0, a, b);
14097        assert_eq_m256i(r, _mm256_setzero_si256());
14098        let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b);
14099        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14100        assert_eq_m256i(r, e);
14101    }
14102
14103    #[simd_test(enable = "avx512bw,avx512vl")]
14104    unsafe fn test_mm_mask_mulhrs_epi16() {
14105        let a = _mm_set1_epi16(1);
14106        let b = _mm_set1_epi16(1);
14107        let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
14108        assert_eq_m128i(r, a);
14109        let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b);
14110        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
14111        assert_eq_m128i(r, e);
14112    }
14113
14114    #[simd_test(enable = "avx512bw,avx512vl")]
14115    unsafe fn test_mm_maskz_mulhrs_epi16() {
14116        let a = _mm_set1_epi16(1);
14117        let b = _mm_set1_epi16(1);
14118        let r = _mm_maskz_mulhrs_epi16(0, a, b);
14119        assert_eq_m128i(r, _mm_setzero_si128());
14120        let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b);
14121        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
14122        assert_eq_m128i(r, e);
14123    }
14124
14125    #[simd_test(enable = "avx512bw")]
14126    const unsafe fn test_mm512_mullo_epi16() {
14127        let a = _mm512_set1_epi16(1);
14128        let b = _mm512_set1_epi16(1);
14129        let r = _mm512_mullo_epi16(a, b);
14130        let e = _mm512_set1_epi16(1);
14131        assert_eq_m512i(r, e);
14132    }
14133
14134    #[simd_test(enable = "avx512bw")]
14135    const unsafe fn test_mm512_mask_mullo_epi16() {
14136        let a = _mm512_set1_epi16(1);
14137        let b = _mm512_set1_epi16(1);
14138        let r = _mm512_mask_mullo_epi16(a, 0, a, b);
14139        assert_eq_m512i(r, a);
14140        let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
14141        #[rustfmt::skip]
14142        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14143                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
14144        assert_eq_m512i(r, e);
14145    }
14146
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_mullo_epi16() {
        // 1 * 1 = 1 in selected lanes; unselected lanes are zeroed.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        // Zero mask: everything is zeroed.
        let r = _mm512_maskz_mullo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Low four mask bits select lanes 0-3 (last four set_epi16 arguments).
        let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
14159
14160    #[simd_test(enable = "avx512bw,avx512vl")]
14161    const unsafe fn test_mm256_mask_mullo_epi16() {
14162        let a = _mm256_set1_epi16(1);
14163        let b = _mm256_set1_epi16(1);
14164        let r = _mm256_mask_mullo_epi16(a, 0, a, b);
14165        assert_eq_m256i(r, a);
14166        let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b);
14167        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
14168        assert_eq_m256i(r, e);
14169    }
14170
14171    #[simd_test(enable = "avx512bw,avx512vl")]
14172    const unsafe fn test_mm256_maskz_mullo_epi16() {
14173        let a = _mm256_set1_epi16(1);
14174        let b = _mm256_set1_epi16(1);
14175        let r = _mm256_maskz_mullo_epi16(0, a, b);
14176        assert_eq_m256i(r, _mm256_setzero_si256());
14177        let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b);
14178        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
14179        assert_eq_m256i(r, e);
14180    }
14181
14182    #[simd_test(enable = "avx512bw,avx512vl")]
14183    const unsafe fn test_mm_mask_mullo_epi16() {
14184        let a = _mm_set1_epi16(1);
14185        let b = _mm_set1_epi16(1);
14186        let r = _mm_mask_mullo_epi16(a, 0, a, b);
14187        assert_eq_m128i(r, a);
14188        let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b);
14189        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
14190        assert_eq_m128i(r, e);
14191    }
14192
14193    #[simd_test(enable = "avx512bw,avx512vl")]
14194    const unsafe fn test_mm_maskz_mullo_epi16() {
14195        let a = _mm_set1_epi16(1);
14196        let b = _mm_set1_epi16(1);
14197        let r = _mm_maskz_mullo_epi16(0, a, b);
14198        assert_eq_m128i(r, _mm_setzero_si128());
14199        let r = _mm_maskz_mullo_epi16(0b00001111, a, b);
14200        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
14201        assert_eq_m128i(r, e);
14202    }
14203
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_max_epu16() {
        // a counts 0..15 per 16-lane half (lane 0 holds 15), b holds the
        // reverse; the unsigned per-lane max is the larger of the two.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14218
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_max_epu16() {
        // a counts 0..15 per half (lane 0 holds 15); b holds the reverse.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: result equals src (a).
        let r = _mm512_mask_max_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Lanes 0-7 and 16-23 selected: there max(a, b) happens to equal a's
        // own values, so the expected result is a in every lane.
        let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14235
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_max_epu16() {
        // a counts 0..15 per half (lane 0 holds 15); b holds the reverse.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: everything is zeroed.
        let r = _mm512_maskz_max_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Lanes 0-7 and 16-23 keep max(a, b) = 15..8; the rest are zeroed.
        let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14252
14253    #[simd_test(enable = "avx512bw,avx512vl")]
14254    const unsafe fn test_mm256_mask_max_epu16() {
14255        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14256        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14257        let r = _mm256_mask_max_epu16(a, 0, a, b);
14258        assert_eq_m256i(r, a);
14259        let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b);
14260        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14261        assert_eq_m256i(r, e);
14262    }
14263
14264    #[simd_test(enable = "avx512bw,avx512vl")]
14265    const unsafe fn test_mm256_maskz_max_epu16() {
14266        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14267        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14268        let r = _mm256_maskz_max_epu16(0, a, b);
14269        assert_eq_m256i(r, _mm256_setzero_si256());
14270        let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b);
14271        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14272        assert_eq_m256i(r, e);
14273    }
14274
14275    #[simd_test(enable = "avx512bw,avx512vl")]
14276    const unsafe fn test_mm_mask_max_epu16() {
14277        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14278        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14279        let r = _mm_mask_max_epu16(a, 0, a, b);
14280        assert_eq_m128i(r, a);
14281        let r = _mm_mask_max_epu16(a, 0b00001111, a, b);
14282        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14283        assert_eq_m128i(r, e);
14284    }
14285
14286    #[simd_test(enable = "avx512bw,avx512vl")]
14287    const unsafe fn test_mm_maskz_max_epu16() {
14288        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14289        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14290        let r = _mm_maskz_max_epu16(0, a, b);
14291        assert_eq_m128i(r, _mm_setzero_si128());
14292        let r = _mm_maskz_max_epu16(0b00001111, a, b);
14293        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
14294        assert_eq_m128i(r, e);
14295    }
14296
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_max_epu8() {
        // `a` counts up per 16-lane group and `b` counts down, so max(a, b)
        // picks the larger of each mirrored pair in every lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14317
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_max_epu8() {
        // `a` counts up per 16-lane group and `b` counts down, so max(a, b)
        // picks the larger of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from `src` (the first argument).
        let r = _mm512_mask_max_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Alternating 8-bit runs: set bits take the max, clear bits keep `src`.
        let r = _mm512_mask_max_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14345
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_max_epu8() {
        // `a` counts up per 16-lane group and `b` counts down, so max(a, b)
        // picks the larger of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: all lanes are zeroed out.
        let r = _mm512_maskz_max_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Alternating 8-bit runs: set bits take the max, clear bits yield zero.
        let r = _mm512_maskz_max_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14372
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_max_epu8() {
        // `a` counts up per 16-lane half and `b` counts down, so max(a, b)
        // picks the larger of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from `src` (the first argument).
        let r = _mm256_mask_max_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Alternating 8-bit runs: set bits take the max, clear bits keep `src`.
        let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
14389
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_max_epu8() {
        // `a` counts up per 16-lane half and `b` counts down, so max(a, b)
        // picks the larger of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: all lanes are zeroed out.
        let r = _mm256_maskz_max_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Alternating 8-bit runs: set bits take the max, clear bits yield zero.
        let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
14406
14407    #[simd_test(enable = "avx512bw,avx512vl")]
14408    const unsafe fn test_mm_mask_max_epu8() {
14409        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14410        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14411        let r = _mm_mask_max_epu8(a, 0, a, b);
14412        assert_eq_m128i(r, a);
14413        let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b);
14414        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14415        assert_eq_m128i(r, e);
14416    }
14417
14418    #[simd_test(enable = "avx512bw,avx512vl")]
14419    const unsafe fn test_mm_maskz_max_epu8() {
14420        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14421        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14422        let r = _mm_maskz_max_epu8(0, a, b);
14423        assert_eq_m128i(r, _mm_setzero_si128());
14424        let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b);
14425        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14426        assert_eq_m128i(r, e);
14427    }
14428
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_max_epi16() {
        // `a` counts up per 16-lane half and `b` counts down, so max(a, b)
        // picks the larger of each mirrored pair (all values non-negative,
        // so signed max behaves like unsigned here).
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14443
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_max_epi16() {
        // `a` counts up per 16-lane half and `b` counts down, so max(a, b)
        // picks the larger of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from `src` (the first argument).
        let r = _mm512_mask_max_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Alternating 8-bit runs: set bits take the max, clear bits keep `src`.
        let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14460
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_max_epi16() {
        // `a` counts up per 16-lane half and `b` counts down, so max(a, b)
        // picks the larger of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: all lanes are zeroed out.
        let r = _mm512_maskz_max_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Alternating 8-bit runs: set bits take the max, clear bits yield zero.
        let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14477
14478    #[simd_test(enable = "avx512bw,avx512vl")]
14479    const unsafe fn test_mm256_mask_max_epi16() {
14480        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14481        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14482        let r = _mm256_mask_max_epi16(a, 0, a, b);
14483        assert_eq_m256i(r, a);
14484        let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b);
14485        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14486        assert_eq_m256i(r, e);
14487    }
14488
14489    #[simd_test(enable = "avx512bw,avx512vl")]
14490    const unsafe fn test_mm256_maskz_max_epi16() {
14491        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14492        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14493        let r = _mm256_maskz_max_epi16(0, a, b);
14494        assert_eq_m256i(r, _mm256_setzero_si256());
14495        let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b);
14496        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14497        assert_eq_m256i(r, e);
14498    }
14499
14500    #[simd_test(enable = "avx512bw,avx512vl")]
14501    const unsafe fn test_mm_mask_max_epi16() {
14502        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14503        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14504        let r = _mm_mask_max_epi16(a, 0, a, b);
14505        assert_eq_m128i(r, a);
14506        let r = _mm_mask_max_epi16(a, 0b00001111, a, b);
14507        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14508        assert_eq_m128i(r, e);
14509    }
14510
14511    #[simd_test(enable = "avx512bw,avx512vl")]
14512    const unsafe fn test_mm_maskz_max_epi16() {
14513        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14514        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14515        let r = _mm_maskz_max_epi16(0, a, b);
14516        assert_eq_m128i(r, _mm_setzero_si128());
14517        let r = _mm_maskz_max_epi16(0b00001111, a, b);
14518        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
14519        assert_eq_m128i(r, e);
14520    }
14521
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_max_epi8() {
        // `a` counts up per 16-lane group and `b` counts down, so max(a, b)
        // picks the larger of each mirrored pair (all values non-negative).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14542
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_max_epi8() {
        // `a` counts up per 16-lane group and `b` counts down, so max(a, b)
        // picks the larger of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from `src` (the first argument).
        let r = _mm512_mask_max_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Alternating 8-bit runs: set bits take the max, clear bits keep `src`.
        let r = _mm512_mask_max_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14570
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_max_epi8() {
        // `a` counts up per 16-lane group and `b` counts down, so max(a, b)
        // picks the larger of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: all lanes are zeroed out.
        let r = _mm512_maskz_max_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Alternating 8-bit runs: set bits take the max, clear bits yield zero.
        let r = _mm512_maskz_max_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
14597
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_max_epi8() {
        // `a` counts up per 16-lane half and `b` counts down, so max(a, b)
        // picks the larger of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from `src` (the first argument).
        let r = _mm256_mask_max_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Alternating 8-bit runs: set bits take the max, clear bits keep `src`.
        let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
14614
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_max_epi8() {
        // `a` counts up per 16-lane half and `b` counts down, so max(a, b)
        // picks the larger of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: all lanes are zeroed out.
        let r = _mm256_maskz_max_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Alternating 8-bit runs: set bits take the max, clear bits yield zero.
        let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }
14631
14632    #[simd_test(enable = "avx512bw,avx512vl")]
14633    const unsafe fn test_mm_mask_max_epi8() {
14634        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14635        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14636        let r = _mm_mask_max_epi8(a, 0, a, b);
14637        assert_eq_m128i(r, a);
14638        let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b);
14639        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14640        assert_eq_m128i(r, e);
14641    }
14642
14643    #[simd_test(enable = "avx512bw,avx512vl")]
14644    const unsafe fn test_mm_maskz_max_epi8() {
14645        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14646        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14647        let r = _mm_maskz_max_epi8(0, a, b);
14648        assert_eq_m128i(r, _mm_setzero_si128());
14649        let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b);
14650        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
14651        assert_eq_m128i(r, e);
14652    }
14653
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_min_epu16() {
        // `a` counts up per 16-lane half and `b` counts down, so min(a, b)
        // picks the smaller of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14668
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_min_epu16() {
        // `a` counts up per 16-lane half and `b` counts down, so min(a, b)
        // picks the smaller of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from `src` (the first argument).
        let r = _mm512_mask_min_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Alternating 8-bit runs: set bits take the min, clear bits keep `src`.
        let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14685
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_min_epu16() {
        // `a` counts up per 16-lane half and `b` counts down, so min(a, b)
        // picks the smaller of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: all lanes are zeroed out.
        let r = _mm512_maskz_min_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Alternating 8-bit runs: set bits take the min, clear bits yield zero.
        let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14702
14703    #[simd_test(enable = "avx512bw,avx512vl")]
14704    const unsafe fn test_mm256_mask_min_epu16() {
14705        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14706        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14707        let r = _mm256_mask_min_epu16(a, 0, a, b);
14708        assert_eq_m256i(r, a);
14709        let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b);
14710        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
14711        assert_eq_m256i(r, e);
14712    }
14713
14714    #[simd_test(enable = "avx512bw,avx512vl")]
14715    const unsafe fn test_mm256_maskz_min_epu16() {
14716        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
14717        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
14718        let r = _mm256_maskz_min_epu16(0, a, b);
14719        assert_eq_m256i(r, _mm256_setzero_si256());
14720        let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b);
14721        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
14722        assert_eq_m256i(r, e);
14723    }
14724
14725    #[simd_test(enable = "avx512bw,avx512vl")]
14726    const unsafe fn test_mm_mask_min_epu16() {
14727        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14728        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14729        let r = _mm_mask_min_epu16(a, 0, a, b);
14730        assert_eq_m128i(r, a);
14731        let r = _mm_mask_min_epu16(a, 0b00001111, a, b);
14732        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
14733        assert_eq_m128i(r, e);
14734    }
14735
14736    #[simd_test(enable = "avx512bw,avx512vl")]
14737    const unsafe fn test_mm_maskz_min_epu16() {
14738        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
14739        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
14740        let r = _mm_maskz_min_epu16(0, a, b);
14741        assert_eq_m128i(r, _mm_setzero_si128());
14742        let r = _mm_maskz_min_epu16(0b00001111, a, b);
14743        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
14744        assert_eq_m128i(r, e);
14745    }
14746
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_min_epu8() {
        // `a` counts up per 16-lane group and `b` counts down, so min(a, b)
        // picks the smaller of each mirrored pair in every lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14767
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_min_epu8() {
        // `a` counts up per 16-lane group and `b` counts down, so min(a, b)
        // picks the smaller of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from `src` (the first argument).
        let r = _mm512_mask_min_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Alternating 8-bit runs: set bits take the min, clear bits keep `src`.
        let r = _mm512_mask_min_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14795
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_min_epu8() {
        // `a` counts up per 16-lane group and `b` counts down, so min(a, b)
        // picks the smaller of each mirrored pair.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: all lanes are zeroed out.
        let r = _mm512_maskz_min_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Alternating 8-bit runs: set bits take the min, clear bits yield zero.
        let r = _mm512_maskz_min_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14822
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_min_epu8() {
        // `a` ascends, `b` descends: unsigned min forms 0..7,7..0 per 16 lanes.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from src (`a`).
        let r = _mm256_mask_min_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Set bits take min(a, b); clear bits keep src (`a`).
        let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
14839
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_min_epu8() {
        // `a` ascends, `b` descends: unsigned min forms 0..7,7..0 per 16 lanes.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask: every lane is zeroed.
        let r = _mm256_maskz_min_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Set bits take min(a, b); clear bits zero.
        let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
14856
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_min_epu8() {
        // `a` ascends, `b` descends: unsigned min forms 0..7,7..0.
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from src (`a`).
        let r = _mm_mask_min_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Set bits take min(a, b); clear bits keep src (`a`).
        let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
14867
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_min_epu8() {
        // `a` ascends, `b` descends: unsigned min forms 0..7,7..0.
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask: every lane is zeroed.
        let r = _mm_maskz_min_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Set bits take min(a, b); clear bits zero.
        let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
14878
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_min_epi16() {
        // `a` ascends 0..=15 per 16-lane group, `b` descends: the signed min
        // is 0..7 then 7..0 in each group.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14893
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_min_epi16() {
        // `a` ascends, `b` descends: signed min forms 0..7,7..0 per 16 lanes.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from src (`a`).
        let r = _mm512_mask_min_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Set bits take min(a, b); clear bits keep src (`a`).
        let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14910
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_min_epi16() {
        // `a` ascends, `b` descends: signed min forms 0..7,7..0 per 16 lanes.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask: every lane is zeroed.
        let r = _mm512_maskz_min_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Set bits take min(a, b); clear bits zero.
        let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14927
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_min_epi16() {
        // `a` ascends, `b` descends: signed min forms 0..7,7..0.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from src (`a`).
        let r = _mm256_mask_min_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Set bits take min(a, b); clear bits keep src (`a`).
        let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
14938
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_min_epi16() {
        // `a` ascends, `b` descends: signed min forms 0..7,7..0.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask: every lane is zeroed.
        let r = _mm256_maskz_min_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Set bits take min(a, b); clear bits zero.
        let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
14949
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_min_epi16() {
        // `a` ascends 0..=7, `b` descends: signed min forms 0..3,3..0.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from src (`a`).
        let r = _mm_mask_min_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Set bits take min(a, b); clear bits keep src (`a`).
        let r = _mm_mask_min_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
14960
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_min_epi16() {
        // `a` ascends 0..=7, `b` descends: signed min forms 0..3,3..0.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask: every lane is zeroed.
        let r = _mm_maskz_min_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Set bits take min(a, b); clear bits zero.
        let r = _mm_maskz_min_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
14971
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_min_epi8() {
        // `a` ascends 0..=15 per 16-byte group, `b` descends: the signed min
        // is 0..7 then 7..0 in each group.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
14992
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_min_epi8() {
        // `a` ascends, `b` descends: signed min forms 0..7,7..0 per 16 lanes.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from src (`a`).
        let r = _mm512_mask_min_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Set bits take min(a, b); clear bits keep src (`a`).
        let r = _mm512_mask_min_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
15020
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_min_epi8() {
        // `a` ascends, `b` descends: signed min forms 0..7,7..0 per 16 lanes.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask: every lane is zeroed.
        let r = _mm512_maskz_min_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Set bits take min(a, b); clear bits zero.
        let r = _mm512_maskz_min_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }
15047
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_min_epi8() {
        // `a` ascends, `b` descends: signed min forms 0..7,7..0 per 16 lanes.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from src (`a`).
        let r = _mm256_mask_min_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Set bits take min(a, b); clear bits keep src (`a`).
        let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
15064
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_min_epi8() {
        // `a` ascends, `b` descends: signed min forms 0..7,7..0 per 16 lanes.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask: every lane is zeroed.
        let r = _mm256_maskz_min_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Set bits take min(a, b); clear bits zero.
        let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }
15081
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_min_epi8() {
        // `a` ascends, `b` descends: signed min forms 0..7,7..0.
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Zero mask: every lane is copied from src (`a`).
        let r = _mm_mask_min_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Set bits take min(a, b); clear bits keep src (`a`).
        let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
15092
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_min_epi8() {
        // `a` ascends, `b` descends: signed min forms 0..7,7..0.
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // All-zero mask: every lane is zeroed.
        let r = _mm_maskz_min_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Set bits take min(a, b); clear bits zero.
        let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }
15103
15104    #[simd_test(enable = "avx512bw")]
15105    const unsafe fn test_mm512_cmplt_epu16_mask() {
15106        let a = _mm512_set1_epi16(-2);
15107        let b = _mm512_set1_epi16(-1);
15108        let m = _mm512_cmplt_epu16_mask(a, b);
15109        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15110    }
15111
15112    #[simd_test(enable = "avx512bw")]
15113    const unsafe fn test_mm512_mask_cmplt_epu16_mask() {
15114        let a = _mm512_set1_epi16(-2);
15115        let b = _mm512_set1_epi16(-1);
15116        let mask = 0b01010101_01010101_01010101_01010101;
15117        let r = _mm512_mask_cmplt_epu16_mask(mask, a, b);
15118        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15119    }
15120
15121    #[simd_test(enable = "avx512bw,avx512vl")]
15122    const unsafe fn test_mm256_cmplt_epu16_mask() {
15123        let a = _mm256_set1_epi16(-2);
15124        let b = _mm256_set1_epi16(-1);
15125        let m = _mm256_cmplt_epu16_mask(a, b);
15126        assert_eq!(m, 0b11111111_11111111);
15127    }
15128
15129    #[simd_test(enable = "avx512bw,avx512vl")]
15130    const unsafe fn test_mm256_mask_cmplt_epu16_mask() {
15131        let a = _mm256_set1_epi16(-2);
15132        let b = _mm256_set1_epi16(-1);
15133        let mask = 0b01010101_01010101;
15134        let r = _mm256_mask_cmplt_epu16_mask(mask, a, b);
15135        assert_eq!(r, 0b01010101_01010101);
15136    }
15137
15138    #[simd_test(enable = "avx512bw,avx512vl")]
15139    const unsafe fn test_mm_cmplt_epu16_mask() {
15140        let a = _mm_set1_epi16(-2);
15141        let b = _mm_set1_epi16(-1);
15142        let m = _mm_cmplt_epu16_mask(a, b);
15143        assert_eq!(m, 0b11111111);
15144    }
15145
15146    #[simd_test(enable = "avx512bw,avx512vl")]
15147    const unsafe fn test_mm_mask_cmplt_epu16_mask() {
15148        let a = _mm_set1_epi16(-2);
15149        let b = _mm_set1_epi16(-1);
15150        let mask = 0b01010101;
15151        let r = _mm_mask_cmplt_epu16_mask(mask, a, b);
15152        assert_eq!(r, 0b01010101);
15153    }
15154
15155    #[simd_test(enable = "avx512bw")]
15156    const unsafe fn test_mm512_cmplt_epu8_mask() {
15157        let a = _mm512_set1_epi8(-2);
15158        let b = _mm512_set1_epi8(-1);
15159        let m = _mm512_cmplt_epu8_mask(a, b);
15160        assert_eq!(
15161            m,
15162            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15163        );
15164    }
15165
15166    #[simd_test(enable = "avx512bw")]
15167    const unsafe fn test_mm512_mask_cmplt_epu8_mask() {
15168        let a = _mm512_set1_epi8(-2);
15169        let b = _mm512_set1_epi8(-1);
15170        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15171        let r = _mm512_mask_cmplt_epu8_mask(mask, a, b);
15172        assert_eq!(
15173            r,
15174            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15175        );
15176    }
15177
15178    #[simd_test(enable = "avx512bw,avx512vl")]
15179    const unsafe fn test_mm256_cmplt_epu8_mask() {
15180        let a = _mm256_set1_epi8(-2);
15181        let b = _mm256_set1_epi8(-1);
15182        let m = _mm256_cmplt_epu8_mask(a, b);
15183        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15184    }
15185
15186    #[simd_test(enable = "avx512bw,avx512vl")]
15187    const unsafe fn test_mm256_mask_cmplt_epu8_mask() {
15188        let a = _mm256_set1_epi8(-2);
15189        let b = _mm256_set1_epi8(-1);
15190        let mask = 0b01010101_01010101_01010101_01010101;
15191        let r = _mm256_mask_cmplt_epu8_mask(mask, a, b);
15192        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15193    }
15194
15195    #[simd_test(enable = "avx512bw,avx512vl")]
15196    const unsafe fn test_mm_cmplt_epu8_mask() {
15197        let a = _mm_set1_epi8(-2);
15198        let b = _mm_set1_epi8(-1);
15199        let m = _mm_cmplt_epu8_mask(a, b);
15200        assert_eq!(m, 0b11111111_11111111);
15201    }
15202
15203    #[simd_test(enable = "avx512bw,avx512vl")]
15204    const unsafe fn test_mm_mask_cmplt_epu8_mask() {
15205        let a = _mm_set1_epi8(-2);
15206        let b = _mm_set1_epi8(-1);
15207        let mask = 0b01010101_01010101;
15208        let r = _mm_mask_cmplt_epu8_mask(mask, a, b);
15209        assert_eq!(r, 0b01010101_01010101);
15210    }
15211
15212    #[simd_test(enable = "avx512bw")]
15213    const unsafe fn test_mm512_cmplt_epi16_mask() {
15214        let a = _mm512_set1_epi16(-2);
15215        let b = _mm512_set1_epi16(-1);
15216        let m = _mm512_cmplt_epi16_mask(a, b);
15217        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15218    }
15219
15220    #[simd_test(enable = "avx512bw")]
15221    const unsafe fn test_mm512_mask_cmplt_epi16_mask() {
15222        let a = _mm512_set1_epi16(-2);
15223        let b = _mm512_set1_epi16(-1);
15224        let mask = 0b01010101_01010101_01010101_01010101;
15225        let r = _mm512_mask_cmplt_epi16_mask(mask, a, b);
15226        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15227    }
15228
15229    #[simd_test(enable = "avx512bw,avx512vl")]
15230    const unsafe fn test_mm256_cmplt_epi16_mask() {
15231        let a = _mm256_set1_epi16(-2);
15232        let b = _mm256_set1_epi16(-1);
15233        let m = _mm256_cmplt_epi16_mask(a, b);
15234        assert_eq!(m, 0b11111111_11111111);
15235    }
15236
15237    #[simd_test(enable = "avx512bw,avx512vl")]
15238    const unsafe fn test_mm256_mask_cmplt_epi16_mask() {
15239        let a = _mm256_set1_epi16(-2);
15240        let b = _mm256_set1_epi16(-1);
15241        let mask = 0b01010101_01010101;
15242        let r = _mm256_mask_cmplt_epi16_mask(mask, a, b);
15243        assert_eq!(r, 0b01010101_01010101);
15244    }
15245
15246    #[simd_test(enable = "avx512bw,avx512vl")]
15247    const unsafe fn test_mm_cmplt_epi16_mask() {
15248        let a = _mm_set1_epi16(-2);
15249        let b = _mm_set1_epi16(-1);
15250        let m = _mm_cmplt_epi16_mask(a, b);
15251        assert_eq!(m, 0b11111111);
15252    }
15253
15254    #[simd_test(enable = "avx512bw,avx512vl")]
15255    const unsafe fn test_mm_mask_cmplt_epi16_mask() {
15256        let a = _mm_set1_epi16(-2);
15257        let b = _mm_set1_epi16(-1);
15258        let mask = 0b01010101;
15259        let r = _mm_mask_cmplt_epi16_mask(mask, a, b);
15260        assert_eq!(r, 0b01010101);
15261    }
15262
15263    #[simd_test(enable = "avx512bw")]
15264    const unsafe fn test_mm512_cmplt_epi8_mask() {
15265        let a = _mm512_set1_epi8(-2);
15266        let b = _mm512_set1_epi8(-1);
15267        let m = _mm512_cmplt_epi8_mask(a, b);
15268        assert_eq!(
15269            m,
15270            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15271        );
15272    }
15273
15274    #[simd_test(enable = "avx512bw")]
15275    const unsafe fn test_mm512_mask_cmplt_epi8_mask() {
15276        let a = _mm512_set1_epi8(-2);
15277        let b = _mm512_set1_epi8(-1);
15278        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15279        let r = _mm512_mask_cmplt_epi8_mask(mask, a, b);
15280        assert_eq!(
15281            r,
15282            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15283        );
15284    }
15285
15286    #[simd_test(enable = "avx512bw,avx512vl")]
15287    const unsafe fn test_mm256_cmplt_epi8_mask() {
15288        let a = _mm256_set1_epi8(-2);
15289        let b = _mm256_set1_epi8(-1);
15290        let m = _mm256_cmplt_epi8_mask(a, b);
15291        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15292    }
15293
15294    #[simd_test(enable = "avx512bw,avx512vl")]
15295    const unsafe fn test_mm256_mask_cmplt_epi8_mask() {
15296        let a = _mm256_set1_epi8(-2);
15297        let b = _mm256_set1_epi8(-1);
15298        let mask = 0b01010101_01010101_01010101_01010101;
15299        let r = _mm256_mask_cmplt_epi8_mask(mask, a, b);
15300        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15301    }
15302
15303    #[simd_test(enable = "avx512bw,avx512vl")]
15304    const unsafe fn test_mm_cmplt_epi8_mask() {
15305        let a = _mm_set1_epi8(-2);
15306        let b = _mm_set1_epi8(-1);
15307        let m = _mm_cmplt_epi8_mask(a, b);
15308        assert_eq!(m, 0b11111111_11111111);
15309    }
15310
15311    #[simd_test(enable = "avx512bw,avx512vl")]
15312    const unsafe fn test_mm_mask_cmplt_epi8_mask() {
15313        let a = _mm_set1_epi8(-2);
15314        let b = _mm_set1_epi8(-1);
15315        let mask = 0b01010101_01010101;
15316        let r = _mm_mask_cmplt_epi8_mask(mask, a, b);
15317        assert_eq!(r, 0b01010101_01010101);
15318    }
15319
15320    #[simd_test(enable = "avx512bw")]
15321    const unsafe fn test_mm512_cmpgt_epu16_mask() {
15322        let a = _mm512_set1_epi16(2);
15323        let b = _mm512_set1_epi16(1);
15324        let m = _mm512_cmpgt_epu16_mask(a, b);
15325        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15326    }
15327
15328    #[simd_test(enable = "avx512bw")]
15329    const unsafe fn test_mm512_mask_cmpgt_epu16_mask() {
15330        let a = _mm512_set1_epi16(2);
15331        let b = _mm512_set1_epi16(1);
15332        let mask = 0b01010101_01010101_01010101_01010101;
15333        let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b);
15334        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15335    }
15336
15337    #[simd_test(enable = "avx512bw,avx512vl")]
15338    const unsafe fn test_mm256_cmpgt_epu16_mask() {
15339        let a = _mm256_set1_epi16(2);
15340        let b = _mm256_set1_epi16(1);
15341        let m = _mm256_cmpgt_epu16_mask(a, b);
15342        assert_eq!(m, 0b11111111_11111111);
15343    }
15344
15345    #[simd_test(enable = "avx512bw,avx512vl")]
15346    const unsafe fn test_mm256_mask_cmpgt_epu16_mask() {
15347        let a = _mm256_set1_epi16(2);
15348        let b = _mm256_set1_epi16(1);
15349        let mask = 0b01010101_01010101;
15350        let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b);
15351        assert_eq!(r, 0b01010101_01010101);
15352    }
15353
15354    #[simd_test(enable = "avx512bw,avx512vl")]
15355    const unsafe fn test_mm_cmpgt_epu16_mask() {
15356        let a = _mm_set1_epi16(2);
15357        let b = _mm_set1_epi16(1);
15358        let m = _mm_cmpgt_epu16_mask(a, b);
15359        assert_eq!(m, 0b11111111);
15360    }
15361
15362    #[simd_test(enable = "avx512bw,avx512vl")]
15363    const unsafe fn test_mm_mask_cmpgt_epu16_mask() {
15364        let a = _mm_set1_epi16(2);
15365        let b = _mm_set1_epi16(1);
15366        let mask = 0b01010101;
15367        let r = _mm_mask_cmpgt_epu16_mask(mask, a, b);
15368        assert_eq!(r, 0b01010101);
15369    }
15370
15371    #[simd_test(enable = "avx512bw")]
15372    const unsafe fn test_mm512_cmpgt_epu8_mask() {
15373        let a = _mm512_set1_epi8(2);
15374        let b = _mm512_set1_epi8(1);
15375        let m = _mm512_cmpgt_epu8_mask(a, b);
15376        assert_eq!(
15377            m,
15378            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
15379        );
15380    }
15381
15382    #[simd_test(enable = "avx512bw")]
15383    const unsafe fn test_mm512_mask_cmpgt_epu8_mask() {
15384        let a = _mm512_set1_epi8(2);
15385        let b = _mm512_set1_epi8(1);
15386        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
15387        let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b);
15388        assert_eq!(
15389            r,
15390            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
15391        );
15392    }
15393
15394    #[simd_test(enable = "avx512bw,avx512vl")]
15395    const unsafe fn test_mm256_cmpgt_epu8_mask() {
15396        let a = _mm256_set1_epi8(2);
15397        let b = _mm256_set1_epi8(1);
15398        let m = _mm256_cmpgt_epu8_mask(a, b);
15399        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15400    }
15401
15402    #[simd_test(enable = "avx512bw,avx512vl")]
15403    const unsafe fn test_mm256_mask_cmpgt_epu8_mask() {
15404        let a = _mm256_set1_epi8(2);
15405        let b = _mm256_set1_epi8(1);
15406        let mask = 0b01010101_01010101_01010101_01010101;
15407        let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b);
15408        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15409    }
15410
15411    #[simd_test(enable = "avx512bw,avx512vl")]
15412    const unsafe fn test_mm_cmpgt_epu8_mask() {
15413        let a = _mm_set1_epi8(2);
15414        let b = _mm_set1_epi8(1);
15415        let m = _mm_cmpgt_epu8_mask(a, b);
15416        assert_eq!(m, 0b11111111_11111111);
15417    }
15418
15419    #[simd_test(enable = "avx512bw,avx512vl")]
15420    const unsafe fn test_mm_mask_cmpgt_epu8_mask() {
15421        let a = _mm_set1_epi8(2);
15422        let b = _mm_set1_epi8(1);
15423        let mask = 0b01010101_01010101;
15424        let r = _mm_mask_cmpgt_epu8_mask(mask, a, b);
15425        assert_eq!(r, 0b01010101_01010101);
15426    }
15427
15428    #[simd_test(enable = "avx512bw")]
15429    const unsafe fn test_mm512_cmpgt_epi16_mask() {
15430        let a = _mm512_set1_epi16(2);
15431        let b = _mm512_set1_epi16(-1);
15432        let m = _mm512_cmpgt_epi16_mask(a, b);
15433        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
15434    }
15435
15436    #[simd_test(enable = "avx512bw")]
15437    const unsafe fn test_mm512_mask_cmpgt_epi16_mask() {
15438        let a = _mm512_set1_epi16(2);
15439        let b = _mm512_set1_epi16(-1);
15440        let mask = 0b01010101_01010101_01010101_01010101;
15441        let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b);
15442        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
15443    }
15444
15445    #[simd_test(enable = "avx512bw,avx512vl")]
15446    const unsafe fn test_mm256_cmpgt_epi16_mask() {
15447        let a = _mm256_set1_epi16(2);
15448        let b = _mm256_set1_epi16(-1);
15449        let m = _mm256_cmpgt_epi16_mask(a, b);
15450        assert_eq!(m, 0b11111111_11111111);
15451    }
15452
15453    #[simd_test(enable = "avx512bw,avx512vl")]
15454    const unsafe fn test_mm256_mask_cmpgt_epi16_mask() {
15455        let a = _mm256_set1_epi16(2);
15456        let b = _mm256_set1_epi16(-1);
15457        let mask = 0b001010101_01010101;
15458        let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b);
15459        assert_eq!(r, 0b01010101_01010101);
15460    }
15461
15462    #[simd_test(enable = "avx512bw,avx512vl")]
15463    const unsafe fn test_mm_cmpgt_epi16_mask() {
15464        let a = _mm_set1_epi16(2);
15465        let b = _mm_set1_epi16(-1);
15466        let m = _mm_cmpgt_epi16_mask(a, b);
15467        assert_eq!(m, 0b11111111);
15468    }
15469
15470    #[simd_test(enable = "avx512bw,avx512vl")]
15471    const unsafe fn test_mm_mask_cmpgt_epi16_mask() {
15472        let a = _mm_set1_epi16(2);
15473        let b = _mm_set1_epi16(-1);
15474        let mask = 0b01010101;
15475        let r = _mm_mask_cmpgt_epi16_mask(mask, a, b);
15476        assert_eq!(r, 0b01010101);
15477    }
15478
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmpgt_epi8_mask() {
        // Signed compare: 2 > -1 in all 64 lanes, so every mask bit is set.
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpgt_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }
15489
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmpgt_epi8_mask() {
        // Signed compare: 2 > -1 in all 64 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }
15501
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmpgt_epi8_mask() {
        // Signed compare: 2 > -1 in all 32 lanes, so every mask bit is set.
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpgt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
15509
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmpgt_epi8_mask() {
        // Signed compare: 2 > -1 in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
15518
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cmpgt_epi8_mask() {
        // Signed compare: 2 > -1 in all 16 lanes, so every mask bit is set.
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpgt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
15526
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cmpgt_epi8_mask() {
        // Signed compare: 2 > -1 in all 16 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
15535
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmple_epu16_mask() {
        // Unsigned compare: -1 is 0xFFFF, and 0xFFFF <= 0xFFFF holds in all 32 lanes.
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
15543
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmple_epu16_mask() {
        // Unsigned 0xFFFF <= 0xFFFF in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
15552
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmple_epu16_mask() {
        // Unsigned 0xFFFF <= 0xFFFF holds in all 16 lanes, so every mask bit is set.
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
15560
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmple_epu16_mask() {
        // Unsigned 0xFFFF <= 0xFFFF in all 16 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
15569
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cmple_epu16_mask() {
        // Unsigned 0xFFFF <= 0xFFFF holds in all 8 lanes, so every mask bit is set.
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }
15577
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cmple_epu16_mask() {
        // Unsigned 0xFFFF <= 0xFFFF in all 8 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }
15586
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmple_epu8_mask() {
        // Unsigned compare: -1 is 0xFF, and 0xFF <= 0xFF holds in all 64 lanes.
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmple_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }
15597
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmple_epu8_mask() {
        // Unsigned 0xFF <= 0xFF in all 64 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }
15609
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmple_epu8_mask() {
        // Unsigned 0xFF <= 0xFF holds in all 32 lanes, so every mask bit is set.
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmple_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
15617
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmple_epu8_mask() {
        // Unsigned 0xFF <= 0xFF in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
15626
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cmple_epu8_mask() {
        // Unsigned 0xFF <= 0xFF holds in all 16 lanes, so every mask bit is set.
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmple_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
15634
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cmple_epu8_mask() {
        // Unsigned 0xFF <= 0xFF in all 16 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
15643
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmple_epi16_mask() {
        // Signed compare: -1 <= -1 holds in all 32 lanes, so every mask bit is set.
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
15651
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmple_epi16_mask() {
        // Signed -1 <= -1 in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
15660
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmple_epi16_mask() {
        // Signed -1 <= -1 holds in all 16 lanes, so every mask bit is set.
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
15668
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmple_epi16_mask() {
        // Signed -1 <= -1 in all 16 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
15677
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cmple_epi16_mask() {
        // Signed -1 <= -1 holds in all 8 lanes, so every mask bit is set.
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmple_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }
15685
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cmple_epi16_mask() {
        // Signed -1 <= -1 in all 8 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmple_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }
15694
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmple_epi8_mask() {
        // Signed compare: -1 <= -1 holds in all 64 lanes, so every mask bit is set.
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmple_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }
15705
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmple_epi8_mask() {
        // Signed -1 <= -1 in all 64 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }
15717
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmple_epi8_mask() {
        // Signed -1 <= -1 holds in all 32 lanes, so every mask bit is set.
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmple_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
15725
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmple_epi8_mask() {
        // Signed -1 <= -1 in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
15734
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cmple_epi8_mask() {
        // Signed -1 <= -1 holds in all 16 lanes, so every mask bit is set.
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmple_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
15742
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cmple_epi8_mask() {
        // Signed -1 <= -1 in all 16 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmple_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
15751
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmpge_epu16_mask() {
        // Unsigned compare: 1 >= 1 holds in all 32 lanes, so every mask bit is set.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
15759
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmpge_epu16_mask() {
        // Unsigned 1 >= 1 in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
15768
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmpge_epu16_mask() {
        // Unsigned 1 >= 1 holds in all 16 lanes, so every mask bit is set.
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
15776
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmpge_epu16_mask() {
        // Unsigned 1 >= 1 in all 16 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
15785
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cmpge_epu16_mask() {
        // Unsigned 1 >= 1 holds in all 8 lanes, so every mask bit is set.
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }
15793
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cmpge_epu16_mask() {
        // Unsigned 1 >= 1 in all 8 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }
15802
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmpge_epu8_mask() {
        // Unsigned compare: 1 >= 1 holds in all 64 lanes, so every mask bit is set.
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpge_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }
15813
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmpge_epu8_mask() {
        // Unsigned 1 >= 1 in all 64 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }
15825
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmpge_epu8_mask() {
        // Unsigned 1 >= 1 holds in all 32 lanes, so every mask bit is set.
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpge_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
15833
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmpge_epu8_mask() {
        // Unsigned 1 >= 1 in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
15842
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cmpge_epu8_mask() {
        // Unsigned 1 >= 1 holds in all 16 lanes, so every mask bit is set.
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpge_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
15850
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cmpge_epu8_mask() {
        // Unsigned 1 >= 1 in all 16 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
15859
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmpge_epi16_mask() {
        // Signed compare: -1 >= -1 holds in all 32 lanes, so every mask bit is set.
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
15867
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmpge_epi16_mask() {
        // Signed -1 >= -1 in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
15876
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmpge_epi16_mask() {
        // Signed -1 >= -1 holds in all 16 lanes, so every mask bit is set.
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
15884
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmpge_epi16_mask() {
        // Signed -1 >= -1 in all 16 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
15893
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cmpge_epi16_mask() {
        // Signed -1 >= -1 holds in all 8 lanes, so every mask bit is set.
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }
15901
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cmpge_epi16_mask() {
        // Signed -1 >= -1 in all 8 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }
15910
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmpge_epi8_mask() {
        // Signed compare: -1 >= -1 holds in all 64 lanes, so every mask bit is set.
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpge_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }
15921
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmpge_epi8_mask() {
        // Signed -1 >= -1 in all 64 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }
15933
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmpge_epi8_mask() {
        // Signed -1 >= -1 holds in all 32 lanes, so every mask bit is set.
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpge_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
15941
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmpge_epi8_mask() {
        // Signed -1 >= -1 in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
15950
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cmpge_epi8_mask() {
        // Signed -1 >= -1 holds in all 16 lanes, so every mask bit is set.
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpge_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
15958
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cmpge_epi8_mask() {
        // Signed -1 >= -1 in all 16 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
15967
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmpeq_epu16_mask() {
        // Equal operands in all 32 lanes, so every mask bit is set.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
15975
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmpeq_epu16_mask() {
        // Equal operands in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
15984
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmpeq_epu16_mask() {
        // Equal operands in all 16 lanes, so every mask bit is set.
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
15992
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmpeq_epu16_mask() {
        // Equal operands in all 16 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
16001
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cmpeq_epu16_mask() {
        // Equal operands in all 8 lanes, so every mask bit is set.
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }
16009
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cmpeq_epu16_mask() {
        // Equal operands in all 8 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }
16018
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmpeq_epu8_mask() {
        // Equal operands in all 64 lanes, so every mask bit is set.
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpeq_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }
16029
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmpeq_epu8_mask() {
        // Equal operands in all 64 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }
16041
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmpeq_epu8_mask() {
        // Equal operands in all 32 lanes, so every mask bit is set.
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpeq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
16049
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmpeq_epu8_mask() {
        // Equal operands in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
16058
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cmpeq_epu8_mask() {
        // Equal operands in all 16 lanes, so every mask bit is set.
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpeq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
16066
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cmpeq_epu8_mask() {
        // Equal operands in all 16 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
16075
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmpeq_epi16_mask() {
        // Equal operands in all 32 lanes, so every mask bit is set.
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
16083
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmpeq_epi16_mask() {
        // Equal operands in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
16092
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmpeq_epi16_mask() {
        // Equal operands in all 16 lanes, so every mask bit is set.
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
16100
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmpeq_epi16_mask() {
        // Equal operands in all 16 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
16109
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cmpeq_epi16_mask() {
        // Equal operands in all 8 lanes, so every mask bit is set.
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }
16117
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cmpeq_epi16_mask() {
        // Equal operands in all 8 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }
16126
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmpeq_epi8_mask() {
        // Equal operands in all 64 lanes, so every mask bit is set.
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpeq_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }
16137
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmpeq_epi8_mask() {
        // Equal operands in all 64 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }
16149
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmpeq_epi8_mask() {
        // Equal operands in all 32 lanes, so every mask bit is set.
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpeq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
16157
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmpeq_epi8_mask() {
        // Equal operands in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
16166
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cmpeq_epi8_mask() {
        // Equal operands in all 16 lanes, so every mask bit is set.
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpeq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
16174
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cmpeq_epi8_mask() {
        // Equal operands in all 16 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
16183
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmpneq_epu16_mask() {
        // 2 != 1 in all 32 lanes, so every mask bit is set.
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
16191
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmpneq_epu16_mask() {
        // 2 != 1 in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
16200
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmpneq_epu16_mask() {
        // 2 != 1 in all 16 lanes, so every mask bit is set.
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }
16208
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmpneq_epu16_mask() {
        // 2 != 1 in all 16 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
16217
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cmpneq_epu16_mask() {
        // 2 != 1 in all 8 lanes, so every mask bit is set.
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }
16225
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cmpneq_epu16_mask() {
        // 2 != 1 in all 8 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }
16234
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cmpneq_epu8_mask() {
        // 2 != 1 in all 64 lanes, so every mask bit is set.
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpneq_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }
16245
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cmpneq_epu8_mask() {
        // 2 != 1 in all 64 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }
16257
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cmpneq_epu8_mask() {
        // 2 != 1 in all 32 lanes, so every mask bit is set.
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpneq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }
16265
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cmpneq_epu8_mask() {
        // 2 != 1 in all 32 lanes; the alternating writemask keeps only the odd bits.
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }
16274
16275    #[simd_test(enable = "avx512bw,avx512vl")]
16276    const unsafe fn test_mm_cmpneq_epu8_mask() {
16277        let a = _mm_set1_epi8(2);
16278        let b = _mm_set1_epi8(1);
16279        let m = _mm_cmpneq_epu8_mask(a, b);
16280        assert_eq!(m, 0b11111111_11111111);
16281    }
16282
16283    #[simd_test(enable = "avx512bw,avx512vl")]
16284    const unsafe fn test_mm_mask_cmpneq_epu8_mask() {
16285        let a = _mm_set1_epi8(2);
16286        let b = _mm_set1_epi8(1);
16287        let mask = 0b01010101_01010101;
16288        let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
16289        assert_eq!(r, 0b01010101_01010101);
16290    }
16291
16292    #[simd_test(enable = "avx512bw")]
16293    const unsafe fn test_mm512_cmpneq_epi16_mask() {
16294        let a = _mm512_set1_epi16(1);
16295        let b = _mm512_set1_epi16(-1);
16296        let m = _mm512_cmpneq_epi16_mask(a, b);
16297        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16298    }
16299
16300    #[simd_test(enable = "avx512bw")]
16301    const unsafe fn test_mm512_mask_cmpneq_epi16_mask() {
16302        let a = _mm512_set1_epi16(1);
16303        let b = _mm512_set1_epi16(-1);
16304        let mask = 0b01010101_01010101_01010101_01010101;
16305        let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
16306        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16307    }
16308
16309    #[simd_test(enable = "avx512bw,avx512vl")]
16310    const unsafe fn test_mm256_cmpneq_epi16_mask() {
16311        let a = _mm256_set1_epi16(1);
16312        let b = _mm256_set1_epi16(-1);
16313        let m = _mm256_cmpneq_epi16_mask(a, b);
16314        assert_eq!(m, 0b11111111_11111111);
16315    }
16316
16317    #[simd_test(enable = "avx512bw,avx512vl")]
16318    const unsafe fn test_mm256_mask_cmpneq_epi16_mask() {
16319        let a = _mm256_set1_epi16(1);
16320        let b = _mm256_set1_epi16(-1);
16321        let mask = 0b01010101_01010101;
16322        let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
16323        assert_eq!(r, 0b01010101_01010101);
16324    }
16325
16326    #[simd_test(enable = "avx512bw,avx512vl")]
16327    const unsafe fn test_mm_cmpneq_epi16_mask() {
16328        let a = _mm_set1_epi16(1);
16329        let b = _mm_set1_epi16(-1);
16330        let m = _mm_cmpneq_epi16_mask(a, b);
16331        assert_eq!(m, 0b11111111);
16332    }
16333
16334    #[simd_test(enable = "avx512bw,avx512vl")]
16335    const unsafe fn test_mm_mask_cmpneq_epi16_mask() {
16336        let a = _mm_set1_epi16(1);
16337        let b = _mm_set1_epi16(-1);
16338        let mask = 0b01010101;
16339        let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
16340        assert_eq!(r, 0b01010101);
16341    }
16342
16343    #[simd_test(enable = "avx512bw")]
16344    const unsafe fn test_mm512_cmpneq_epi8_mask() {
16345        let a = _mm512_set1_epi8(1);
16346        let b = _mm512_set1_epi8(-1);
16347        let m = _mm512_cmpneq_epi8_mask(a, b);
16348        assert_eq!(
16349            m,
16350            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16351        );
16352    }
16353
16354    #[simd_test(enable = "avx512bw")]
16355    const unsafe fn test_mm512_mask_cmpneq_epi8_mask() {
16356        let a = _mm512_set1_epi8(1);
16357        let b = _mm512_set1_epi8(-1);
16358        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16359        let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
16360        assert_eq!(
16361            r,
16362            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16363        );
16364    }
16365
16366    #[simd_test(enable = "avx512bw,avx512vl")]
16367    const unsafe fn test_mm256_cmpneq_epi8_mask() {
16368        let a = _mm256_set1_epi8(1);
16369        let b = _mm256_set1_epi8(-1);
16370        let m = _mm256_cmpneq_epi8_mask(a, b);
16371        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16372    }
16373
16374    #[simd_test(enable = "avx512bw,avx512vl")]
16375    const unsafe fn test_mm256_mask_cmpneq_epi8_mask() {
16376        let a = _mm256_set1_epi8(1);
16377        let b = _mm256_set1_epi8(-1);
16378        let mask = 0b01010101_01010101_01010101_01010101;
16379        let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
16380        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16381    }
16382
16383    #[simd_test(enable = "avx512bw,avx512vl")]
16384    const unsafe fn test_mm_cmpneq_epi8_mask() {
16385        let a = _mm_set1_epi8(1);
16386        let b = _mm_set1_epi8(-1);
16387        let m = _mm_cmpneq_epi8_mask(a, b);
16388        assert_eq!(m, 0b11111111_11111111);
16389    }
16390
16391    #[simd_test(enable = "avx512bw,avx512vl")]
16392    const unsafe fn test_mm_mask_cmpneq_epi8_mask() {
16393        let a = _mm_set1_epi8(1);
16394        let b = _mm_set1_epi8(-1);
16395        let mask = 0b01010101_01010101;
16396        let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
16397        assert_eq!(r, 0b01010101_01010101);
16398    }
16399
16400    #[simd_test(enable = "avx512bw")]
16401    const unsafe fn test_mm512_cmp_epu16_mask() {
16402        let a = _mm512_set1_epi16(0);
16403        let b = _mm512_set1_epi16(1);
16404        let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
16405        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16406    }
16407
16408    #[simd_test(enable = "avx512bw")]
16409    const unsafe fn test_mm512_mask_cmp_epu16_mask() {
16410        let a = _mm512_set1_epi16(0);
16411        let b = _mm512_set1_epi16(1);
16412        let mask = 0b01010101_01010101_01010101_01010101;
16413        let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
16414        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16415    }
16416
16417    #[simd_test(enable = "avx512bw,avx512vl")]
16418    const unsafe fn test_mm256_cmp_epu16_mask() {
16419        let a = _mm256_set1_epi16(0);
16420        let b = _mm256_set1_epi16(1);
16421        let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
16422        assert_eq!(m, 0b11111111_11111111);
16423    }
16424
16425    #[simd_test(enable = "avx512bw,avx512vl")]
16426    const unsafe fn test_mm256_mask_cmp_epu16_mask() {
16427        let a = _mm256_set1_epi16(0);
16428        let b = _mm256_set1_epi16(1);
16429        let mask = 0b01010101_01010101;
16430        let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
16431        assert_eq!(r, 0b01010101_01010101);
16432    }
16433
16434    #[simd_test(enable = "avx512bw,avx512vl")]
16435    const unsafe fn test_mm_cmp_epu16_mask() {
16436        let a = _mm_set1_epi16(0);
16437        let b = _mm_set1_epi16(1);
16438        let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
16439        assert_eq!(m, 0b11111111);
16440    }
16441
16442    #[simd_test(enable = "avx512bw,avx512vl")]
16443    const unsafe fn test_mm_mask_cmp_epu16_mask() {
16444        let a = _mm_set1_epi16(0);
16445        let b = _mm_set1_epi16(1);
16446        let mask = 0b01010101;
16447        let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
16448        assert_eq!(r, 0b01010101);
16449    }
16450
16451    #[simd_test(enable = "avx512bw")]
16452    const unsafe fn test_mm512_cmp_epu8_mask() {
16453        let a = _mm512_set1_epi8(0);
16454        let b = _mm512_set1_epi8(1);
16455        let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
16456        assert_eq!(
16457            m,
16458            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16459        );
16460    }
16461
16462    #[simd_test(enable = "avx512bw")]
16463    const unsafe fn test_mm512_mask_cmp_epu8_mask() {
16464        let a = _mm512_set1_epi8(0);
16465        let b = _mm512_set1_epi8(1);
16466        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16467        let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
16468        assert_eq!(
16469            r,
16470            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16471        );
16472    }
16473
16474    #[simd_test(enable = "avx512bw,avx512vl")]
16475    const unsafe fn test_mm256_cmp_epu8_mask() {
16476        let a = _mm256_set1_epi8(0);
16477        let b = _mm256_set1_epi8(1);
16478        let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
16479        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16480    }
16481
16482    #[simd_test(enable = "avx512bw,avx512vl")]
16483    const unsafe fn test_mm256_mask_cmp_epu8_mask() {
16484        let a = _mm256_set1_epi8(0);
16485        let b = _mm256_set1_epi8(1);
16486        let mask = 0b01010101_01010101_01010101_01010101;
16487        let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
16488        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16489    }
16490
16491    #[simd_test(enable = "avx512bw,avx512vl")]
16492    const unsafe fn test_mm_cmp_epu8_mask() {
16493        let a = _mm_set1_epi8(0);
16494        let b = _mm_set1_epi8(1);
16495        let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
16496        assert_eq!(m, 0b11111111_11111111);
16497    }
16498
16499    #[simd_test(enable = "avx512bw,avx512vl")]
16500    const unsafe fn test_mm_mask_cmp_epu8_mask() {
16501        let a = _mm_set1_epi8(0);
16502        let b = _mm_set1_epi8(1);
16503        let mask = 0b01010101_01010101;
16504        let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
16505        assert_eq!(r, 0b01010101_01010101);
16506    }
16507
16508    #[simd_test(enable = "avx512bw")]
16509    const unsafe fn test_mm512_cmp_epi16_mask() {
16510        let a = _mm512_set1_epi16(0);
16511        let b = _mm512_set1_epi16(1);
16512        let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
16513        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16514    }
16515
16516    #[simd_test(enable = "avx512bw")]
16517    const unsafe fn test_mm512_mask_cmp_epi16_mask() {
16518        let a = _mm512_set1_epi16(0);
16519        let b = _mm512_set1_epi16(1);
16520        let mask = 0b01010101_01010101_01010101_01010101;
16521        let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
16522        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16523    }
16524
16525    #[simd_test(enable = "avx512bw,avx512vl")]
16526    const unsafe fn test_mm256_cmp_epi16_mask() {
16527        let a = _mm256_set1_epi16(0);
16528        let b = _mm256_set1_epi16(1);
16529        let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
16530        assert_eq!(m, 0b11111111_11111111);
16531    }
16532
16533    #[simd_test(enable = "avx512bw,avx512vl")]
16534    const unsafe fn test_mm256_mask_cmp_epi16_mask() {
16535        let a = _mm256_set1_epi16(0);
16536        let b = _mm256_set1_epi16(1);
16537        let mask = 0b01010101_01010101;
16538        let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
16539        assert_eq!(r, 0b01010101_01010101);
16540    }
16541
16542    #[simd_test(enable = "avx512bw,avx512vl")]
16543    const unsafe fn test_mm_cmp_epi16_mask() {
16544        let a = _mm_set1_epi16(0);
16545        let b = _mm_set1_epi16(1);
16546        let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
16547        assert_eq!(m, 0b11111111);
16548    }
16549
16550    #[simd_test(enable = "avx512bw,avx512vl")]
16551    const unsafe fn test_mm_mask_cmp_epi16_mask() {
16552        let a = _mm_set1_epi16(0);
16553        let b = _mm_set1_epi16(1);
16554        let mask = 0b01010101;
16555        let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
16556        assert_eq!(r, 0b01010101);
16557    }
16558
16559    #[simd_test(enable = "avx512bw")]
16560    const unsafe fn test_mm512_cmp_epi8_mask() {
16561        let a = _mm512_set1_epi8(0);
16562        let b = _mm512_set1_epi8(1);
16563        let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
16564        assert_eq!(
16565            m,
16566            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
16567        );
16568    }
16569
16570    #[simd_test(enable = "avx512bw")]
16571    const unsafe fn test_mm512_mask_cmp_epi8_mask() {
16572        let a = _mm512_set1_epi8(0);
16573        let b = _mm512_set1_epi8(1);
16574        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
16575        let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
16576        assert_eq!(
16577            r,
16578            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
16579        );
16580    }
16581
16582    #[simd_test(enable = "avx512bw,avx512vl")]
16583    const unsafe fn test_mm256_cmp_epi8_mask() {
16584        let a = _mm256_set1_epi8(0);
16585        let b = _mm256_set1_epi8(1);
16586        let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
16587        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
16588    }
16589
16590    #[simd_test(enable = "avx512bw,avx512vl")]
16591    const unsafe fn test_mm256_mask_cmp_epi8_mask() {
16592        let a = _mm256_set1_epi8(0);
16593        let b = _mm256_set1_epi8(1);
16594        let mask = 0b01010101_01010101_01010101_01010101;
16595        let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
16596        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
16597    }
16598
16599    #[simd_test(enable = "avx512bw,avx512vl")]
16600    const unsafe fn test_mm_cmp_epi8_mask() {
16601        let a = _mm_set1_epi8(0);
16602        let b = _mm_set1_epi8(1);
16603        let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
16604        assert_eq!(m, 0b11111111_11111111);
16605    }
16606
16607    #[simd_test(enable = "avx512bw,avx512vl")]
16608    const unsafe fn test_mm_mask_cmp_epi8_mask() {
16609        let a = _mm_set1_epi8(0);
16610        let b = _mm_set1_epi8(1);
16611        let mask = 0b01010101_01010101;
16612        let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
16613        assert_eq!(r, 0b01010101_01010101);
16614    }
16615
16616    #[simd_test(enable = "avx512bw,avx512vl")]
16617    const unsafe fn test_mm256_reduce_add_epi16() {
16618        let a = _mm256_set1_epi16(1);
16619        let e = _mm256_reduce_add_epi16(a);
16620        assert_eq!(16, e);
16621    }
16622
16623    #[simd_test(enable = "avx512bw,avx512vl")]
16624    const unsafe fn test_mm256_mask_reduce_add_epi16() {
16625        let a = _mm256_set1_epi16(1);
16626        let e = _mm256_mask_reduce_add_epi16(0b11111111_00000000, a);
16627        assert_eq!(8, e);
16628    }
16629
16630    #[simd_test(enable = "avx512bw,avx512vl")]
16631    const unsafe fn test_mm_reduce_add_epi16() {
16632        let a = _mm_set1_epi16(1);
16633        let e = _mm_reduce_add_epi16(a);
16634        assert_eq!(8, e);
16635    }
16636
16637    #[simd_test(enable = "avx512bw,avx512vl")]
16638    const unsafe fn test_mm_mask_reduce_add_epi16() {
16639        let a = _mm_set1_epi16(1);
16640        let e = _mm_mask_reduce_add_epi16(0b11110000, a);
16641        assert_eq!(4, e);
16642    }
16643
16644    #[simd_test(enable = "avx512bw,avx512vl")]
16645    const unsafe fn test_mm256_reduce_add_epi8() {
16646        let a = _mm256_set1_epi8(1);
16647        let e = _mm256_reduce_add_epi8(a);
16648        assert_eq!(32, e);
16649    }
16650
16651    #[simd_test(enable = "avx512bw,avx512vl")]
16652    const unsafe fn test_mm256_mask_reduce_add_epi8() {
16653        let a = _mm256_set1_epi8(1);
16654        let e = _mm256_mask_reduce_add_epi8(0b11111111_00000000_11111111_00000000, a);
16655        assert_eq!(16, e);
16656    }
16657
16658    #[simd_test(enable = "avx512bw,avx512vl")]
16659    const unsafe fn test_mm_reduce_add_epi8() {
16660        let a = _mm_set1_epi8(1);
16661        let e = _mm_reduce_add_epi8(a);
16662        assert_eq!(16, e);
16663    }
16664
16665    #[simd_test(enable = "avx512bw,avx512vl")]
16666    const unsafe fn test_mm_mask_reduce_add_epi8() {
16667        let a = _mm_set1_epi8(1);
16668        let e = _mm_mask_reduce_add_epi8(0b11111111_00000000, a);
16669        assert_eq!(8, e);
16670    }
16671
16672    #[simd_test(enable = "avx512bw,avx512vl")]
16673    const unsafe fn test_mm256_reduce_and_epi16() {
16674        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16675        let e = _mm256_reduce_and_epi16(a);
16676        assert_eq!(0, e);
16677    }
16678
16679    #[simd_test(enable = "avx512bw,avx512vl")]
16680    const unsafe fn test_mm256_mask_reduce_and_epi16() {
16681        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16682        let e = _mm256_mask_reduce_and_epi16(0b11111111_00000000, a);
16683        assert_eq!(1, e);
16684    }
16685
16686    #[simd_test(enable = "avx512bw,avx512vl")]
16687    const unsafe fn test_mm_reduce_and_epi16() {
16688        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
16689        let e = _mm_reduce_and_epi16(a);
16690        assert_eq!(0, e);
16691    }
16692
16693    #[simd_test(enable = "avx512bw,avx512vl")]
16694    const unsafe fn test_mm_mask_reduce_and_epi16() {
16695        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
16696        let e = _mm_mask_reduce_and_epi16(0b11110000, a);
16697        assert_eq!(1, e);
16698    }
16699
16700    #[simd_test(enable = "avx512bw,avx512vl")]
16701    const unsafe fn test_mm256_reduce_and_epi8() {
16702        let a = _mm256_set_epi8(
16703            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
16704            2, 2, 2,
16705        );
16706        let e = _mm256_reduce_and_epi8(a);
16707        assert_eq!(0, e);
16708    }
16709
16710    #[simd_test(enable = "avx512bw,avx512vl")]
16711    const unsafe fn test_mm256_mask_reduce_and_epi8() {
16712        let a = _mm256_set_epi8(
16713            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
16714            2, 2, 2,
16715        );
16716        let e = _mm256_mask_reduce_and_epi8(0b11111111_00000000_11111111_00000000, a);
16717        assert_eq!(1, e);
16718    }
16719
16720    #[simd_test(enable = "avx512bw,avx512vl")]
16721    const unsafe fn test_mm_reduce_and_epi8() {
16722        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16723        let e = _mm_reduce_and_epi8(a);
16724        assert_eq!(0, e);
16725    }
16726
16727    #[simd_test(enable = "avx512bw,avx512vl")]
16728    const unsafe fn test_mm_mask_reduce_and_epi8() {
16729        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16730        let e = _mm_mask_reduce_and_epi8(0b11111111_00000000, a);
16731        assert_eq!(1, e);
16732    }
16733
16734    #[simd_test(enable = "avx512bw,avx512vl")]
16735    const unsafe fn test_mm256_reduce_mul_epi16() {
16736        let a = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
16737        let e = _mm256_reduce_mul_epi16(a);
16738        assert_eq!(256, e);
16739    }
16740
16741    #[simd_test(enable = "avx512bw,avx512vl")]
16742    const unsafe fn test_mm256_mask_reduce_mul_epi16() {
16743        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
16744        let e = _mm256_mask_reduce_mul_epi16(0b11111111_00000000, a);
16745        assert_eq!(1, e);
16746    }
16747
16748    #[simd_test(enable = "avx512bw,avx512vl")]
16749    const unsafe fn test_mm_reduce_mul_epi16() {
16750        let a = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
16751        let e = _mm_reduce_mul_epi16(a);
16752        assert_eq!(16, e);
16753    }
16754
16755    #[simd_test(enable = "avx512bw,avx512vl")]
16756    const unsafe fn test_mm_mask_reduce_mul_epi16() {
16757        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
16758        let e = _mm_mask_reduce_mul_epi16(0b11110000, a);
16759        assert_eq!(1, e);
16760    }
16761
16762    #[simd_test(enable = "avx512bw,avx512vl")]
16763    const unsafe fn test_mm256_reduce_mul_epi8() {
16764        let a = _mm256_set_epi8(
16765            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16766            2, 2, 2,
16767        );
16768        let e = _mm256_reduce_mul_epi8(a);
16769        assert_eq!(64, e);
16770    }
16771
16772    #[simd_test(enable = "avx512bw,avx512vl")]
16773    const unsafe fn test_mm256_mask_reduce_mul_epi8() {
16774        let a = _mm256_set_epi8(
16775            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16776            2, 2, 2,
16777        );
16778        let e = _mm256_mask_reduce_mul_epi8(0b11111111_00000000_11111111_00000000, a);
16779        assert_eq!(1, e);
16780    }
16781
16782    #[simd_test(enable = "avx512bw,avx512vl")]
16783    const unsafe fn test_mm_reduce_mul_epi8() {
16784        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
16785        let e = _mm_reduce_mul_epi8(a);
16786        assert_eq!(8, e);
16787    }
16788
16789    #[simd_test(enable = "avx512bw,avx512vl")]
16790    const unsafe fn test_mm_mask_reduce_mul_epi8() {
16791        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
16792        let e = _mm_mask_reduce_mul_epi8(0b11111111_00000000, a);
16793        assert_eq!(1, e);
16794    }
16795
16796    #[simd_test(enable = "avx512bw,avx512vl")]
16797    const unsafe fn test_mm256_reduce_max_epi16() {
16798        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16799        let e: i16 = _mm256_reduce_max_epi16(a);
16800        assert_eq!(15, e);
16801    }
16802
16803    #[simd_test(enable = "avx512bw,avx512vl")]
16804    const unsafe fn test_mm256_mask_reduce_max_epi16() {
16805        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16806        let e: i16 = _mm256_mask_reduce_max_epi16(0b11111111_00000000, a);
16807        assert_eq!(7, e);
16808    }
16809
16810    #[simd_test(enable = "avx512bw,avx512vl")]
16811    const unsafe fn test_mm_reduce_max_epi16() {
16812        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16813        let e: i16 = _mm_reduce_max_epi16(a);
16814        assert_eq!(7, e);
16815    }
16816
16817    #[simd_test(enable = "avx512bw,avx512vl")]
16818    const unsafe fn test_mm_mask_reduce_max_epi16() {
16819        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16820        let e: i16 = _mm_mask_reduce_max_epi16(0b11110000, a);
16821        assert_eq!(3, e);
16822    }
16823
16824    #[simd_test(enable = "avx512bw,avx512vl")]
16825    const unsafe fn test_mm256_reduce_max_epi8() {
16826        let a = _mm256_set_epi8(
16827            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16828            24, 25, 26, 27, 28, 29, 30, 31,
16829        );
16830        let e: i8 = _mm256_reduce_max_epi8(a);
16831        assert_eq!(31, e);
16832    }
16833
16834    #[simd_test(enable = "avx512bw,avx512vl")]
16835    const unsafe fn test_mm256_mask_reduce_max_epi8() {
16836        let a = _mm256_set_epi8(
16837            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16838            24, 25, 26, 27, 28, 29, 30, 31,
16839        );
16840        let e: i8 = _mm256_mask_reduce_max_epi8(0b1111111111111111_0000000000000000, a);
16841        assert_eq!(15, e);
16842    }
16843
16844    #[simd_test(enable = "avx512bw,avx512vl")]
16845    const unsafe fn test_mm_reduce_max_epi8() {
16846        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16847        let e: i8 = _mm_reduce_max_epi8(a);
16848        assert_eq!(15, e);
16849    }
16850
16851    #[simd_test(enable = "avx512bw,avx512vl")]
16852    const unsafe fn test_mm_mask_reduce_max_epi8() {
16853        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16854        let e: i8 = _mm_mask_reduce_max_epi8(0b11111111_00000000, a);
16855        assert_eq!(7, e);
16856    }
16857
16858    #[simd_test(enable = "avx512bw,avx512vl")]
16859    const unsafe fn test_mm256_reduce_max_epu16() {
16860        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16861        let e: u16 = _mm256_reduce_max_epu16(a);
16862        assert_eq!(15, e);
16863    }
16864
16865    #[simd_test(enable = "avx512bw,avx512vl")]
16866    const unsafe fn test_mm256_mask_reduce_max_epu16() {
16867        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16868        let e: u16 = _mm256_mask_reduce_max_epu16(0b11111111_00000000, a);
16869        assert_eq!(7, e);
16870    }
16871
16872    #[simd_test(enable = "avx512bw,avx512vl")]
16873    const unsafe fn test_mm_reduce_max_epu16() {
16874        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16875        let e: u16 = _mm_reduce_max_epu16(a);
16876        assert_eq!(7, e);
16877    }
16878
16879    #[simd_test(enable = "avx512bw,avx512vl")]
16880    const unsafe fn test_mm_mask_reduce_max_epu16() {
16881        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16882        let e: u16 = _mm_mask_reduce_max_epu16(0b11110000, a);
16883        assert_eq!(3, e);
16884    }
16885
16886    #[simd_test(enable = "avx512bw,avx512vl")]
16887    const unsafe fn test_mm256_reduce_max_epu8() {
16888        let a = _mm256_set_epi8(
16889            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16890            24, 25, 26, 27, 28, 29, 30, 31,
16891        );
16892        let e: u8 = _mm256_reduce_max_epu8(a);
16893        assert_eq!(31, e);
16894    }
16895
16896    #[simd_test(enable = "avx512bw,avx512vl")]
16897    const unsafe fn test_mm256_mask_reduce_max_epu8() {
16898        let a = _mm256_set_epi8(
16899            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16900            24, 25, 26, 27, 28, 29, 30, 31,
16901        );
16902        let e: u8 = _mm256_mask_reduce_max_epu8(0b1111111111111111_0000000000000000, a);
16903        assert_eq!(15, e);
16904    }
16905
16906    #[simd_test(enable = "avx512bw,avx512vl")]
16907    const unsafe fn test_mm_reduce_max_epu8() {
16908        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16909        let e: u8 = _mm_reduce_max_epu8(a);
16910        assert_eq!(15, e);
16911    }
16912
16913    #[simd_test(enable = "avx512bw,avx512vl")]
16914    const unsafe fn test_mm_mask_reduce_max_epu8() {
16915        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16916        let e: u8 = _mm_mask_reduce_max_epu8(0b11111111_00000000, a);
16917        assert_eq!(7, e);
16918    }
16919
16920    #[simd_test(enable = "avx512bw,avx512vl")]
16921    const unsafe fn test_mm256_reduce_min_epi16() {
16922        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16923        let e: i16 = _mm256_reduce_min_epi16(a);
16924        assert_eq!(0, e);
16925    }
16926
16927    #[simd_test(enable = "avx512bw,avx512vl")]
16928    const unsafe fn test_mm256_mask_reduce_min_epi16() {
16929        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16930        let e: i16 = _mm256_mask_reduce_min_epi16(0b11111111_00000000, a);
16931        assert_eq!(0, e);
16932    }
16933
16934    #[simd_test(enable = "avx512bw,avx512vl")]
16935    const unsafe fn test_mm_reduce_min_epi16() {
16936        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16937        let e: i16 = _mm_reduce_min_epi16(a);
16938        assert_eq!(0, e);
16939    }
16940
16941    #[simd_test(enable = "avx512bw,avx512vl")]
16942    const unsafe fn test_mm_mask_reduce_min_epi16() {
16943        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16944        let e: i16 = _mm_mask_reduce_min_epi16(0b11110000, a);
16945        assert_eq!(0, e);
16946    }
16947
16948    #[simd_test(enable = "avx512bw,avx512vl")]
16949    const unsafe fn test_mm256_reduce_min_epi8() {
16950        let a = _mm256_set_epi8(
16951            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16952            24, 25, 26, 27, 28, 29, 30, 31,
16953        );
16954        let e: i8 = _mm256_reduce_min_epi8(a);
16955        assert_eq!(0, e);
16956    }
16957
16958    #[simd_test(enable = "avx512bw,avx512vl")]
16959    const unsafe fn test_mm256_mask_reduce_min_epi8() {
16960        let a = _mm256_set_epi8(
16961            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
16962            24, 25, 26, 27, 28, 29, 30, 31,
16963        );
16964        let e: i8 = _mm256_mask_reduce_min_epi8(0b1111111111111111_0000000000000000, a);
16965        assert_eq!(0, e);
16966    }
16967
16968    #[simd_test(enable = "avx512bw,avx512vl")]
16969    const unsafe fn test_mm_reduce_min_epi8() {
16970        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16971        let e: i8 = _mm_reduce_min_epi8(a);
16972        assert_eq!(0, e);
16973    }
16974
16975    #[simd_test(enable = "avx512bw,avx512vl")]
16976    const unsafe fn test_mm_mask_reduce_min_epi8() {
16977        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16978        let e: i8 = _mm_mask_reduce_min_epi8(0b11111111_00000000, a);
16979        assert_eq!(0, e);
16980    }
16981
16982    #[simd_test(enable = "avx512bw,avx512vl")]
16983    const unsafe fn test_mm256_reduce_min_epu16() {
16984        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16985        let e: u16 = _mm256_reduce_min_epu16(a);
16986        assert_eq!(0, e);
16987    }
16988
16989    #[simd_test(enable = "avx512bw,avx512vl")]
16990    const unsafe fn test_mm256_mask_reduce_min_epu16() {
16991        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16992        let e: u16 = _mm256_mask_reduce_min_epu16(0b11111111_00000000, a);
16993        assert_eq!(0, e);
16994    }
16995
16996    #[simd_test(enable = "avx512bw,avx512vl")]
16997    const unsafe fn test_mm_reduce_min_epu16() {
16998        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16999        let e: u16 = _mm_reduce_min_epu16(a);
17000        assert_eq!(0, e);
17001    }
17002
17003    #[simd_test(enable = "avx512bw,avx512vl")]
17004    const unsafe fn test_mm_mask_reduce_min_epu16() {
17005        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
17006        let e: u16 = _mm_mask_reduce_min_epu16(0b11110000, a);
17007        assert_eq!(0, e);
17008    }
17009
17010    #[simd_test(enable = "avx512bw,avx512vl")]
17011    const unsafe fn test_mm256_reduce_min_epu8() {
17012        let a = _mm256_set_epi8(
17013            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17014            24, 25, 26, 27, 28, 29, 30, 31,
17015        );
17016        let e: u8 = _mm256_reduce_min_epu8(a);
17017        assert_eq!(0, e);
17018    }
17019
17020    #[simd_test(enable = "avx512bw,avx512vl")]
17021    const unsafe fn test_mm256_mask_reduce_min_epu8() {
17022        let a = _mm256_set_epi8(
17023            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
17024            24, 25, 26, 27, 28, 29, 30, 31,
17025        );
17026        let e: u8 = _mm256_mask_reduce_min_epu8(0b1111111111111111_0000000000000000, a);
17027        assert_eq!(0, e);
17028    }
17029
17030    #[simd_test(enable = "avx512bw,avx512vl")]
17031    const unsafe fn test_mm_reduce_min_epu8() {
17032        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17033        let e: u8 = _mm_reduce_min_epu8(a);
17034        assert_eq!(0, e);
17035    }
17036
17037    #[simd_test(enable = "avx512bw,avx512vl")]
17038    const unsafe fn test_mm_mask_reduce_min_epu8() {
17039        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17040        let e: u8 = _mm_mask_reduce_min_epu8(0b11111111_00000000, a);
17041        assert_eq!(0, e);
17042    }
17043
17044    #[simd_test(enable = "avx512bw,avx512vl")]
17045    const unsafe fn test_mm256_reduce_or_epi16() {
17046        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
17047        let e = _mm256_reduce_or_epi16(a);
17048        assert_eq!(3, e);
17049    }
17050
17051    #[simd_test(enable = "avx512bw,avx512vl")]
17052    const unsafe fn test_mm256_mask_reduce_or_epi16() {
17053        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
17054        let e = _mm256_mask_reduce_or_epi16(0b11111111_00000000, a);
17055        assert_eq!(1, e);
17056    }
17057
17058    #[simd_test(enable = "avx512bw,avx512vl")]
17059    const unsafe fn test_mm_reduce_or_epi16() {
17060        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
17061        let e = _mm_reduce_or_epi16(a);
17062        assert_eq!(3, e);
17063    }
17064
17065    #[simd_test(enable = "avx512bw,avx512vl")]
17066    const unsafe fn test_mm_mask_reduce_or_epi16() {
17067        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
17068        let e = _mm_mask_reduce_or_epi16(0b11110000, a);
17069        assert_eq!(1, e);
17070    }
17071
17072    #[simd_test(enable = "avx512bw,avx512vl")]
17073    const unsafe fn test_mm256_reduce_or_epi8() {
17074        let a = _mm256_set_epi8(
17075            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
17076            2, 2, 2,
17077        );
17078        let e = _mm256_reduce_or_epi8(a);
17079        assert_eq!(3, e);
17080    }
17081
17082    #[simd_test(enable = "avx512bw,avx512vl")]
17083    const unsafe fn test_mm256_mask_reduce_or_epi8() {
17084        let a = _mm256_set_epi8(
17085            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
17086            2, 2, 2,
17087        );
17088        let e = _mm256_mask_reduce_or_epi8(0b11111111_00000000_11111111_00000000, a);
17089        assert_eq!(1, e);
17090    }
17091
17092    #[simd_test(enable = "avx512bw,avx512vl")]
17093    const unsafe fn test_mm_reduce_or_epi8() {
17094        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
17095        let e = _mm_reduce_or_epi8(a);
17096        assert_eq!(3, e);
17097    }
17098
17099    #[simd_test(enable = "avx512bw,avx512vl")]
17100    const unsafe fn test_mm_mask_reduce_or_epi8() {
17101        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
17102        let e = _mm_mask_reduce_or_epi8(0b11111111_00000000, a);
17103        assert_eq!(1, e);
17104    }
17105
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_loadu_epi16() {
        // Unaligned load of 32 i16s; _mm512_set_epi16 takes lanes high-to-low,
        // so the expected arguments are the memory contents reversed.
        #[rustfmt::skip]
        let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi16(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }
17115
17116    #[simd_test(enable = "avx512bw,avx512vl")]
17117    const unsafe fn test_mm256_loadu_epi16() {
17118        let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
17119        let r = _mm256_loadu_epi16(&a[0]);
17120        let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
17121        assert_eq_m256i(r, e);
17122    }
17123
17124    #[simd_test(enable = "avx512bw,avx512vl")]
17125    const unsafe fn test_mm_loadu_epi16() {
17126        let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
17127        let r = _mm_loadu_epi16(&a[0]);
17128        let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
17129        assert_eq_m128i(r, e);
17130    }
17131
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_loadu_epi8() {
        // Unaligned load of 64 i8s; _mm512_set_epi8 takes lanes high-to-low,
        // so the expected arguments are the memory contents reversed.
        #[rustfmt::skip]
        let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                           1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }
17143
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_loadu_epi8() {
        // Unaligned load of 32 i8s; expected vector lists the same values
        // reversed because set_epi8 takes lanes high-to-low.
        #[rustfmt::skip]
        let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm256_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }
17153
17154    #[simd_test(enable = "avx512bw,avx512vl")]
17155    const unsafe fn test_mm_loadu_epi8() {
17156        let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
17157        let r = _mm_loadu_epi8(&a[0]);
17158        let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
17159        assert_eq_m128i(r, e);
17160    }
17161
17162    #[simd_test(enable = "avx512bw")]
17163    const unsafe fn test_mm512_storeu_epi16() {
17164        let a = _mm512_set1_epi16(9);
17165        let mut r = _mm512_undefined_epi32();
17166        _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
17167        assert_eq_m512i(r, a);
17168    }
17169
17170    #[simd_test(enable = "avx512bw,avx512vl")]
17171    const unsafe fn test_mm256_storeu_epi16() {
17172        let a = _mm256_set1_epi16(9);
17173        let mut r = _mm256_set1_epi32(0);
17174        _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
17175        assert_eq_m256i(r, a);
17176    }
17177
17178    #[simd_test(enable = "avx512bw,avx512vl")]
17179    const unsafe fn test_mm_storeu_epi16() {
17180        let a = _mm_set1_epi16(9);
17181        let mut r = _mm_set1_epi32(0);
17182        _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
17183        assert_eq_m128i(r, a);
17184    }
17185
17186    #[simd_test(enable = "avx512bw")]
17187    const unsafe fn test_mm512_storeu_epi8() {
17188        let a = _mm512_set1_epi8(9);
17189        let mut r = _mm512_undefined_epi32();
17190        _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
17191        assert_eq_m512i(r, a);
17192    }
17193
17194    #[simd_test(enable = "avx512bw,avx512vl")]
17195    const unsafe fn test_mm256_storeu_epi8() {
17196        let a = _mm256_set1_epi8(9);
17197        let mut r = _mm256_set1_epi32(0);
17198        _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
17199        assert_eq_m256i(r, a);
17200    }
17201
17202    #[simd_test(enable = "avx512bw,avx512vl")]
17203    const unsafe fn test_mm_storeu_epi8() {
17204        let a = _mm_set1_epi8(9);
17205        let mut r = _mm_set1_epi32(0);
17206        _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
17207        assert_eq_m128i(r, a);
17208    }
17209
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_loadu_epi16() {
        // Masked load: bit i of `m` set loads element i from memory; cleared
        // bits take the corresponding lane of `src` (42).
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        // black_box keeps the pointer opaque to the optimizer.
        let r = _mm512_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }
17227
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_loadu_epi16() {
        // Zeroing masked load: set mask bits load from memory, cleared bits
        // produce zero lanes.
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi16(m, black_box(p));
        let e = &[
            0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }
17244
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_storeu_epi16() {
        // Masked store: set mask bits write their lane; cleared bits leave
        // the destination memory (pre-filled with 42) untouched.
        let mut r = [42_i16; 32];
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm512_loadu_epi16(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e);
    }
17262
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_loadu_epi8() {
        // Masked load: bit i of `m` set loads byte i from memory; cleared
        // bits take the corresponding lane of `src` (42).
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }
17282
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_loadu_epi8() {
        // Zeroing masked load: set mask bits load from memory, cleared bits
        // produce zero lanes.
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
            50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }
17301
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_storeu_epi8() {
        // Masked store: set mask bits write their byte; cleared bits leave
        // the destination memory (pre-filled with 42) untouched.
        let mut r = [42_i8; 64];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let a = _mm512_loadu_epi8(a.as_ptr());
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e);
    }
17321
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_loadu_epi16() {
        // Masked load: set mask bits load from memory; cleared bits keep src (42).
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }
17335
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_loadu_epi16() {
        // Zeroing masked load: cleared mask bits produce zero lanes.
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }
17346
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_storeu_epi16() {
        // Masked store: cleared mask bits leave the destination (42) untouched.
        let mut r = [42_i16; 16];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm256_loadu_epi16(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e);
    }
17360
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_loadu_epi8() {
        // Masked load: set mask bits load from memory; cleared bits keep src (42).
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }
17378
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_loadu_epi8() {
        // Zeroing masked load: cleared mask bits produce zero lanes.
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }
17395
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_storeu_epi8() {
        // Masked store: cleared mask bits leave the destination (42) untouched.
        let mut r = [42_i8; 32];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm256_loadu_epi8(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e);
    }
17413
17414    #[simd_test(enable = "avx512bw,avx512vl")]
17415    const unsafe fn test_mm_mask_loadu_epi16() {
17416        let src = _mm_set1_epi16(42);
17417        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
17418        let p = a.as_ptr();
17419        let m = 0b11001010;
17420        let r = _mm_mask_loadu_epi16(src, m, black_box(p));
17421        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
17422        let e = _mm_loadu_epi16(e.as_ptr());
17423        assert_eq_m128i(r, e);
17424    }
17425
17426    #[simd_test(enable = "avx512bw,avx512vl")]
17427    const unsafe fn test_mm_maskz_loadu_epi16() {
17428        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
17429        let p = a.as_ptr();
17430        let m = 0b11001010;
17431        let r = _mm_maskz_loadu_epi16(m, black_box(p));
17432        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
17433        let e = _mm_loadu_epi16(e.as_ptr());
17434        assert_eq_m128i(r, e);
17435    }
17436
17437    #[simd_test(enable = "avx512bw,avx512vl")]
17438    const unsafe fn test_mm_mask_storeu_epi16() {
17439        let mut r = [42_i16; 8];
17440        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
17441        let a = _mm_loadu_epi16(a.as_ptr());
17442        let m = 0b11001010;
17443        _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a);
17444        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
17445        let e = _mm_loadu_epi16(e.as_ptr());
17446        assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e);
17447    }
17448
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_loadu_epi8() {
        // Masked load: set mask bits load from memory; cleared bits keep src (42).
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }
17462
17463    #[simd_test(enable = "avx512bw,avx512vl")]
17464    const unsafe fn test_mm_maskz_loadu_epi8() {
17465        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
17466        let p = a.as_ptr();
17467        let m = 0b11101000_11001010;
17468        let r = _mm_maskz_loadu_epi8(m, black_box(p));
17469        let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
17470        let e = _mm_loadu_epi8(e.as_ptr());
17471        assert_eq_m128i(r, e);
17472    }
17473
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_storeu_epi8() {
        // Masked store: cleared mask bits leave the destination (42) untouched.
        let mut r = [42_i8; 16];
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm_loadu_epi8(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e);
    }
17487
17488    #[simd_test(enable = "avx512bw")]
17489    const unsafe fn test_mm512_madd_epi16() {
17490        let a = _mm512_set1_epi16(1);
17491        let b = _mm512_set1_epi16(1);
17492        let r = _mm512_madd_epi16(a, b);
17493        let e = _mm512_set1_epi32(2);
17494        assert_eq_m512i(r, e);
17495    }
17496
17497    #[simd_test(enable = "avx512bw")]
17498    const unsafe fn test_mm512_mask_madd_epi16() {
17499        let a = _mm512_set1_epi16(1);
17500        let b = _mm512_set1_epi16(1);
17501        let r = _mm512_mask_madd_epi16(a, 0, a, b);
17502        assert_eq_m512i(r, a);
17503        let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b);
17504        let e = _mm512_set_epi32(
17505            1 << 16 | 1,
17506            1 << 16 | 1,
17507            1 << 16 | 1,
17508            1 << 16 | 1,
17509            1 << 16 | 1,
17510            1 << 16 | 1,
17511            1 << 16 | 1,
17512            1 << 16 | 1,
17513            1 << 16 | 1,
17514            1 << 16 | 1,
17515            1 << 16 | 1,
17516            1 << 16 | 1,
17517            2,
17518            2,
17519            2,
17520            2,
17521        );
17522        assert_eq_m512i(r, e);
17523    }
17524
17525    #[simd_test(enable = "avx512bw")]
17526    const unsafe fn test_mm512_maskz_madd_epi16() {
17527        let a = _mm512_set1_epi16(1);
17528        let b = _mm512_set1_epi16(1);
17529        let r = _mm512_maskz_madd_epi16(0, a, b);
17530        assert_eq_m512i(r, _mm512_setzero_si512());
17531        let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b);
17532        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2);
17533        assert_eq_m512i(r, e);
17534    }
17535
17536    #[simd_test(enable = "avx512bw,avx512vl")]
17537    const unsafe fn test_mm256_mask_madd_epi16() {
17538        let a = _mm256_set1_epi16(1);
17539        let b = _mm256_set1_epi16(1);
17540        let r = _mm256_mask_madd_epi16(a, 0, a, b);
17541        assert_eq_m256i(r, a);
17542        let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b);
17543        let e = _mm256_set_epi32(
17544            1 << 16 | 1,
17545            1 << 16 | 1,
17546            1 << 16 | 1,
17547            1 << 16 | 1,
17548            2,
17549            2,
17550            2,
17551            2,
17552        );
17553        assert_eq_m256i(r, e);
17554    }
17555
17556    #[simd_test(enable = "avx512bw,avx512vl")]
17557    const unsafe fn test_mm256_maskz_madd_epi16() {
17558        let a = _mm256_set1_epi16(1);
17559        let b = _mm256_set1_epi16(1);
17560        let r = _mm256_maskz_madd_epi16(0, a, b);
17561        assert_eq_m256i(r, _mm256_setzero_si256());
17562        let r = _mm256_maskz_madd_epi16(0b00001111, a, b);
17563        let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2);
17564        assert_eq_m256i(r, e);
17565    }
17566
17567    #[simd_test(enable = "avx512bw,avx512vl")]
17568    const unsafe fn test_mm_mask_madd_epi16() {
17569        let a = _mm_set1_epi16(1);
17570        let b = _mm_set1_epi16(1);
17571        let r = _mm_mask_madd_epi16(a, 0, a, b);
17572        assert_eq_m128i(r, a);
17573        let r = _mm_mask_madd_epi16(a, 0b00001111, a, b);
17574        let e = _mm_set_epi32(2, 2, 2, 2);
17575        assert_eq_m128i(r, e);
17576    }
17577
17578    #[simd_test(enable = "avx512bw,avx512vl")]
17579    const unsafe fn test_mm_maskz_madd_epi16() {
17580        let a = _mm_set1_epi16(1);
17581        let b = _mm_set1_epi16(1);
17582        let r = _mm_maskz_madd_epi16(0, a, b);
17583        assert_eq_m128i(r, _mm_setzero_si128());
17584        let r = _mm_maskz_madd_epi16(0b00001111, a, b);
17585        let e = _mm_set_epi32(2, 2, 2, 2);
17586        assert_eq_m128i(r, e);
17587    }
17588
17589    #[simd_test(enable = "avx512bw")]
17590    unsafe fn test_mm512_maddubs_epi16() {
17591        let a = _mm512_set1_epi8(1);
17592        let b = _mm512_set1_epi8(1);
17593        let r = _mm512_maddubs_epi16(a, b);
17594        let e = _mm512_set1_epi16(2);
17595        assert_eq_m512i(r, e);
17596    }
17597
17598    #[simd_test(enable = "avx512bw")]
17599    unsafe fn test_mm512_mask_maddubs_epi16() {
17600        let a = _mm512_set1_epi8(1);
17601        let b = _mm512_set1_epi8(1);
17602        let src = _mm512_set1_epi16(1);
17603        let r = _mm512_mask_maddubs_epi16(src, 0, a, b);
17604        assert_eq_m512i(r, src);
17605        let r = _mm512_mask_add_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
17606        #[rustfmt::skip]
17607        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17608                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<9|2);
17609        assert_eq_m512i(r, e);
17610    }
17611
17612    #[simd_test(enable = "avx512bw")]
17613    unsafe fn test_mm512_maskz_maddubs_epi16() {
17614        let a = _mm512_set1_epi8(1);
17615        let b = _mm512_set1_epi8(1);
17616        let r = _mm512_maskz_maddubs_epi16(0, a, b);
17617        assert_eq_m512i(r, _mm512_setzero_si512());
17618        let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b);
17619        #[rustfmt::skip]
17620        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
17621                                 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
17622        assert_eq_m512i(r, e);
17623    }
17624
17625    #[simd_test(enable = "avx512bw,avx512vl")]
17626    unsafe fn test_mm256_mask_maddubs_epi16() {
17627        let a = _mm256_set1_epi8(1);
17628        let b = _mm256_set1_epi8(1);
17629        let src = _mm256_set1_epi16(1);
17630        let r = _mm256_mask_maddubs_epi16(src, 0, a, b);
17631        assert_eq_m256i(r, src);
17632        let r = _mm256_mask_add_epi16(src, 0b00000000_00000001, a, b);
17633        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2);
17634        assert_eq_m256i(r, e);
17635    }
17636
17637    #[simd_test(enable = "avx512bw,avx512vl")]
17638    unsafe fn test_mm256_maskz_maddubs_epi16() {
17639        let a = _mm256_set1_epi8(1);
17640        let b = _mm256_set1_epi8(1);
17641        let r = _mm256_maskz_maddubs_epi16(0, a, b);
17642        assert_eq_m256i(r, _mm256_setzero_si256());
17643        let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b);
17644        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
17645        assert_eq_m256i(r, e);
17646    }
17647
17648    #[simd_test(enable = "avx512bw,avx512vl")]
17649    unsafe fn test_mm_mask_maddubs_epi16() {
17650        let a = _mm_set1_epi8(1);
17651        let b = _mm_set1_epi8(1);
17652        let src = _mm_set1_epi16(1);
17653        let r = _mm_mask_maddubs_epi16(src, 0, a, b);
17654        assert_eq_m128i(r, src);
17655        let r = _mm_mask_add_epi16(src, 0b00000001, a, b);
17656        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2);
17657        assert_eq_m128i(r, e);
17658    }
17659
17660    #[simd_test(enable = "avx512bw,avx512vl")]
17661    unsafe fn test_mm_maskz_maddubs_epi16() {
17662        let a = _mm_set1_epi8(1);
17663        let b = _mm_set1_epi8(1);
17664        let r = _mm_maskz_maddubs_epi16(0, a, b);
17665        assert_eq_m128i(r, _mm_setzero_si128());
17666        let r = _mm_maskz_maddubs_epi16(0b00001111, a, b);
17667        let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2);
17668        assert_eq_m128i(r, e);
17669    }
17670
17671    #[simd_test(enable = "avx512bw")]
17672    unsafe fn test_mm512_packs_epi32() {
17673        let a = _mm512_set1_epi32(i32::MAX);
17674        let b = _mm512_set1_epi32(1);
17675        let r = _mm512_packs_epi32(a, b);
17676        #[rustfmt::skip]
17677        let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX,
17678                                 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
17679        assert_eq_m512i(r, e);
17680    }
17681
17682    #[simd_test(enable = "avx512bw")]
17683    unsafe fn test_mm512_mask_packs_epi32() {
17684        let a = _mm512_set1_epi32(i32::MAX);
17685        let b = _mm512_set1_epi32(1 << 16 | 1);
17686        let r = _mm512_mask_packs_epi32(a, 0, a, b);
17687        assert_eq_m512i(r, a);
17688        let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
17689        #[rustfmt::skip]
17690        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
17691                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
17692        assert_eq_m512i(r, e);
17693    }
17694
17695    #[simd_test(enable = "avx512bw")]
17696    unsafe fn test_mm512_maskz_packs_epi32() {
17697        let a = _mm512_set1_epi32(i32::MAX);
17698        let b = _mm512_set1_epi32(1);
17699        let r = _mm512_maskz_packs_epi32(0, a, b);
17700        assert_eq_m512i(r, _mm512_setzero_si512());
17701        let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b);
17702        #[rustfmt::skip]
17703        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17704                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
17705        assert_eq_m512i(r, e);
17706    }
17707
17708    #[simd_test(enable = "avx512bw,avx512vl")]
17709    unsafe fn test_mm256_mask_packs_epi32() {
17710        let a = _mm256_set1_epi32(i32::MAX);
17711        let b = _mm256_set1_epi32(1 << 16 | 1);
17712        let r = _mm256_mask_packs_epi32(a, 0, a, b);
17713        assert_eq_m256i(r, a);
17714        let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b);
17715        #[rustfmt::skip]
17716        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
17717        assert_eq_m256i(r, e);
17718    }
17719
17720    #[simd_test(enable = "avx512bw,avx512vl")]
17721    unsafe fn test_mm256_maskz_packs_epi32() {
17722        let a = _mm256_set1_epi32(i32::MAX);
17723        let b = _mm256_set1_epi32(1);
17724        let r = _mm256_maskz_packs_epi32(0, a, b);
17725        assert_eq_m256i(r, _mm256_setzero_si256());
17726        let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b);
17727        #[rustfmt::skip]
17728        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
17729        assert_eq_m256i(r, e);
17730    }
17731
17732    #[simd_test(enable = "avx512bw,avx512vl")]
17733    unsafe fn test_mm_mask_packs_epi32() {
17734        let a = _mm_set1_epi32(i32::MAX);
17735        let b = _mm_set1_epi32(1 << 16 | 1);
17736        let r = _mm_mask_packs_epi32(a, 0, a, b);
17737        assert_eq_m128i(r, a);
17738        let r = _mm_mask_packs_epi32(b, 0b00001111, a, b);
17739        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
17740        assert_eq_m128i(r, e);
17741    }
17742
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packs_epi32() {
        let a = _mm_set1_epi32(i32::MAX);
        let b = _mm_set1_epi32(1);
        // Zero mask: all lanes zeroed.
        let r = _mm_maskz_packs_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Only the low 4 lanes survive, saturated from i32::MAX to i16::MAX.
        let r = _mm_maskz_packs_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }
17753
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        // The pack interleaves a/b within each 128-bit lane; i16::MAX
        // saturates to i8::MAX, while 1 fits as-is.
        let r = _mm512_packs_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }
17766
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1 << 8 | 1);
        // Zero mask: every lane is copied from src (`a`) unchanged.
        let r = _mm512_mask_packs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 lanes take the pack (i16::MAX saturates to i8::MAX); the
        // rest keep src `b`, whose bytes are all 1.
        let r = _mm512_mask_packs_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }
17786
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        // Zero mask: all output lanes are zeroed.
        let r = _mm512_maskz_packs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Only the low 4 lanes survive, saturated from i16::MAX to i8::MAX.
        let r = _mm512_maskz_packs_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }
17805
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packs_epi16() {
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1 << 8 | 1);
        // Zero mask: every lane is copied from src (`a`) unchanged.
        let r = _mm256_mask_packs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 4 lanes take the saturated pack; the rest keep `b`'s bytes (all 1s).
        let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }
17818
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packs_epi16() {
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1);
        // Zero mask: all output lanes are zeroed.
        let r = _mm256_maskz_packs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Only the low 4 lanes survive, saturated from i16::MAX to i8::MAX.
        let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }
17831
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packs_epi16() {
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1 << 8 | 1);
        // Zero mask: result is copied from src (`a`).
        let r = _mm_mask_packs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Low 4 lanes take the saturated pack; the rest keep `b`'s bytes (all 1s).
        let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
17843
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packs_epi16() {
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1);
        // Zero mask: all lanes zeroed.
        let r = _mm_maskz_packs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Only the low 4 lanes survive, saturated from i16::MAX to i8::MAX.
        let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }
17855
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        // Unsigned saturation clamps -1 to 0; results interleave a/b
        // within each 128-bit lane.
        let r = _mm512_packus_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
                                 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
17866
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1 << 16 | 1);
        // Zero mask: result is copied from src (`a`).
        let r = _mm512_mask_packus_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 lanes take the pack (-1 clamps to 0); the rest keep `b`'s
        // 16-bit halves (all 1s).
        let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
17879
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        // Zero mask: all output lanes are zeroed.
        let r = _mm512_maskz_packus_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // The surviving low lanes hold `a` packed, and -1 clamps to 0, so
        // the expected result is all zeros either way.
        let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
17892
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packus_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1 << 16 | 1);
        // Zero mask: result is copied from src (`a`).
        let r = _mm256_mask_packus_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 4 lanes take the pack (-1 clamps to 0); the rest keep `b` (all 1s).
        let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
17903
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packus_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1);
        // Zero mask: all lanes zeroed.
        let r = _mm256_maskz_packus_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Surviving lanes hold `a` packed; -1 clamps to 0, so all zeros.
        let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
17914
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packus_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1 << 16 | 1);
        // Zero mask: result is copied from src (`a`).
        let r = _mm_mask_packus_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Low 4 lanes take the pack (-1 clamps to 0); the rest keep `b` (all 1s).
        let r = _mm_mask_packus_epi32(b, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
17925
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packus_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1);
        // Zero mask: all lanes zeroed.
        let r = _mm_maskz_packus_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Surviving lanes hold `a` packed; -1 clamps to 0, so all zeros.
        let r = _mm_maskz_packus_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
17936
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        // Unsigned saturation clamps -1 to 0; results interleave a/b
        // within each 128-bit lane.
        let r = _mm512_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
17949
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1 << 8 | 1);
        // Zero mask: result is copied from src (`a`).
        let r = _mm512_mask_packus_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Low 4 lanes take the pack (-1 clamps to 0); the rest keep src
        // `b`, whose bytes are all 1.
        let r = _mm512_mask_packus_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
17969
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        // Zero mask: all output lanes are zeroed.
        let r = _mm512_maskz_packus_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Surviving lanes hold `a` packed; -1 clamps to 0, so all zeros.
        let r = _mm512_maskz_packus_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
17988
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packus_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1 << 8 | 1);
        // Zero mask: result is copied from src (`a`).
        let r = _mm256_mask_packus_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Low 4 lanes take the pack (-1 clamps to 0); the rest keep `b`'s bytes (all 1s).
        let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
18001
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packus_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1);
        // Zero mask: all lanes zeroed.
        let r = _mm256_maskz_packus_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Surviving lanes hold `a` packed; -1 clamps to 0, so all zeros.
        let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }
18014
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packus_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1 << 8 | 1);
        // Zero mask: result is copied from src (`a`).
        let r = _mm_mask_packus_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        // Low 4 lanes take the pack (-1 clamps to 0); the rest keep `b`'s bytes (all 1s).
        let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
18025
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packus_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1);
        // Zero mask: all lanes zeroed.
        let r = _mm_maskz_packus_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Surviving lanes hold `a` packed; -1 clamps to 0, so all zeros.
        let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
18036
18037    #[simd_test(enable = "avx512bw")]
18038    const unsafe fn test_mm512_avg_epu16() {
18039        let a = _mm512_set1_epi16(1);
18040        let b = _mm512_set1_epi16(1);
18041        let r = _mm512_avg_epu16(a, b);
18042        let e = _mm512_set1_epi16(1);
18043        assert_eq_m512i(r, e);
18044    }
18045
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        // Zero mask: result is copied from src (`a`).
        let r = _mm512_mask_avg_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // src and the averaged value are both 1, so every lane is 1
        // regardless of which mask bits are set.
        let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
18058
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        // Zero mask: all output lanes are zeroed.
        let r = _mm512_maskz_avg_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Only the low 4 lanes keep the averaged value (1); the rest are zeroed.
        let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
18071
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_avg_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        // Zero mask: result is copied from src (`a`).
        let r = _mm256_mask_avg_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        // src and the averaged value are both 1, so every lane is 1.
        let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
18082
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_avg_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        // Zero mask: all lanes zeroed.
        let r = _mm256_maskz_avg_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Only the low 4 lanes keep the averaged value (1).
        let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
18093
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_avg_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        // Zero mask: result is copied from src (`a`).
        let r = _mm_mask_avg_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        // src and the averaged value are both 1, so every lane is 1.
        let r = _mm_mask_avg_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
18104
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_avg_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        // Zero mask: all lanes zeroed.
        let r = _mm_maskz_avg_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Only the low 4 lanes keep the averaged value (1).
        let r = _mm_maskz_avg_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
18115
18116    #[simd_test(enable = "avx512bw")]
18117    const unsafe fn test_mm512_avg_epu8() {
18118        let a = _mm512_set1_epi8(1);
18119        let b = _mm512_set1_epi8(1);
18120        let r = _mm512_avg_epu8(a, b);
18121        let e = _mm512_set1_epi8(1);
18122        assert_eq_m512i(r, e);
18123    }
18124
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_avg_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        // Zero mask: result is copied from src (`a`).
        let r = _mm512_mask_avg_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // src and the averaged value are both 1, so every lane is 1
        // regardless of which mask bits are set.
        let r = _mm512_mask_avg_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
18144
18145    #[simd_test(enable = "avx512bw")]
18146    const unsafe fn test_mm512_maskz_avg_epu8() {
18147        let a = _mm512_set1_epi8(1);
18148        let b = _mm512_set1_epi8(1);
18149        let r = _mm512_maskz_avg_epu8(0, a, b);
18150        assert_eq_m512i(r, _mm512_setzero_si512());
18151        let r = _mm512_maskz_avg_epu8(
18152            0b00000000_000000000_00000000_00000000_00000000_0000000_00000000_00001111,
18153            a,
18154            b,
18155        );
18156        #[rustfmt::skip]
18157        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18158                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18159                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18160                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
18161        assert_eq_m512i(r, e);
18162    }
18163
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_avg_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        // Zero mask: result is copied from src (`a`).
        let r = _mm256_mask_avg_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // src and the averaged value are both 1, so every lane is 1.
        let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
18176
18177    #[simd_test(enable = "avx512bw,avx512vl")]
18178    const unsafe fn test_mm256_maskz_avg_epu8() {
18179        let a = _mm256_set1_epi8(1);
18180        let b = _mm256_set1_epi8(1);
18181        let r = _mm256_maskz_avg_epu8(0, a, b);
18182        assert_eq_m256i(r, _mm256_setzero_si256());
18183        let r = _mm256_maskz_avg_epu8(0b00000000_0000000_00000000_00001111, a, b);
18184        #[rustfmt::skip]
18185        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18186                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
18187        assert_eq_m256i(r, e);
18188    }
18189
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_avg_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        // Zero mask: result is copied from src (`a`).
        let r = _mm_mask_avg_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        // src and the averaged value are both 1, so every lane is 1.
        let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
18200
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_avg_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        // Zero mask: all lanes zeroed.
        let r = _mm_maskz_avg_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        // Only the low 4 lanes keep the averaged value (1).
        let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
18211
18212    #[simd_test(enable = "avx512bw")]
18213    unsafe fn test_mm512_sll_epi16() {
18214        let a = _mm512_set1_epi16(1 << 15);
18215        let count = _mm_set1_epi16(2);
18216        let r = _mm512_sll_epi16(a, count);
18217        let e = _mm512_set1_epi16(0);
18218        assert_eq_m512i(r, e);
18219    }
18220
18221    #[simd_test(enable = "avx512bw")]
18222    unsafe fn test_mm512_mask_sll_epi16() {
18223        let a = _mm512_set1_epi16(1 << 15);
18224        let count = _mm_set1_epi16(2);
18225        let r = _mm512_mask_sll_epi16(a, 0, a, count);
18226        assert_eq_m512i(r, a);
18227        let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18228        let e = _mm512_set1_epi16(0);
18229        assert_eq_m512i(r, e);
18230    }
18231
18232    #[simd_test(enable = "avx512bw")]
18233    unsafe fn test_mm512_maskz_sll_epi16() {
18234        let a = _mm512_set1_epi16(1 << 15);
18235        let count = _mm_set1_epi16(2);
18236        let r = _mm512_maskz_sll_epi16(0, a, count);
18237        assert_eq_m512i(r, _mm512_setzero_si512());
18238        let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count);
18239        let e = _mm512_set1_epi16(0);
18240        assert_eq_m512i(r, e);
18241    }
18242
18243    #[simd_test(enable = "avx512bw,avx512vl")]
18244    unsafe fn test_mm256_mask_sll_epi16() {
18245        let a = _mm256_set1_epi16(1 << 15);
18246        let count = _mm_set1_epi16(2);
18247        let r = _mm256_mask_sll_epi16(a, 0, a, count);
18248        assert_eq_m256i(r, a);
18249        let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count);
18250        let e = _mm256_set1_epi16(0);
18251        assert_eq_m256i(r, e);
18252    }
18253
18254    #[simd_test(enable = "avx512bw,avx512vl")]
18255    unsafe fn test_mm256_maskz_sll_epi16() {
18256        let a = _mm256_set1_epi16(1 << 15);
18257        let count = _mm_set1_epi16(2);
18258        let r = _mm256_maskz_sll_epi16(0, a, count);
18259        assert_eq_m256i(r, _mm256_setzero_si256());
18260        let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count);
18261        let e = _mm256_set1_epi16(0);
18262        assert_eq_m256i(r, e);
18263    }
18264
18265    #[simd_test(enable = "avx512bw,avx512vl")]
18266    unsafe fn test_mm_mask_sll_epi16() {
18267        let a = _mm_set1_epi16(1 << 15);
18268        let count = _mm_set1_epi16(2);
18269        let r = _mm_mask_sll_epi16(a, 0, a, count);
18270        assert_eq_m128i(r, a);
18271        let r = _mm_mask_sll_epi16(a, 0b11111111, a, count);
18272        let e = _mm_set1_epi16(0);
18273        assert_eq_m128i(r, e);
18274    }
18275
18276    #[simd_test(enable = "avx512bw,avx512vl")]
18277    unsafe fn test_mm_maskz_sll_epi16() {
18278        let a = _mm_set1_epi16(1 << 15);
18279        let count = _mm_set1_epi16(2);
18280        let r = _mm_maskz_sll_epi16(0, a, count);
18281        assert_eq_m128i(r, _mm_setzero_si128());
18282        let r = _mm_maskz_sll_epi16(0b11111111, a, count);
18283        let e = _mm_set1_epi16(0);
18284        assert_eq_m128i(r, e);
18285    }
18286
18287    #[simd_test(enable = "avx512bw")]
18288    const unsafe fn test_mm512_slli_epi16() {
18289        let a = _mm512_set1_epi16(1 << 15);
18290        let r = _mm512_slli_epi16::<1>(a);
18291        let e = _mm512_set1_epi16(0);
18292        assert_eq_m512i(r, e);
18293    }
18294
18295    #[simd_test(enable = "avx512bw")]
18296    const unsafe fn test_mm512_mask_slli_epi16() {
18297        let a = _mm512_set1_epi16(1 << 15);
18298        let r = _mm512_mask_slli_epi16::<1>(a, 0, a);
18299        assert_eq_m512i(r, a);
18300        let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a);
18301        let e = _mm512_set1_epi16(0);
18302        assert_eq_m512i(r, e);
18303    }
18304
18305    #[simd_test(enable = "avx512bw")]
18306    const unsafe fn test_mm512_maskz_slli_epi16() {
18307        let a = _mm512_set1_epi16(1 << 15);
18308        let r = _mm512_maskz_slli_epi16::<1>(0, a);
18309        assert_eq_m512i(r, _mm512_setzero_si512());
18310        let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a);
18311        let e = _mm512_set1_epi16(0);
18312        assert_eq_m512i(r, e);
18313    }
18314
18315    #[simd_test(enable = "avx512bw,avx512vl")]
18316    const unsafe fn test_mm256_mask_slli_epi16() {
18317        let a = _mm256_set1_epi16(1 << 15);
18318        let r = _mm256_mask_slli_epi16::<1>(a, 0, a);
18319        assert_eq_m256i(r, a);
18320        let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a);
18321        let e = _mm256_set1_epi16(0);
18322        assert_eq_m256i(r, e);
18323    }
18324
18325    #[simd_test(enable = "avx512bw,avx512vl")]
18326    const unsafe fn test_mm256_maskz_slli_epi16() {
18327        let a = _mm256_set1_epi16(1 << 15);
18328        let r = _mm256_maskz_slli_epi16::<1>(0, a);
18329        assert_eq_m256i(r, _mm256_setzero_si256());
18330        let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a);
18331        let e = _mm256_set1_epi16(0);
18332        assert_eq_m256i(r, e);
18333    }
18334
18335    #[simd_test(enable = "avx512bw,avx512vl")]
18336    const unsafe fn test_mm_mask_slli_epi16() {
18337        let a = _mm_set1_epi16(1 << 15);
18338        let r = _mm_mask_slli_epi16::<1>(a, 0, a);
18339        assert_eq_m128i(r, a);
18340        let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a);
18341        let e = _mm_set1_epi16(0);
18342        assert_eq_m128i(r, e);
18343    }
18344
18345    #[simd_test(enable = "avx512bw,avx512vl")]
18346    const unsafe fn test_mm_maskz_slli_epi16() {
18347        let a = _mm_set1_epi16(1 << 15);
18348        let r = _mm_maskz_slli_epi16::<1>(0, a);
18349        assert_eq_m128i(r, _mm_setzero_si128());
18350        let r = _mm_maskz_slli_epi16::<1>(0b11111111, a);
18351        let e = _mm_set1_epi16(0);
18352        assert_eq_m128i(r, e);
18353    }
18354
18355    #[simd_test(enable = "avx512bw")]
18356    const unsafe fn test_mm512_sllv_epi16() {
18357        let a = _mm512_set1_epi16(1 << 15);
18358        let count = _mm512_set1_epi16(2);
18359        let r = _mm512_sllv_epi16(a, count);
18360        let e = _mm512_set1_epi16(0);
18361        assert_eq_m512i(r, e);
18362    }
18363
18364    #[simd_test(enable = "avx512bw")]
18365    const unsafe fn test_mm512_mask_sllv_epi16() {
18366        let a = _mm512_set1_epi16(1 << 15);
18367        let count = _mm512_set1_epi16(2);
18368        let r = _mm512_mask_sllv_epi16(a, 0, a, count);
18369        assert_eq_m512i(r, a);
18370        let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18371        let e = _mm512_set1_epi16(0);
18372        assert_eq_m512i(r, e);
18373    }
18374
18375    #[simd_test(enable = "avx512bw")]
18376    const unsafe fn test_mm512_maskz_sllv_epi16() {
18377        let a = _mm512_set1_epi16(1 << 15);
18378        let count = _mm512_set1_epi16(2);
18379        let r = _mm512_maskz_sllv_epi16(0, a, count);
18380        assert_eq_m512i(r, _mm512_setzero_si512());
18381        let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count);
18382        let e = _mm512_set1_epi16(0);
18383        assert_eq_m512i(r, e);
18384    }
18385
18386    #[simd_test(enable = "avx512bw,avx512vl")]
18387    const unsafe fn test_mm256_sllv_epi16() {
18388        let a = _mm256_set1_epi16(1 << 15);
18389        let count = _mm256_set1_epi16(2);
18390        let r = _mm256_sllv_epi16(a, count);
18391        let e = _mm256_set1_epi16(0);
18392        assert_eq_m256i(r, e);
18393    }
18394
18395    #[simd_test(enable = "avx512bw,avx512vl")]
18396    const unsafe fn test_mm256_mask_sllv_epi16() {
18397        let a = _mm256_set1_epi16(1 << 15);
18398        let count = _mm256_set1_epi16(2);
18399        let r = _mm256_mask_sllv_epi16(a, 0, a, count);
18400        assert_eq_m256i(r, a);
18401        let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count);
18402        let e = _mm256_set1_epi16(0);
18403        assert_eq_m256i(r, e);
18404    }
18405
18406    #[simd_test(enable = "avx512bw,avx512vl")]
18407    const unsafe fn test_mm256_maskz_sllv_epi16() {
18408        let a = _mm256_set1_epi16(1 << 15);
18409        let count = _mm256_set1_epi16(2);
18410        let r = _mm256_maskz_sllv_epi16(0, a, count);
18411        assert_eq_m256i(r, _mm256_setzero_si256());
18412        let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count);
18413        let e = _mm256_set1_epi16(0);
18414        assert_eq_m256i(r, e);
18415    }
18416
18417    #[simd_test(enable = "avx512bw,avx512vl")]
18418    const unsafe fn test_mm_sllv_epi16() {
18419        let a = _mm_set1_epi16(1 << 15);
18420        let count = _mm_set1_epi16(2);
18421        let r = _mm_sllv_epi16(a, count);
18422        let e = _mm_set1_epi16(0);
18423        assert_eq_m128i(r, e);
18424    }
18425
18426    #[simd_test(enable = "avx512bw,avx512vl")]
18427    const unsafe fn test_mm_mask_sllv_epi16() {
18428        let a = _mm_set1_epi16(1 << 15);
18429        let count = _mm_set1_epi16(2);
18430        let r = _mm_mask_sllv_epi16(a, 0, a, count);
18431        assert_eq_m128i(r, a);
18432        let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count);
18433        let e = _mm_set1_epi16(0);
18434        assert_eq_m128i(r, e);
18435    }
18436
18437    #[simd_test(enable = "avx512bw,avx512vl")]
18438    const unsafe fn test_mm_maskz_sllv_epi16() {
18439        let a = _mm_set1_epi16(1 << 15);
18440        let count = _mm_set1_epi16(2);
18441        let r = _mm_maskz_sllv_epi16(0, a, count);
18442        assert_eq_m128i(r, _mm_setzero_si128());
18443        let r = _mm_maskz_sllv_epi16(0b11111111, a, count);
18444        let e = _mm_set1_epi16(0);
18445        assert_eq_m128i(r, e);
18446    }
18447
18448    #[simd_test(enable = "avx512bw")]
18449    unsafe fn test_mm512_srl_epi16() {
18450        let a = _mm512_set1_epi16(1 << 1);
18451        let count = _mm_set1_epi16(2);
18452        let r = _mm512_srl_epi16(a, count);
18453        let e = _mm512_set1_epi16(0);
18454        assert_eq_m512i(r, e);
18455    }
18456
18457    #[simd_test(enable = "avx512bw")]
18458    unsafe fn test_mm512_mask_srl_epi16() {
18459        let a = _mm512_set1_epi16(1 << 1);
18460        let count = _mm_set1_epi16(2);
18461        let r = _mm512_mask_srl_epi16(a, 0, a, count);
18462        assert_eq_m512i(r, a);
18463        let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18464        let e = _mm512_set1_epi16(0);
18465        assert_eq_m512i(r, e);
18466    }
18467
18468    #[simd_test(enable = "avx512bw")]
18469    unsafe fn test_mm512_maskz_srl_epi16() {
18470        let a = _mm512_set1_epi16(1 << 1);
18471        let count = _mm_set1_epi16(2);
18472        let r = _mm512_maskz_srl_epi16(0, a, count);
18473        assert_eq_m512i(r, _mm512_setzero_si512());
18474        let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count);
18475        let e = _mm512_set1_epi16(0);
18476        assert_eq_m512i(r, e);
18477    }
18478
18479    #[simd_test(enable = "avx512bw,avx512vl")]
18480    unsafe fn test_mm256_mask_srl_epi16() {
18481        let a = _mm256_set1_epi16(1 << 1);
18482        let count = _mm_set1_epi16(2);
18483        let r = _mm256_mask_srl_epi16(a, 0, a, count);
18484        assert_eq_m256i(r, a);
18485        let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count);
18486        let e = _mm256_set1_epi16(0);
18487        assert_eq_m256i(r, e);
18488    }
18489
18490    #[simd_test(enable = "avx512bw,avx512vl")]
18491    unsafe fn test_mm256_maskz_srl_epi16() {
18492        let a = _mm256_set1_epi16(1 << 1);
18493        let count = _mm_set1_epi16(2);
18494        let r = _mm256_maskz_srl_epi16(0, a, count);
18495        assert_eq_m256i(r, _mm256_setzero_si256());
18496        let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count);
18497        let e = _mm256_set1_epi16(0);
18498        assert_eq_m256i(r, e);
18499    }
18500
18501    #[simd_test(enable = "avx512bw,avx512vl")]
18502    unsafe fn test_mm_mask_srl_epi16() {
18503        let a = _mm_set1_epi16(1 << 1);
18504        let count = _mm_set1_epi16(2);
18505        let r = _mm_mask_srl_epi16(a, 0, a, count);
18506        assert_eq_m128i(r, a);
18507        let r = _mm_mask_srl_epi16(a, 0b11111111, a, count);
18508        let e = _mm_set1_epi16(0);
18509        assert_eq_m128i(r, e);
18510    }
18511
18512    #[simd_test(enable = "avx512bw,avx512vl")]
18513    unsafe fn test_mm_maskz_srl_epi16() {
18514        let a = _mm_set1_epi16(1 << 1);
18515        let count = _mm_set1_epi16(2);
18516        let r = _mm_maskz_srl_epi16(0, a, count);
18517        assert_eq_m128i(r, _mm_setzero_si128());
18518        let r = _mm_maskz_srl_epi16(0b11111111, a, count);
18519        let e = _mm_set1_epi16(0);
18520        assert_eq_m128i(r, e);
18521    }
18522
18523    #[simd_test(enable = "avx512bw")]
18524    const unsafe fn test_mm512_srli_epi16() {
18525        let a = _mm512_set1_epi16(1 << 1);
18526        let r = _mm512_srli_epi16::<2>(a);
18527        let e = _mm512_set1_epi16(0);
18528        assert_eq_m512i(r, e);
18529    }
18530
18531    #[simd_test(enable = "avx512bw")]
18532    const unsafe fn test_mm512_mask_srli_epi16() {
18533        let a = _mm512_set1_epi16(1 << 1);
18534        let r = _mm512_mask_srli_epi16::<2>(a, 0, a);
18535        assert_eq_m512i(r, a);
18536        let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
18537        let e = _mm512_set1_epi16(0);
18538        assert_eq_m512i(r, e);
18539    }
18540
18541    #[simd_test(enable = "avx512bw")]
18542    const unsafe fn test_mm512_maskz_srli_epi16() {
18543        let a = _mm512_set1_epi16(1 << 1);
18544        let r = _mm512_maskz_srli_epi16::<2>(0, a);
18545        assert_eq_m512i(r, _mm512_setzero_si512());
18546        let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
18547        let e = _mm512_set1_epi16(0);
18548        assert_eq_m512i(r, e);
18549    }
18550
18551    #[simd_test(enable = "avx512bw,avx512vl")]
18552    const unsafe fn test_mm256_mask_srli_epi16() {
18553        let a = _mm256_set1_epi16(1 << 1);
18554        let r = _mm256_mask_srli_epi16::<2>(a, 0, a);
18555        assert_eq_m256i(r, a);
18556        let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a);
18557        let e = _mm256_set1_epi16(0);
18558        assert_eq_m256i(r, e);
18559    }
18560
18561    #[simd_test(enable = "avx512bw,avx512vl")]
18562    const unsafe fn test_mm256_maskz_srli_epi16() {
18563        let a = _mm256_set1_epi16(1 << 1);
18564        let r = _mm256_maskz_srli_epi16::<2>(0, a);
18565        assert_eq_m256i(r, _mm256_setzero_si256());
18566        let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a);
18567        let e = _mm256_set1_epi16(0);
18568        assert_eq_m256i(r, e);
18569    }
18570
18571    #[simd_test(enable = "avx512bw,avx512vl")]
18572    const unsafe fn test_mm_mask_srli_epi16() {
18573        let a = _mm_set1_epi16(1 << 1);
18574        let r = _mm_mask_srli_epi16::<2>(a, 0, a);
18575        assert_eq_m128i(r, a);
18576        let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a);
18577        let e = _mm_set1_epi16(0);
18578        assert_eq_m128i(r, e);
18579    }
18580
18581    #[simd_test(enable = "avx512bw,avx512vl")]
18582    const unsafe fn test_mm_maskz_srli_epi16() {
18583        let a = _mm_set1_epi16(1 << 1);
18584        let r = _mm_maskz_srli_epi16::<2>(0, a);
18585        assert_eq_m128i(r, _mm_setzero_si128());
18586        let r = _mm_maskz_srli_epi16::<2>(0b11111111, a);
18587        let e = _mm_set1_epi16(0);
18588        assert_eq_m128i(r, e);
18589    }
18590
18591    #[simd_test(enable = "avx512bw")]
18592    const unsafe fn test_mm512_srlv_epi16() {
18593        let a = _mm512_set1_epi16(1 << 1);
18594        let count = _mm512_set1_epi16(2);
18595        let r = _mm512_srlv_epi16(a, count);
18596        let e = _mm512_set1_epi16(0);
18597        assert_eq_m512i(r, e);
18598    }
18599
18600    #[simd_test(enable = "avx512bw")]
18601    const unsafe fn test_mm512_mask_srlv_epi16() {
18602        let a = _mm512_set1_epi16(1 << 1);
18603        let count = _mm512_set1_epi16(2);
18604        let r = _mm512_mask_srlv_epi16(a, 0, a, count);
18605        assert_eq_m512i(r, a);
18606        let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18607        let e = _mm512_set1_epi16(0);
18608        assert_eq_m512i(r, e);
18609    }
18610
18611    #[simd_test(enable = "avx512bw")]
18612    const unsafe fn test_mm512_maskz_srlv_epi16() {
18613        let a = _mm512_set1_epi16(1 << 1);
18614        let count = _mm512_set1_epi16(2);
18615        let r = _mm512_maskz_srlv_epi16(0, a, count);
18616        assert_eq_m512i(r, _mm512_setzero_si512());
18617        let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count);
18618        let e = _mm512_set1_epi16(0);
18619        assert_eq_m512i(r, e);
18620    }
18621
18622    #[simd_test(enable = "avx512bw,avx512vl")]
18623    const unsafe fn test_mm256_srlv_epi16() {
18624        let a = _mm256_set1_epi16(1 << 1);
18625        let count = _mm256_set1_epi16(2);
18626        let r = _mm256_srlv_epi16(a, count);
18627        let e = _mm256_set1_epi16(0);
18628        assert_eq_m256i(r, e);
18629    }
18630
18631    #[simd_test(enable = "avx512bw,avx512vl")]
18632    const unsafe fn test_mm256_mask_srlv_epi16() {
18633        let a = _mm256_set1_epi16(1 << 1);
18634        let count = _mm256_set1_epi16(2);
18635        let r = _mm256_mask_srlv_epi16(a, 0, a, count);
18636        assert_eq_m256i(r, a);
18637        let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count);
18638        let e = _mm256_set1_epi16(0);
18639        assert_eq_m256i(r, e);
18640    }
18641
18642    #[simd_test(enable = "avx512bw,avx512vl")]
18643    const unsafe fn test_mm256_maskz_srlv_epi16() {
18644        let a = _mm256_set1_epi16(1 << 1);
18645        let count = _mm256_set1_epi16(2);
18646        let r = _mm256_maskz_srlv_epi16(0, a, count);
18647        assert_eq_m256i(r, _mm256_setzero_si256());
18648        let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count);
18649        let e = _mm256_set1_epi16(0);
18650        assert_eq_m256i(r, e);
18651    }
18652
18653    #[simd_test(enable = "avx512bw,avx512vl")]
18654    const unsafe fn test_mm_srlv_epi16() {
18655        let a = _mm_set1_epi16(1 << 1);
18656        let count = _mm_set1_epi16(2);
18657        let r = _mm_srlv_epi16(a, count);
18658        let e = _mm_set1_epi16(0);
18659        assert_eq_m128i(r, e);
18660    }
18661
18662    #[simd_test(enable = "avx512bw,avx512vl")]
18663    const unsafe fn test_mm_mask_srlv_epi16() {
18664        let a = _mm_set1_epi16(1 << 1);
18665        let count = _mm_set1_epi16(2);
18666        let r = _mm_mask_srlv_epi16(a, 0, a, count);
18667        assert_eq_m128i(r, a);
18668        let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count);
18669        let e = _mm_set1_epi16(0);
18670        assert_eq_m128i(r, e);
18671    }
18672
18673    #[simd_test(enable = "avx512bw,avx512vl")]
18674    const unsafe fn test_mm_maskz_srlv_epi16() {
18675        let a = _mm_set1_epi16(1 << 1);
18676        let count = _mm_set1_epi16(2);
18677        let r = _mm_maskz_srlv_epi16(0, a, count);
18678        assert_eq_m128i(r, _mm_setzero_si128());
18679        let r = _mm_maskz_srlv_epi16(0b11111111, a, count);
18680        let e = _mm_set1_epi16(0);
18681        assert_eq_m128i(r, e);
18682    }
18683
18684    #[simd_test(enable = "avx512bw")]
18685    unsafe fn test_mm512_sra_epi16() {
18686        let a = _mm512_set1_epi16(8);
18687        let count = _mm_set1_epi16(1);
18688        let r = _mm512_sra_epi16(a, count);
18689        let e = _mm512_set1_epi16(0);
18690        assert_eq_m512i(r, e);
18691    }
18692
18693    #[simd_test(enable = "avx512bw")]
18694    unsafe fn test_mm512_mask_sra_epi16() {
18695        let a = _mm512_set1_epi16(8);
18696        let count = _mm_set1_epi16(1);
18697        let r = _mm512_mask_sra_epi16(a, 0, a, count);
18698        assert_eq_m512i(r, a);
18699        let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18700        let e = _mm512_set1_epi16(0);
18701        assert_eq_m512i(r, e);
18702    }
18703
18704    #[simd_test(enable = "avx512bw")]
18705    unsafe fn test_mm512_maskz_sra_epi16() {
18706        let a = _mm512_set1_epi16(8);
18707        let count = _mm_set1_epi16(1);
18708        let r = _mm512_maskz_sra_epi16(0, a, count);
18709        assert_eq_m512i(r, _mm512_setzero_si512());
18710        let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count);
18711        let e = _mm512_set1_epi16(0);
18712        assert_eq_m512i(r, e);
18713    }
18714
18715    #[simd_test(enable = "avx512bw,avx512vl")]
18716    unsafe fn test_mm256_mask_sra_epi16() {
18717        let a = _mm256_set1_epi16(8);
18718        let count = _mm_set1_epi16(1);
18719        let r = _mm256_mask_sra_epi16(a, 0, a, count);
18720        assert_eq_m256i(r, a);
18721        let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count);
18722        let e = _mm256_set1_epi16(0);
18723        assert_eq_m256i(r, e);
18724    }
18725
18726    #[simd_test(enable = "avx512bw,avx512vl")]
18727    unsafe fn test_mm256_maskz_sra_epi16() {
18728        let a = _mm256_set1_epi16(8);
18729        let count = _mm_set1_epi16(1);
18730        let r = _mm256_maskz_sra_epi16(0, a, count);
18731        assert_eq_m256i(r, _mm256_setzero_si256());
18732        let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count);
18733        let e = _mm256_set1_epi16(0);
18734        assert_eq_m256i(r, e);
18735    }
18736
18737    #[simd_test(enable = "avx512bw,avx512vl")]
18738    unsafe fn test_mm_mask_sra_epi16() {
18739        let a = _mm_set1_epi16(8);
18740        let count = _mm_set1_epi16(1);
18741        let r = _mm_mask_sra_epi16(a, 0, a, count);
18742        assert_eq_m128i(r, a);
18743        let r = _mm_mask_sra_epi16(a, 0b11111111, a, count);
18744        let e = _mm_set1_epi16(0);
18745        assert_eq_m128i(r, e);
18746    }
18747
18748    #[simd_test(enable = "avx512bw,avx512vl")]
18749    unsafe fn test_mm_maskz_sra_epi16() {
18750        let a = _mm_set1_epi16(8);
18751        let count = _mm_set1_epi16(1);
18752        let r = _mm_maskz_sra_epi16(0, a, count);
18753        assert_eq_m128i(r, _mm_setzero_si128());
18754        let r = _mm_maskz_sra_epi16(0b11111111, a, count);
18755        let e = _mm_set1_epi16(0);
18756        assert_eq_m128i(r, e);
18757    }
18758
18759    #[simd_test(enable = "avx512bw")]
18760    const unsafe fn test_mm512_srai_epi16() {
18761        let a = _mm512_set1_epi16(8);
18762        let r = _mm512_srai_epi16::<2>(a);
18763        let e = _mm512_set1_epi16(2);
18764        assert_eq_m512i(r, e);
18765    }
18766
18767    #[simd_test(enable = "avx512bw")]
18768    const unsafe fn test_mm512_mask_srai_epi16() {
18769        let a = _mm512_set1_epi16(8);
18770        let r = _mm512_mask_srai_epi16::<2>(a, 0, a);
18771        assert_eq_m512i(r, a);
18772        let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
18773        let e = _mm512_set1_epi16(2);
18774        assert_eq_m512i(r, e);
18775    }
18776
18777    #[simd_test(enable = "avx512bw")]
18778    const unsafe fn test_mm512_maskz_srai_epi16() {
18779        let a = _mm512_set1_epi16(8);
18780        let r = _mm512_maskz_srai_epi16::<2>(0, a);
18781        assert_eq_m512i(r, _mm512_setzero_si512());
18782        let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
18783        let e = _mm512_set1_epi16(2);
18784        assert_eq_m512i(r, e);
18785    }
18786
18787    #[simd_test(enable = "avx512bw,avx512vl")]
18788    const unsafe fn test_mm256_mask_srai_epi16() {
18789        let a = _mm256_set1_epi16(8);
18790        let r = _mm256_mask_srai_epi16::<2>(a, 0, a);
18791        assert_eq_m256i(r, a);
18792        let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a);
18793        let e = _mm256_set1_epi16(2);
18794        assert_eq_m256i(r, e);
18795    }
18796
18797    #[simd_test(enable = "avx512bw,avx512vl")]
18798    const unsafe fn test_mm256_maskz_srai_epi16() {
18799        let a = _mm256_set1_epi16(8);
18800        let r = _mm256_maskz_srai_epi16::<2>(0, a);
18801        assert_eq_m256i(r, _mm256_setzero_si256());
18802        let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a);
18803        let e = _mm256_set1_epi16(2);
18804        assert_eq_m256i(r, e);
18805    }
18806
18807    #[simd_test(enable = "avx512bw,avx512vl")]
18808    const unsafe fn test_mm_mask_srai_epi16() {
18809        let a = _mm_set1_epi16(8);
18810        let r = _mm_mask_srai_epi16::<2>(a, 0, a);
18811        assert_eq_m128i(r, a);
18812        let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a);
18813        let e = _mm_set1_epi16(2);
18814        assert_eq_m128i(r, e);
18815    }
18816
18817    #[simd_test(enable = "avx512bw,avx512vl")]
18818    const unsafe fn test_mm_maskz_srai_epi16() {
18819        let a = _mm_set1_epi16(8);
18820        let r = _mm_maskz_srai_epi16::<2>(0, a);
18821        assert_eq_m128i(r, _mm_setzero_si128());
18822        let r = _mm_maskz_srai_epi16::<2>(0b11111111, a);
18823        let e = _mm_set1_epi16(2);
18824        assert_eq_m128i(r, e);
18825    }
18826
18827    #[simd_test(enable = "avx512bw")]
18828    const unsafe fn test_mm512_srav_epi16() {
18829        let a = _mm512_set1_epi16(8);
18830        let count = _mm512_set1_epi16(2);
18831        let r = _mm512_srav_epi16(a, count);
18832        let e = _mm512_set1_epi16(2);
18833        assert_eq_m512i(r, e);
18834    }
18835
18836    #[simd_test(enable = "avx512bw")]
18837    const unsafe fn test_mm512_mask_srav_epi16() {
18838        let a = _mm512_set1_epi16(8);
18839        let count = _mm512_set1_epi16(2);
18840        let r = _mm512_mask_srav_epi16(a, 0, a, count);
18841        assert_eq_m512i(r, a);
18842        let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
18843        let e = _mm512_set1_epi16(2);
18844        assert_eq_m512i(r, e);
18845    }
18846
18847    #[simd_test(enable = "avx512bw")]
18848    const unsafe fn test_mm512_maskz_srav_epi16() {
18849        let a = _mm512_set1_epi16(8);
18850        let count = _mm512_set1_epi16(2);
18851        let r = _mm512_maskz_srav_epi16(0, a, count);
18852        assert_eq_m512i(r, _mm512_setzero_si512());
18853        let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count);
18854        let e = _mm512_set1_epi16(2);
18855        assert_eq_m512i(r, e);
18856    }
18857
18858    #[simd_test(enable = "avx512bw,avx512vl")]
18859    const unsafe fn test_mm256_srav_epi16() {
18860        let a = _mm256_set1_epi16(8);
18861        let count = _mm256_set1_epi16(2);
18862        let r = _mm256_srav_epi16(a, count);
18863        let e = _mm256_set1_epi16(2);
18864        assert_eq_m256i(r, e);
18865    }
18866
18867    #[simd_test(enable = "avx512bw,avx512vl")]
18868    const unsafe fn test_mm256_mask_srav_epi16() {
18869        let a = _mm256_set1_epi16(8);
18870        let count = _mm256_set1_epi16(2);
18871        let r = _mm256_mask_srav_epi16(a, 0, a, count);
18872        assert_eq_m256i(r, a);
18873        let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count);
18874        let e = _mm256_set1_epi16(2);
18875        assert_eq_m256i(r, e);
18876    }
18877
18878    #[simd_test(enable = "avx512bw,avx512vl")]
18879    const unsafe fn test_mm256_maskz_srav_epi16() {
18880        let a = _mm256_set1_epi16(8);
18881        let count = _mm256_set1_epi16(2);
18882        let r = _mm256_maskz_srav_epi16(0, a, count);
18883        assert_eq_m256i(r, _mm256_setzero_si256());
18884        let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count);
18885        let e = _mm256_set1_epi16(2);
18886        assert_eq_m256i(r, e);
18887    }
18888
18889    #[simd_test(enable = "avx512bw,avx512vl")]
18890    const unsafe fn test_mm_srav_epi16() {
18891        let a = _mm_set1_epi16(8);
18892        let count = _mm_set1_epi16(2);
18893        let r = _mm_srav_epi16(a, count);
18894        let e = _mm_set1_epi16(2);
18895        assert_eq_m128i(r, e);
18896    }
18897
18898    #[simd_test(enable = "avx512bw,avx512vl")]
18899    const unsafe fn test_mm_mask_srav_epi16() {
18900        let a = _mm_set1_epi16(8);
18901        let count = _mm_set1_epi16(2);
18902        let r = _mm_mask_srav_epi16(a, 0, a, count);
18903        assert_eq_m128i(r, a);
18904        let r = _mm_mask_srav_epi16(a, 0b11111111, a, count);
18905        let e = _mm_set1_epi16(2);
18906        assert_eq_m128i(r, e);
18907    }
18908
18909    #[simd_test(enable = "avx512bw,avx512vl")]
18910    const unsafe fn test_mm_maskz_srav_epi16() {
18911        let a = _mm_set1_epi16(8);
18912        let count = _mm_set1_epi16(2);
18913        let r = _mm_maskz_srav_epi16(0, a, count);
18914        assert_eq_m128i(r, _mm_setzero_si128());
18915        let r = _mm_maskz_srav_epi16(0b11111111, a, count);
18916        let e = _mm_set1_epi16(2);
18917        assert_eq_m128i(r, e);
18918    }
18919
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutex2var_epi16() {
        // Lane i of `a` (counted from the low end) holds 31 - i, since
        // _mm512_set_epi16 lists arguments from the highest lane down.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Index bit 5 (1<<5) redirects the lookup into `b`; indices without
        // it select lane (idx % 32) of `a`.
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_permutex2var_epi16(a, idx, b);
        // a-lookups yield 31 - idx; b-lookups yield 100.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
18937
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutex2var_epi16() {
        // Lane i of `a` holds 31 - i (set_epi16 lists highest lane first).
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Index bit 5 (1<<5) selects from `b` instead of `a`.
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        // With a zero writemask every lane is copied from `a` (the src arg).
        let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b);
        // Full mask: a-lookups yield 31 - idx; b-lookups yield 100.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
18957
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutex2var_epi16() {
        // Lane i of `a` holds 31 - i (set_epi16 lists highest lane first).
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Index bit 5 (1<<5) selects from `b` instead of `a`.
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        // Zero mask zeroes the whole result.
        let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b);
        // Full mask: a-lookups yield 31 - idx; b-lookups yield 100.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
18977
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask2_permutex2var_epi16() {
        // Lane i of `a` holds 31 - i (set_epi16 lists highest lane first).
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Index bit 5 (1<<5) selects from `b` instead of `a`.
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        // In the mask2 variant, masked-off lanes are copied from `idx`.
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b);
        // Full mask: a-lookups yield 31 - idx; b-lookups yield 100.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }
18997
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_permutex2var_epi16() {
        // Lane i of `a` holds 15 - i (set_epi16 lists highest lane first).
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        // Index bit 4 (1<<4) selects from `b`; otherwise lane (idx % 16) of `a`.
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_permutex2var_epi16(a, idx, b);
        // a-lookups yield 15 - idx; b-lookups yield 100.
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }
19010
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_permutex2var_epi16() {
        // Lane i of `a` holds 15 - i; index bit 4 (1<<4) selects from `b`.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        // Zero writemask copies `a` (the src argument) unchanged.
        let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b);
        // Full mask: a-lookups yield 15 - idx; b-lookups yield 100.
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }
19025
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_permutex2var_epi16() {
        // Lane i of `a` holds 15 - i; index bit 4 (1<<4) selects from `b`.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        // Zero mask zeroes the whole result.
        let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b);
        // Full mask: a-lookups yield 15 - idx; b-lookups yield 100.
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }
19040
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask2_permutex2var_epi16() {
        // Lane i of `a` holds 15 - i; index bit 4 (1<<4) selects from `b`.
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        // In the mask2 variant, masked-off lanes are copied from `idx`.
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b);
        // Full mask: a-lookups yield 15 - idx; b-lookups yield 100.
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }
19056
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_permutex2var_epi16() {
        // Lane i of `a` holds 7 - i; index bit 3 (1<<3) selects from `b`.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_permutex2var_epi16(a, idx, b);
        // a-lookups yield 7 - idx; b-lookups yield 100.
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }
19066
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_permutex2var_epi16() {
        // Lane i of `a` holds 7 - i; index bit 3 (1<<3) selects from `b`.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        // Zero writemask copies `a` (the src argument) unchanged.
        let r = _mm_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b);
        // Full mask: a-lookups yield 7 - idx; b-lookups yield 100.
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }
19078
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_permutex2var_epi16() {
        // Lane i of `a` holds 7 - i; index bit 3 (1<<3) selects from `b`.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        // Zero mask zeroes the whole result.
        let r = _mm_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b);
        // Full mask: a-lookups yield 7 - idx; b-lookups yield 100.
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }
19090
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask2_permutex2var_epi16() {
        // Lane i of `a` holds 7 - i; index bit 3 (1<<3) selects from `b`.
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        // In the mask2 variant, masked-off lanes are copied from `idx`.
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b);
        // Full mask: a-lookups yield 7 - idx; b-lookups yield 100.
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }
19102
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutexvar_epi16() {
        // Every index is 1, so each output lane gets lane 1 of `a`.
        let idx = _mm512_set1_epi16(1);
        // Lane i of `a` holds 31 - i (set_epi16 lists highest lane first),
        // so lane 1 is 30.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_permutexvar_epi16(idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }
19113
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutexvar_epi16() {
        // Every index is 1; lane 1 of `a` holds 30 (lane i = 31 - i).
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Zero writemask copies `a` (the src argument); full mask
        // broadcasts lane 1.
        let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }
19126
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutexvar_epi16() {
        // Every index is 1; lane 1 of `a` holds 30 (lane i = 31 - i).
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        // Zero mask zeroes the result; full mask broadcasts lane 1.
        let r = _mm512_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }
19139
19140    #[simd_test(enable = "avx512bw,avx512vl")]
19141    unsafe fn test_mm256_permutexvar_epi16() {
19142        let idx = _mm256_set1_epi16(1);
19143        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19144        let r = _mm256_permutexvar_epi16(idx, a);
19145        let e = _mm256_set1_epi16(14);
19146        assert_eq_m256i(r, e);
19147    }
19148
19149    #[simd_test(enable = "avx512bw,avx512vl")]
19150    unsafe fn test_mm256_mask_permutexvar_epi16() {
19151        let idx = _mm256_set1_epi16(1);
19152        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19153        let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
19154        assert_eq_m256i(r, a);
19155        let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a);
19156        let e = _mm256_set1_epi16(14);
19157        assert_eq_m256i(r, e);
19158    }
19159
19160    #[simd_test(enable = "avx512bw,avx512vl")]
19161    unsafe fn test_mm256_maskz_permutexvar_epi16() {
19162        let idx = _mm256_set1_epi16(1);
19163        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19164        let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
19165        assert_eq_m256i(r, _mm256_setzero_si256());
19166        let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a);
19167        let e = _mm256_set1_epi16(14);
19168        assert_eq_m256i(r, e);
19169    }
19170
19171    #[simd_test(enable = "avx512bw,avx512vl")]
19172    unsafe fn test_mm_permutexvar_epi16() {
19173        let idx = _mm_set1_epi16(1);
19174        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19175        let r = _mm_permutexvar_epi16(idx, a);
19176        let e = _mm_set1_epi16(6);
19177        assert_eq_m128i(r, e);
19178    }
19179
19180    #[simd_test(enable = "avx512bw,avx512vl")]
19181    unsafe fn test_mm_mask_permutexvar_epi16() {
19182        let idx = _mm_set1_epi16(1);
19183        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19184        let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
19185        assert_eq_m128i(r, a);
19186        let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a);
19187        let e = _mm_set1_epi16(6);
19188        assert_eq_m128i(r, e);
19189    }
19190
19191    #[simd_test(enable = "avx512bw,avx512vl")]
19192    unsafe fn test_mm_maskz_permutexvar_epi16() {
19193        let idx = _mm_set1_epi16(1);
19194        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19195        let r = _mm_maskz_permutexvar_epi16(0, idx, a);
19196        assert_eq_m128i(r, _mm_setzero_si128());
19197        let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a);
19198        let e = _mm_set1_epi16(6);
19199        assert_eq_m128i(r, e);
19200    }
19201
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_blend_epi16() {
        // A set mask bit selects from b (2), a clear bit selects from a (1);
        // bit i of the mask controls element i (lowest element is the last
        // set_epi16 argument).
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
19212
19213    #[simd_test(enable = "avx512bw,avx512vl")]
19214    const unsafe fn test_mm256_mask_blend_epi16() {
19215        let a = _mm256_set1_epi16(1);
19216        let b = _mm256_set1_epi16(2);
19217        let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
19218        let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
19219        assert_eq_m256i(r, e);
19220    }
19221
19222    #[simd_test(enable = "avx512bw,avx512vl")]
19223    const unsafe fn test_mm_mask_blend_epi16() {
19224        let a = _mm_set1_epi16(1);
19225        let b = _mm_set1_epi16(2);
19226        let r = _mm_mask_blend_epi16(0b11110000, a, b);
19227        let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
19228        assert_eq_m128i(r, e);
19229    }
19230
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_blend_epi8() {
        // A set mask bit selects from b (2), a clear bit selects from a (1);
        // bit i of the 64-bit mask controls byte i.
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_blend_epi8(
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
19247
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_blend_epi8() {
        // Each set group of eight mask bits selects bytes from b (2); clear
        // groups select from a (1).
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
19258
19259    #[simd_test(enable = "avx512bw,avx512vl")]
19260    const unsafe fn test_mm_mask_blend_epi8() {
19261        let a = _mm_set1_epi8(1);
19262        let b = _mm_set1_epi8(2);
19263        let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
19264        let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
19265        assert_eq_m128i(r, e);
19266    }
19267
19268    #[simd_test(enable = "avx512bw")]
19269    const unsafe fn test_mm512_broadcastw_epi16() {
19270        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19271        let r = _mm512_broadcastw_epi16(a);
19272        let e = _mm512_set1_epi16(24);
19273        assert_eq_m512i(r, e);
19274    }
19275
19276    #[simd_test(enable = "avx512bw")]
19277    const unsafe fn test_mm512_mask_broadcastw_epi16() {
19278        let src = _mm512_set1_epi16(1);
19279        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19280        let r = _mm512_mask_broadcastw_epi16(src, 0, a);
19281        assert_eq_m512i(r, src);
19282        let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a);
19283        let e = _mm512_set1_epi16(24);
19284        assert_eq_m512i(r, e);
19285    }
19286
19287    #[simd_test(enable = "avx512bw")]
19288    const unsafe fn test_mm512_maskz_broadcastw_epi16() {
19289        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19290        let r = _mm512_maskz_broadcastw_epi16(0, a);
19291        assert_eq_m512i(r, _mm512_setzero_si512());
19292        let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a);
19293        let e = _mm512_set1_epi16(24);
19294        assert_eq_m512i(r, e);
19295    }
19296
19297    #[simd_test(enable = "avx512bw,avx512vl")]
19298    const unsafe fn test_mm256_mask_broadcastw_epi16() {
19299        let src = _mm256_set1_epi16(1);
19300        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19301        let r = _mm256_mask_broadcastw_epi16(src, 0, a);
19302        assert_eq_m256i(r, src);
19303        let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a);
19304        let e = _mm256_set1_epi16(24);
19305        assert_eq_m256i(r, e);
19306    }
19307
19308    #[simd_test(enable = "avx512bw,avx512vl")]
19309    const unsafe fn test_mm256_maskz_broadcastw_epi16() {
19310        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19311        let r = _mm256_maskz_broadcastw_epi16(0, a);
19312        assert_eq_m256i(r, _mm256_setzero_si256());
19313        let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a);
19314        let e = _mm256_set1_epi16(24);
19315        assert_eq_m256i(r, e);
19316    }
19317
19318    #[simd_test(enable = "avx512bw,avx512vl")]
19319    const unsafe fn test_mm_mask_broadcastw_epi16() {
19320        let src = _mm_set1_epi16(1);
19321        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19322        let r = _mm_mask_broadcastw_epi16(src, 0, a);
19323        assert_eq_m128i(r, src);
19324        let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a);
19325        let e = _mm_set1_epi16(24);
19326        assert_eq_m128i(r, e);
19327    }
19328
19329    #[simd_test(enable = "avx512bw,avx512vl")]
19330    const unsafe fn test_mm_maskz_broadcastw_epi16() {
19331        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
19332        let r = _mm_maskz_broadcastw_epi16(0, a);
19333        assert_eq_m128i(r, _mm_setzero_si128());
19334        let r = _mm_maskz_broadcastw_epi16(0b11111111, a);
19335        let e = _mm_set1_epi16(24);
19336        assert_eq_m128i(r, e);
19337    }
19338
19339    #[simd_test(enable = "avx512bw")]
19340    const unsafe fn test_mm512_broadcastb_epi8() {
19341        let a = _mm_set_epi8(
19342            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19343        );
19344        let r = _mm512_broadcastb_epi8(a);
19345        let e = _mm512_set1_epi8(32);
19346        assert_eq_m512i(r, e);
19347    }
19348
19349    #[simd_test(enable = "avx512bw")]
19350    const unsafe fn test_mm512_mask_broadcastb_epi8() {
19351        let src = _mm512_set1_epi8(1);
19352        let a = _mm_set_epi8(
19353            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19354        );
19355        let r = _mm512_mask_broadcastb_epi8(src, 0, a);
19356        assert_eq_m512i(r, src);
19357        let r = _mm512_mask_broadcastb_epi8(
19358            src,
19359            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19360            a,
19361        );
19362        let e = _mm512_set1_epi8(32);
19363        assert_eq_m512i(r, e);
19364    }
19365
19366    #[simd_test(enable = "avx512bw")]
19367    const unsafe fn test_mm512_maskz_broadcastb_epi8() {
19368        let a = _mm_set_epi8(
19369            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19370        );
19371        let r = _mm512_maskz_broadcastb_epi8(0, a);
19372        assert_eq_m512i(r, _mm512_setzero_si512());
19373        let r = _mm512_maskz_broadcastb_epi8(
19374            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19375            a,
19376        );
19377        let e = _mm512_set1_epi8(32);
19378        assert_eq_m512i(r, e);
19379    }
19380
19381    #[simd_test(enable = "avx512bw,avx512vl")]
19382    const unsafe fn test_mm256_mask_broadcastb_epi8() {
19383        let src = _mm256_set1_epi8(1);
19384        let a = _mm_set_epi8(
19385            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19386        );
19387        let r = _mm256_mask_broadcastb_epi8(src, 0, a);
19388        assert_eq_m256i(r, src);
19389        let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a);
19390        let e = _mm256_set1_epi8(32);
19391        assert_eq_m256i(r, e);
19392    }
19393
19394    #[simd_test(enable = "avx512bw,avx512vl")]
19395    const unsafe fn test_mm256_maskz_broadcastb_epi8() {
19396        let a = _mm_set_epi8(
19397            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19398        );
19399        let r = _mm256_maskz_broadcastb_epi8(0, a);
19400        assert_eq_m256i(r, _mm256_setzero_si256());
19401        let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a);
19402        let e = _mm256_set1_epi8(32);
19403        assert_eq_m256i(r, e);
19404    }
19405
19406    #[simd_test(enable = "avx512bw,avx512vl")]
19407    const unsafe fn test_mm_mask_broadcastb_epi8() {
19408        let src = _mm_set1_epi8(1);
19409        let a = _mm_set_epi8(
19410            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19411        );
19412        let r = _mm_mask_broadcastb_epi8(src, 0, a);
19413        assert_eq_m128i(r, src);
19414        let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a);
19415        let e = _mm_set1_epi8(32);
19416        assert_eq_m128i(r, e);
19417    }
19418
19419    #[simd_test(enable = "avx512bw,avx512vl")]
19420    const unsafe fn test_mm_maskz_broadcastb_epi8() {
19421        let a = _mm_set_epi8(
19422            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
19423        );
19424        let r = _mm_maskz_broadcastb_epi8(0, a);
19425        assert_eq_m128i(r, _mm_setzero_si128());
19426        let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a);
19427        let e = _mm_set1_epi8(32);
19428        assert_eq_m128i(r, e);
19429    }
19430
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpackhi_epi16(a, b);
        // Within each 128-bit lane the four high 16-bit elements of a and b
        // are interleaved, as the expected vector below shows.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
19445
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero mask: all elements are copied from the src operand (a).
        let r = _mm512_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Full mask: per-128-bit-lane interleave of the high halves of a and b.
        let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
19462
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero mask zeroes the whole result.
        let r = _mm512_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask: per-128-bit-lane interleave of the high halves of a and b.
        let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }
19479
19480    #[simd_test(enable = "avx512bw,avx512vl")]
19481    const unsafe fn test_mm256_mask_unpackhi_epi16() {
19482        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19483        let b = _mm256_set_epi16(
19484            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19485        );
19486        let r = _mm256_mask_unpackhi_epi16(a, 0, a, b);
19487        assert_eq_m256i(r, a);
19488        let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b);
19489        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
19490        assert_eq_m256i(r, e);
19491    }
19492
19493    #[simd_test(enable = "avx512bw,avx512vl")]
19494    const unsafe fn test_mm256_maskz_unpackhi_epi16() {
19495        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19496        let b = _mm256_set_epi16(
19497            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19498        );
19499        let r = _mm256_maskz_unpackhi_epi16(0, a, b);
19500        assert_eq_m256i(r, _mm256_setzero_si256());
19501        let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b);
19502        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
19503        assert_eq_m256i(r, e);
19504    }
19505
19506    #[simd_test(enable = "avx512bw,avx512vl")]
19507    const unsafe fn test_mm_mask_unpackhi_epi16() {
19508        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
19509        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
19510        let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
19511        assert_eq_m128i(r, a);
19512        let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b);
19513        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
19514        assert_eq_m128i(r, e);
19515    }
19516
19517    #[simd_test(enable = "avx512bw,avx512vl")]
19518    const unsafe fn test_mm_maskz_unpackhi_epi16() {
19519        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
19520        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
19521        let r = _mm_maskz_unpackhi_epi16(0, a, b);
19522        assert_eq_m128i(r, _mm_setzero_si128());
19523        let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b);
19524        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
19525        assert_eq_m128i(r, e);
19526    }
19527
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpackhi_epi8(a, b);
        // Within each 128-bit lane the eight high bytes of a and b are
        // interleaved, as the expected vector below shows.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
19548
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        // Zero mask: all bytes are copied from the src operand (a).
        let r = _mm512_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Full 64-bit mask: per-128-bit-lane interleave of the high bytes.
        let r = _mm512_mask_unpackhi_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
19576
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        // Zero mask zeroes the whole result.
        let r = _mm512_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full 64-bit mask: per-128-bit-lane interleave of the high bytes.
        let r = _mm512_maskz_unpackhi_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }
19603
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        // Zero mask: all bytes are copied from the src operand (a).
        let r = _mm256_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        // Full mask: per-128-bit-lane interleave of the high bytes of a and b.
        let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }
19620
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        // Zero mask zeroes the whole result.
        let r = _mm256_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        // Full mask: per-128-bit-lane interleave of the high bytes of a and b.
        let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }
19637
19638    #[simd_test(enable = "avx512bw,avx512vl")]
19639    const unsafe fn test_mm_mask_unpackhi_epi8() {
19640        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19641        let b = _mm_set_epi8(
19642            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
19643        );
19644        let r = _mm_mask_unpackhi_epi8(a, 0, a, b);
19645        assert_eq_m128i(r, a);
19646        let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b);
19647        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
19648        assert_eq_m128i(r, e);
19649    }
19650
19651    #[simd_test(enable = "avx512bw,avx512vl")]
19652    const unsafe fn test_mm_maskz_unpackhi_epi8() {
19653        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19654        let b = _mm_set_epi8(
19655            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
19656        );
19657        let r = _mm_maskz_unpackhi_epi8(0, a, b);
19658        assert_eq_m128i(r, _mm_setzero_si128());
19659        let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b);
19660        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
19661        assert_eq_m128i(r, e);
19662    }
19663
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpacklo_epi16(a, b);
        // Within each 128-bit lane the four low 16-bit elements of a and b
        // are interleaved, as the expected vector below shows.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
19678
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero mask: all elements are copied from the src operand (a).
        let r = _mm512_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        // Full mask: per-128-bit-lane interleave of the low halves of a and b.
        let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
19695
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        // Zero mask zeroes the whole result.
        let r = _mm512_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // Full mask: per-128-bit-lane interleave of the low halves of a and b.
        let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }
19712
19713    #[simd_test(enable = "avx512bw,avx512vl")]
19714    const unsafe fn test_mm256_mask_unpacklo_epi16() {
19715        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19716        let b = _mm256_set_epi16(
19717            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19718        );
19719        let r = _mm256_mask_unpacklo_epi16(a, 0, a, b);
19720        assert_eq_m256i(r, a);
19721        let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b);
19722        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
19723        assert_eq_m256i(r, e);
19724    }
19725
19726    #[simd_test(enable = "avx512bw,avx512vl")]
19727    const unsafe fn test_mm256_maskz_unpacklo_epi16() {
19728        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19729        let b = _mm256_set_epi16(
19730            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
19731        );
19732        let r = _mm256_maskz_unpacklo_epi16(0, a, b);
19733        assert_eq_m256i(r, _mm256_setzero_si256());
19734        let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b);
19735        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
19736        assert_eq_m256i(r, e);
19737    }
19738
19739    #[simd_test(enable = "avx512bw,avx512vl")]
19740    const unsafe fn test_mm_mask_unpacklo_epi16() {
19741        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
19742        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
19743        let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
19744        assert_eq_m128i(r, a);
19745        let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b);
19746        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
19747        assert_eq_m128i(r, e);
19748    }
19749
19750    #[simd_test(enable = "avx512bw,avx512vl")]
19751    const unsafe fn test_mm_maskz_unpacklo_epi16() {
19752        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
19753        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
19754        let r = _mm_maskz_unpacklo_epi16(0, a, b);
19755        assert_eq_m128i(r, _mm_setzero_si128());
19756        let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b);
19757        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
19758        assert_eq_m128i(r, e);
19759    }
19760
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpacklo_epi8(a, b);
        // Within each 128-bit lane the eight low bytes of a and b are
        // interleaved, as the expected vector below shows.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
19781
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_unpacklo_epi8() {
        // Masked variant: masked-off byte lanes are copied from the src operand
        // (here `a`), so a zero mask returns `a` unchanged.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        // All 64 mask bits set: every lane takes the interleaved result.
        let r = _mm512_mask_unpacklo_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        // Same expected vector as the unmasked test: per-128-bit-lane interleave
        // of the low-half bytes of `b` and `a`.
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
19809
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_unpacklo_epi8() {
        // Zeroing-masked variant: masked-off byte lanes become zero, so a zero
        // mask yields the all-zero vector.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        // All 64 mask bits set: the full interleaved result survives.
        let r = _mm512_maskz_unpacklo_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }
19836
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_unpacklo_epi8() {
        // 256-bit masked unpack: zero mask returns src (`a`); full 32-bit mask
        // returns the per-128-bit-lane interleave of the low bytes of `b` and `a`.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }
19853
    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_unpacklo_epi8() {
        // 256-bit zeroing-masked unpack: zero mask yields all zeros; full mask
        // yields the same interleaved result as the merge-masked test above.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }
19870
19871    #[simd_test(enable = "avx512bw,avx512vl")]
19872    const unsafe fn test_mm_mask_unpacklo_epi8() {
19873        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19874        let b = _mm_set_epi8(
19875            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
19876        );
19877        let r = _mm_mask_unpacklo_epi8(a, 0, a, b);
19878        assert_eq_m128i(r, a);
19879        let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b);
19880        let e = _mm_set_epi8(
19881            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
19882        );
19883        assert_eq_m128i(r, e);
19884    }
19885
19886    #[simd_test(enable = "avx512bw,avx512vl")]
19887    const unsafe fn test_mm_maskz_unpacklo_epi8() {
19888        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
19889        let b = _mm_set_epi8(
19890            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
19891        );
19892        let r = _mm_maskz_unpacklo_epi8(0, a, b);
19893        assert_eq_m128i(r, _mm_setzero_si128());
19894        let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b);
19895        let e = _mm_set_epi8(
19896            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
19897        );
19898        assert_eq_m128i(r, e);
19899    }
19900
19901    #[simd_test(enable = "avx512bw")]
19902    const unsafe fn test_mm512_mask_mov_epi16() {
19903        let src = _mm512_set1_epi16(1);
19904        let a = _mm512_set1_epi16(2);
19905        let r = _mm512_mask_mov_epi16(src, 0, a);
19906        assert_eq_m512i(r, src);
19907        let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a);
19908        assert_eq_m512i(r, a);
19909    }
19910
19911    #[simd_test(enable = "avx512bw")]
19912    const unsafe fn test_mm512_maskz_mov_epi16() {
19913        let a = _mm512_set1_epi16(2);
19914        let r = _mm512_maskz_mov_epi16(0, a);
19915        assert_eq_m512i(r, _mm512_setzero_si512());
19916        let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a);
19917        assert_eq_m512i(r, a);
19918    }
19919
19920    #[simd_test(enable = "avx512bw,avx512vl")]
19921    const unsafe fn test_mm256_mask_mov_epi16() {
19922        let src = _mm256_set1_epi16(1);
19923        let a = _mm256_set1_epi16(2);
19924        let r = _mm256_mask_mov_epi16(src, 0, a);
19925        assert_eq_m256i(r, src);
19926        let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a);
19927        assert_eq_m256i(r, a);
19928    }
19929
19930    #[simd_test(enable = "avx512bw,avx512vl")]
19931    const unsafe fn test_mm256_maskz_mov_epi16() {
19932        let a = _mm256_set1_epi16(2);
19933        let r = _mm256_maskz_mov_epi16(0, a);
19934        assert_eq_m256i(r, _mm256_setzero_si256());
19935        let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a);
19936        assert_eq_m256i(r, a);
19937    }
19938
19939    #[simd_test(enable = "avx512bw,avx512vl")]
19940    const unsafe fn test_mm_mask_mov_epi16() {
19941        let src = _mm_set1_epi16(1);
19942        let a = _mm_set1_epi16(2);
19943        let r = _mm_mask_mov_epi16(src, 0, a);
19944        assert_eq_m128i(r, src);
19945        let r = _mm_mask_mov_epi16(src, 0b11111111, a);
19946        assert_eq_m128i(r, a);
19947    }
19948
19949    #[simd_test(enable = "avx512bw,avx512vl")]
19950    const unsafe fn test_mm_maskz_mov_epi16() {
19951        let a = _mm_set1_epi16(2);
19952        let r = _mm_maskz_mov_epi16(0, a);
19953        assert_eq_m128i(r, _mm_setzero_si128());
19954        let r = _mm_maskz_mov_epi16(0b11111111, a);
19955        assert_eq_m128i(r, a);
19956    }
19957
19958    #[simd_test(enable = "avx512bw")]
19959    const unsafe fn test_mm512_mask_mov_epi8() {
19960        let src = _mm512_set1_epi8(1);
19961        let a = _mm512_set1_epi8(2);
19962        let r = _mm512_mask_mov_epi8(src, 0, a);
19963        assert_eq_m512i(r, src);
19964        let r = _mm512_mask_mov_epi8(
19965            src,
19966            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19967            a,
19968        );
19969        assert_eq_m512i(r, a);
19970    }
19971
19972    #[simd_test(enable = "avx512bw")]
19973    const unsafe fn test_mm512_maskz_mov_epi8() {
19974        let a = _mm512_set1_epi8(2);
19975        let r = _mm512_maskz_mov_epi8(0, a);
19976        assert_eq_m512i(r, _mm512_setzero_si512());
19977        let r = _mm512_maskz_mov_epi8(
19978            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19979            a,
19980        );
19981        assert_eq_m512i(r, a);
19982    }
19983
19984    #[simd_test(enable = "avx512bw,avx512vl")]
19985    const unsafe fn test_mm256_mask_mov_epi8() {
19986        let src = _mm256_set1_epi8(1);
19987        let a = _mm256_set1_epi8(2);
19988        let r = _mm256_mask_mov_epi8(src, 0, a);
19989        assert_eq_m256i(r, src);
19990        let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a);
19991        assert_eq_m256i(r, a);
19992    }
19993
19994    #[simd_test(enable = "avx512bw,avx512vl")]
19995    const unsafe fn test_mm256_maskz_mov_epi8() {
19996        let a = _mm256_set1_epi8(2);
19997        let r = _mm256_maskz_mov_epi8(0, a);
19998        assert_eq_m256i(r, _mm256_setzero_si256());
19999        let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a);
20000        assert_eq_m256i(r, a);
20001    }
20002
20003    #[simd_test(enable = "avx512bw,avx512vl")]
20004    const unsafe fn test_mm_mask_mov_epi8() {
20005        let src = _mm_set1_epi8(1);
20006        let a = _mm_set1_epi8(2);
20007        let r = _mm_mask_mov_epi8(src, 0, a);
20008        assert_eq_m128i(r, src);
20009        let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a);
20010        assert_eq_m128i(r, a);
20011    }
20012
20013    #[simd_test(enable = "avx512bw,avx512vl")]
20014    const unsafe fn test_mm_maskz_mov_epi8() {
20015        let a = _mm_set1_epi8(2);
20016        let r = _mm_maskz_mov_epi8(0, a);
20017        assert_eq_m128i(r, _mm_setzero_si128());
20018        let r = _mm_maskz_mov_epi8(0b11111111_11111111, a);
20019        assert_eq_m128i(r, a);
20020    }
20021
20022    #[simd_test(enable = "avx512bw")]
20023    const unsafe fn test_mm512_mask_set1_epi16() {
20024        let src = _mm512_set1_epi16(2);
20025        let a: i16 = 11;
20026        let r = _mm512_mask_set1_epi16(src, 0, a);
20027        assert_eq_m512i(r, src);
20028        let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a);
20029        let e = _mm512_set1_epi16(11);
20030        assert_eq_m512i(r, e);
20031    }
20032
20033    #[simd_test(enable = "avx512bw")]
20034    const unsafe fn test_mm512_maskz_set1_epi16() {
20035        let a: i16 = 11;
20036        let r = _mm512_maskz_set1_epi16(0, a);
20037        assert_eq_m512i(r, _mm512_setzero_si512());
20038        let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a);
20039        let e = _mm512_set1_epi16(11);
20040        assert_eq_m512i(r, e);
20041    }
20042
20043    #[simd_test(enable = "avx512bw,avx512vl")]
20044    const unsafe fn test_mm256_mask_set1_epi16() {
20045        let src = _mm256_set1_epi16(2);
20046        let a: i16 = 11;
20047        let r = _mm256_mask_set1_epi16(src, 0, a);
20048        assert_eq_m256i(r, src);
20049        let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a);
20050        let e = _mm256_set1_epi16(11);
20051        assert_eq_m256i(r, e);
20052    }
20053
20054    #[simd_test(enable = "avx512bw,avx512vl")]
20055    const unsafe fn test_mm256_maskz_set1_epi16() {
20056        let a: i16 = 11;
20057        let r = _mm256_maskz_set1_epi16(0, a);
20058        assert_eq_m256i(r, _mm256_setzero_si256());
20059        let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a);
20060        let e = _mm256_set1_epi16(11);
20061        assert_eq_m256i(r, e);
20062    }
20063
20064    #[simd_test(enable = "avx512bw,avx512vl")]
20065    const unsafe fn test_mm_mask_set1_epi16() {
20066        let src = _mm_set1_epi16(2);
20067        let a: i16 = 11;
20068        let r = _mm_mask_set1_epi16(src, 0, a);
20069        assert_eq_m128i(r, src);
20070        let r = _mm_mask_set1_epi16(src, 0b11111111, a);
20071        let e = _mm_set1_epi16(11);
20072        assert_eq_m128i(r, e);
20073    }
20074
20075    #[simd_test(enable = "avx512bw,avx512vl")]
20076    const unsafe fn test_mm_maskz_set1_epi16() {
20077        let a: i16 = 11;
20078        let r = _mm_maskz_set1_epi16(0, a);
20079        assert_eq_m128i(r, _mm_setzero_si128());
20080        let r = _mm_maskz_set1_epi16(0b11111111, a);
20081        let e = _mm_set1_epi16(11);
20082        assert_eq_m128i(r, e);
20083    }
20084
20085    #[simd_test(enable = "avx512bw")]
20086    const unsafe fn test_mm512_mask_set1_epi8() {
20087        let src = _mm512_set1_epi8(2);
20088        let a: i8 = 11;
20089        let r = _mm512_mask_set1_epi8(src, 0, a);
20090        assert_eq_m512i(r, src);
20091        let r = _mm512_mask_set1_epi8(
20092            src,
20093            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
20094            a,
20095        );
20096        let e = _mm512_set1_epi8(11);
20097        assert_eq_m512i(r, e);
20098    }
20099
20100    #[simd_test(enable = "avx512bw")]
20101    const unsafe fn test_mm512_maskz_set1_epi8() {
20102        let a: i8 = 11;
20103        let r = _mm512_maskz_set1_epi8(0, a);
20104        assert_eq_m512i(r, _mm512_setzero_si512());
20105        let r = _mm512_maskz_set1_epi8(
20106            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
20107            a,
20108        );
20109        let e = _mm512_set1_epi8(11);
20110        assert_eq_m512i(r, e);
20111    }
20112
20113    #[simd_test(enable = "avx512bw,avx512vl")]
20114    const unsafe fn test_mm256_mask_set1_epi8() {
20115        let src = _mm256_set1_epi8(2);
20116        let a: i8 = 11;
20117        let r = _mm256_mask_set1_epi8(src, 0, a);
20118        assert_eq_m256i(r, src);
20119        let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a);
20120        let e = _mm256_set1_epi8(11);
20121        assert_eq_m256i(r, e);
20122    }
20123
20124    #[simd_test(enable = "avx512bw,avx512vl")]
20125    const unsafe fn test_mm256_maskz_set1_epi8() {
20126        let a: i8 = 11;
20127        let r = _mm256_maskz_set1_epi8(0, a);
20128        assert_eq_m256i(r, _mm256_setzero_si256());
20129        let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a);
20130        let e = _mm256_set1_epi8(11);
20131        assert_eq_m256i(r, e);
20132    }
20133
20134    #[simd_test(enable = "avx512bw,avx512vl")]
20135    const unsafe fn test_mm_mask_set1_epi8() {
20136        let src = _mm_set1_epi8(2);
20137        let a: i8 = 11;
20138        let r = _mm_mask_set1_epi8(src, 0, a);
20139        assert_eq_m128i(r, src);
20140        let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a);
20141        let e = _mm_set1_epi8(11);
20142        assert_eq_m128i(r, e);
20143    }
20144
20145    #[simd_test(enable = "avx512bw,avx512vl")]
20146    const unsafe fn test_mm_maskz_set1_epi8() {
20147        let a: i8 = 11;
20148        let r = _mm_maskz_set1_epi8(0, a);
20149        assert_eq_m128i(r, _mm_setzero_si128());
20150        let r = _mm_maskz_set1_epi8(0b11111111_11111111, a);
20151        let e = _mm_set1_epi8(11);
20152        assert_eq_m128i(r, e);
20153    }
20154
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_shufflelo_epi16() {
        // Shuffles the low four 16-bit words of each 128-bit lane by IMM8; the
        // high four words of each lane pass through unchanged.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        // Expected: per 128-bit lane, the high words (e.g. 0..=3) are untouched
        // and the low words are permuted per the IMM8 selectors.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }
20170
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_shufflelo_epi16() {
        // Masked low-word shuffle: zero mask returns src (`a`); full 32-bit mask
        // returns the same permutation as the unmasked test.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }
20192
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_shufflelo_epi16() {
        // Zeroing-masked low-word shuffle: zero mask yields all zeros; full mask
        // yields the full permutation.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }
20211
20212    #[simd_test(enable = "avx512bw,avx512vl")]
20213    const unsafe fn test_mm256_mask_shufflelo_epi16() {
20214        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
20215        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
20216        assert_eq_m256i(r, a);
20217        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
20218        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
20219        assert_eq_m256i(r, e);
20220    }
20221
20222    #[simd_test(enable = "avx512bw,avx512vl")]
20223    const unsafe fn test_mm256_maskz_shufflelo_epi16() {
20224        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
20225        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
20226        assert_eq_m256i(r, _mm256_setzero_si256());
20227        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
20228        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
20229        assert_eq_m256i(r, e);
20230    }
20231
20232    #[simd_test(enable = "avx512bw,avx512vl")]
20233    const unsafe fn test_mm_mask_shufflelo_epi16() {
20234        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
20235        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
20236        assert_eq_m128i(r, a);
20237        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a);
20238        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
20239        assert_eq_m128i(r, e);
20240    }
20241
20242    #[simd_test(enable = "avx512bw,avx512vl")]
20243    const unsafe fn test_mm_maskz_shufflelo_epi16() {
20244        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
20245        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
20246        assert_eq_m128i(r, _mm_setzero_si128());
20247        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a);
20248        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
20249        assert_eq_m128i(r, e);
20250    }
20251
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_shufflehi_epi16() {
        // Shuffles the high four 16-bit words of each 128-bit lane by IMM8; the
        // low four words of each lane pass through unchanged.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        // Expected: per 128-bit lane, the low words (e.g. 4..=7) are untouched
        // and the high words are permuted per the IMM8 selectors.
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }
20267
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_shufflehi_epi16() {
        // Masked high-word shuffle: zero mask returns src (`a`); full 32-bit mask
        // returns the same permutation as the unmasked test.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }
20289
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_shufflehi_epi16() {
        // Zeroing-masked high-word shuffle: zero mask yields all zeros; full mask
        // yields the full permutation.
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }
20308
20309    #[simd_test(enable = "avx512bw,avx512vl")]
20310    const unsafe fn test_mm256_mask_shufflehi_epi16() {
20311        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
20312        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
20313        assert_eq_m256i(r, a);
20314        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
20315        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
20316        assert_eq_m256i(r, e);
20317    }
20318
20319    #[simd_test(enable = "avx512bw,avx512vl")]
20320    const unsafe fn test_mm256_maskz_shufflehi_epi16() {
20321        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
20322        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
20323        assert_eq_m256i(r, _mm256_setzero_si256());
20324        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
20325        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
20326        assert_eq_m256i(r, e);
20327    }
20328
20329    #[simd_test(enable = "avx512bw,avx512vl")]
20330    const unsafe fn test_mm_mask_shufflehi_epi16() {
20331        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
20332        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
20333        assert_eq_m128i(r, a);
20334        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a);
20335        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
20336        assert_eq_m128i(r, e);
20337    }
20338
20339    #[simd_test(enable = "avx512bw,avx512vl")]
20340    const unsafe fn test_mm_maskz_shufflehi_epi16() {
20341        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
20342        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
20343        assert_eq_m128i(r, _mm_setzero_si128());
20344        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a);
20345        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
20346        assert_eq_m128i(r, e);
20347    }
20348
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shuffle_epi8() {
        // Byte shuffle within each 128-bit lane. With every index byte equal to 1,
        // each lane broadcasts its own byte 1 (values 14/30/46/62 below).
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_shuffle_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
20365
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shuffle_epi8() {
        // Masked byte shuffle: zero mask returns src (`a`); all 64 mask bits set
        // returns the same per-lane broadcast as the unmasked test.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
20389
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shuffle_epi8() {
        // Zeroing-masked byte shuffle: zero mask yields all zeros; all 64 mask
        // bits set yields the full per-lane broadcast result.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_shuffle_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }
20412
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shuffle_epi8() {
        // 256-bit masked byte shuffle: with all index bytes 1, each 128-bit lane
        // broadcasts its own byte 1 (14 and 30 below). Zero mask returns src.
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }
20427
    // Zero-masked byte shuffle (256-bit): a zero mask yields all zeros;
    // a full mask matches the unmasked shuffle, where an all-1s index
    // vector picks in-lane byte 1 of each 128-bit lane (14 and 30).
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_shuffle_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }
20442
    // Write-masked byte shuffle (128-bit): zero mask copies `src`; full
    // mask selects in-lane byte 1 (value 14) for every element.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shuffle_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        );
        assert_eq_m128i(r, e);
    }
20455
    // Zero-masked byte shuffle (128-bit): zero mask yields all zeros;
    // full mask selects in-lane byte 1 (value 14) for every element.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_shuffle_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        );
        assert_eq_m128i(r, e);
    }
20469
20470    #[simd_test(enable = "avx512bw")]
20471    const unsafe fn test_mm512_test_epi16_mask() {
20472        let a = _mm512_set1_epi16(1 << 0);
20473        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
20474        let r = _mm512_test_epi16_mask(a, b);
20475        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20476        assert_eq!(r, e);
20477    }
20478
20479    #[simd_test(enable = "avx512bw")]
20480    const unsafe fn test_mm512_mask_test_epi16_mask() {
20481        let a = _mm512_set1_epi16(1 << 0);
20482        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
20483        let r = _mm512_mask_test_epi16_mask(0, a, b);
20484        assert_eq!(r, 0);
20485        let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
20486        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20487        assert_eq!(r, e);
20488    }
20489
20490    #[simd_test(enable = "avx512bw,avx512vl")]
20491    const unsafe fn test_mm256_test_epi16_mask() {
20492        let a = _mm256_set1_epi16(1 << 0);
20493        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
20494        let r = _mm256_test_epi16_mask(a, b);
20495        let e: __mmask16 = 0b11111111_11111111;
20496        assert_eq!(r, e);
20497    }
20498
20499    #[simd_test(enable = "avx512bw,avx512vl")]
20500    const unsafe fn test_mm256_mask_test_epi16_mask() {
20501        let a = _mm256_set1_epi16(1 << 0);
20502        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
20503        let r = _mm256_mask_test_epi16_mask(0, a, b);
20504        assert_eq!(r, 0);
20505        let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b);
20506        let e: __mmask16 = 0b11111111_11111111;
20507        assert_eq!(r, e);
20508    }
20509
20510    #[simd_test(enable = "avx512bw,avx512vl")]
20511    const unsafe fn test_mm_test_epi16_mask() {
20512        let a = _mm_set1_epi16(1 << 0);
20513        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
20514        let r = _mm_test_epi16_mask(a, b);
20515        let e: __mmask8 = 0b11111111;
20516        assert_eq!(r, e);
20517    }
20518
20519    #[simd_test(enable = "avx512bw,avx512vl")]
20520    const unsafe fn test_mm_mask_test_epi16_mask() {
20521        let a = _mm_set1_epi16(1 << 0);
20522        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
20523        let r = _mm_mask_test_epi16_mask(0, a, b);
20524        assert_eq!(r, 0);
20525        let r = _mm_mask_test_epi16_mask(0b11111111, a, b);
20526        let e: __mmask8 = 0b11111111;
20527        assert_eq!(r, e);
20528    }
20529
20530    #[simd_test(enable = "avx512bw")]
20531    const unsafe fn test_mm512_test_epi8_mask() {
20532        let a = _mm512_set1_epi8(1 << 0);
20533        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
20534        let r = _mm512_test_epi8_mask(a, b);
20535        let e: __mmask64 =
20536            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
20537        assert_eq!(r, e);
20538    }
20539
20540    #[simd_test(enable = "avx512bw")]
20541    const unsafe fn test_mm512_mask_test_epi8_mask() {
20542        let a = _mm512_set1_epi8(1 << 0);
20543        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
20544        let r = _mm512_mask_test_epi8_mask(0, a, b);
20545        assert_eq!(r, 0);
20546        let r = _mm512_mask_test_epi8_mask(
20547            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
20548            a,
20549            b,
20550        );
20551        let e: __mmask64 =
20552            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
20553        assert_eq!(r, e);
20554    }
20555
20556    #[simd_test(enable = "avx512bw,avx512vl")]
20557    const unsafe fn test_mm256_test_epi8_mask() {
20558        let a = _mm256_set1_epi8(1 << 0);
20559        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
20560        let r = _mm256_test_epi8_mask(a, b);
20561        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20562        assert_eq!(r, e);
20563    }
20564
20565    #[simd_test(enable = "avx512bw,avx512vl")]
20566    const unsafe fn test_mm256_mask_test_epi8_mask() {
20567        let a = _mm256_set1_epi8(1 << 0);
20568        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
20569        let r = _mm256_mask_test_epi8_mask(0, a, b);
20570        assert_eq!(r, 0);
20571        let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
20572        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20573        assert_eq!(r, e);
20574    }
20575
20576    #[simd_test(enable = "avx512bw,avx512vl")]
20577    const unsafe fn test_mm_test_epi8_mask() {
20578        let a = _mm_set1_epi8(1 << 0);
20579        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
20580        let r = _mm_test_epi8_mask(a, b);
20581        let e: __mmask16 = 0b11111111_11111111;
20582        assert_eq!(r, e);
20583    }
20584
20585    #[simd_test(enable = "avx512bw,avx512vl")]
20586    const unsafe fn test_mm_mask_test_epi8_mask() {
20587        let a = _mm_set1_epi8(1 << 0);
20588        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
20589        let r = _mm_mask_test_epi8_mask(0, a, b);
20590        assert_eq!(r, 0);
20591        let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b);
20592        let e: __mmask16 = 0b11111111_11111111;
20593        assert_eq!(r, e);
20594    }
20595
20596    #[simd_test(enable = "avx512bw")]
20597    const unsafe fn test_mm512_testn_epi16_mask() {
20598        let a = _mm512_set1_epi16(1 << 0);
20599        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
20600        let r = _mm512_testn_epi16_mask(a, b);
20601        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
20602        assert_eq!(r, e);
20603    }
20604
20605    #[simd_test(enable = "avx512bw")]
20606    const unsafe fn test_mm512_mask_testn_epi16_mask() {
20607        let a = _mm512_set1_epi16(1 << 0);
20608        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
20609        let r = _mm512_mask_testn_epi16_mask(0, a, b);
20610        assert_eq!(r, 0);
20611        let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
20612        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
20613        assert_eq!(r, e);
20614    }
20615
20616    #[simd_test(enable = "avx512bw,avx512vl")]
20617    const unsafe fn test_mm256_testn_epi16_mask() {
20618        let a = _mm256_set1_epi16(1 << 0);
20619        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
20620        let r = _mm256_testn_epi16_mask(a, b);
20621        let e: __mmask16 = 0b00000000_00000000;
20622        assert_eq!(r, e);
20623    }
20624
20625    #[simd_test(enable = "avx512bw,avx512vl")]
20626    const unsafe fn test_mm256_mask_testn_epi16_mask() {
20627        let a = _mm256_set1_epi16(1 << 0);
20628        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
20629        let r = _mm256_mask_testn_epi16_mask(0, a, b);
20630        assert_eq!(r, 0);
20631        let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b);
20632        let e: __mmask16 = 0b00000000_00000000;
20633        assert_eq!(r, e);
20634    }
20635
20636    #[simd_test(enable = "avx512bw,avx512vl")]
20637    const unsafe fn test_mm_testn_epi16_mask() {
20638        let a = _mm_set1_epi16(1 << 0);
20639        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
20640        let r = _mm_testn_epi16_mask(a, b);
20641        let e: __mmask8 = 0b00000000;
20642        assert_eq!(r, e);
20643    }
20644
20645    #[simd_test(enable = "avx512bw,avx512vl")]
20646    const unsafe fn test_mm_mask_testn_epi16_mask() {
20647        let a = _mm_set1_epi16(1 << 0);
20648        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
20649        let r = _mm_mask_testn_epi16_mask(0, a, b);
20650        assert_eq!(r, 0);
20651        let r = _mm_mask_testn_epi16_mask(0b11111111, a, b);
20652        let e: __mmask8 = 0b00000000;
20653        assert_eq!(r, e);
20654    }
20655
20656    #[simd_test(enable = "avx512bw")]
20657    const unsafe fn test_mm512_testn_epi8_mask() {
20658        let a = _mm512_set1_epi8(1 << 0);
20659        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
20660        let r = _mm512_testn_epi8_mask(a, b);
20661        let e: __mmask64 =
20662            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
20663        assert_eq!(r, e);
20664    }
20665
20666    #[simd_test(enable = "avx512bw")]
20667    const unsafe fn test_mm512_mask_testn_epi8_mask() {
20668        let a = _mm512_set1_epi8(1 << 0);
20669        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
20670        let r = _mm512_mask_testn_epi8_mask(0, a, b);
20671        assert_eq!(r, 0);
20672        let r = _mm512_mask_testn_epi8_mask(
20673            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
20674            a,
20675            b,
20676        );
20677        let e: __mmask64 =
20678            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
20679        assert_eq!(r, e);
20680    }
20681
20682    #[simd_test(enable = "avx512bw,avx512vl")]
20683    const unsafe fn test_mm256_testn_epi8_mask() {
20684        let a = _mm256_set1_epi8(1 << 0);
20685        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
20686        let r = _mm256_testn_epi8_mask(a, b);
20687        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
20688        assert_eq!(r, e);
20689    }
20690
20691    #[simd_test(enable = "avx512bw,avx512vl")]
20692    const unsafe fn test_mm256_mask_testn_epi8_mask() {
20693        let a = _mm256_set1_epi8(1 << 0);
20694        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
20695        let r = _mm256_mask_testn_epi8_mask(0, a, b);
20696        assert_eq!(r, 0);
20697        let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
20698        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
20699        assert_eq!(r, e);
20700    }
20701
20702    #[simd_test(enable = "avx512bw,avx512vl")]
20703    const unsafe fn test_mm_testn_epi8_mask() {
20704        let a = _mm_set1_epi8(1 << 0);
20705        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
20706        let r = _mm_testn_epi8_mask(a, b);
20707        let e: __mmask16 = 0b00000000_00000000;
20708        assert_eq!(r, e);
20709    }
20710
20711    #[simd_test(enable = "avx512bw,avx512vl")]
20712    const unsafe fn test_mm_mask_testn_epi8_mask() {
20713        let a = _mm_set1_epi8(1 << 0);
20714        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
20715        let r = _mm_mask_testn_epi8_mask(0, a, b);
20716        assert_eq!(r, 0);
20717        let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b);
20718        let e: __mmask16 = 0b00000000_00000000;
20719        assert_eq!(r, e);
20720    }
20721
20722    #[simd_test(enable = "avx512bw")]
20723    const unsafe fn test_store_mask64() {
20724        let a: __mmask64 =
20725            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
20726        let mut r = 0;
20727        _store_mask64(&mut r, a);
20728        assert_eq!(r, a);
20729    }
20730
20731    #[simd_test(enable = "avx512bw")]
20732    const unsafe fn test_store_mask32() {
20733        let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
20734        let mut r = 0;
20735        _store_mask32(&mut r, a);
20736        assert_eq!(r, a);
20737    }
20738
20739    #[simd_test(enable = "avx512bw")]
20740    const unsafe fn test_load_mask64() {
20741        let p: __mmask64 =
20742            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
20743        let r = _load_mask64(&p);
20744        let e: __mmask64 =
20745            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
20746        assert_eq!(r, e);
20747    }
20748
20749    #[simd_test(enable = "avx512bw")]
20750    const unsafe fn test_load_mask32() {
20751        let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
20752        let r = _load_mask32(&p);
20753        let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
20754        assert_eq!(r, e);
20755    }
20756
20757    #[simd_test(enable = "avx512bw")]
20758    unsafe fn test_mm512_sad_epu8() {
20759        let a = _mm512_set1_epi8(2);
20760        let b = _mm512_set1_epi8(4);
20761        let r = _mm512_sad_epu8(a, b);
20762        let e = _mm512_set1_epi64(16);
20763        assert_eq_m512i(r, e);
20764    }
20765
20766    #[simd_test(enable = "avx512bw")]
20767    unsafe fn test_mm512_dbsad_epu8() {
20768        let a = _mm512_set1_epi8(2);
20769        let b = _mm512_set1_epi8(4);
20770        let r = _mm512_dbsad_epu8::<0>(a, b);
20771        let e = _mm512_set1_epi16(8);
20772        assert_eq_m512i(r, e);
20773    }
20774
20775    #[simd_test(enable = "avx512bw")]
20776    unsafe fn test_mm512_mask_dbsad_epu8() {
20777        let src = _mm512_set1_epi16(1);
20778        let a = _mm512_set1_epi8(2);
20779        let b = _mm512_set1_epi8(4);
20780        let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b);
20781        assert_eq_m512i(r, src);
20782        let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b);
20783        let e = _mm512_set1_epi16(8);
20784        assert_eq_m512i(r, e);
20785    }
20786
20787    #[simd_test(enable = "avx512bw")]
20788    unsafe fn test_mm512_maskz_dbsad_epu8() {
20789        let a = _mm512_set1_epi8(2);
20790        let b = _mm512_set1_epi8(4);
20791        let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
20792        assert_eq_m512i(r, _mm512_setzero_si512());
20793        let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b);
20794        let e = _mm512_set1_epi16(8);
20795        assert_eq_m512i(r, e);
20796    }
20797
20798    #[simd_test(enable = "avx512bw,avx512vl")]
20799    unsafe fn test_mm256_dbsad_epu8() {
20800        let a = _mm256_set1_epi8(2);
20801        let b = _mm256_set1_epi8(4);
20802        let r = _mm256_dbsad_epu8::<0>(a, b);
20803        let e = _mm256_set1_epi16(8);
20804        assert_eq_m256i(r, e);
20805    }
20806
20807    #[simd_test(enable = "avx512bw,avx512vl")]
20808    unsafe fn test_mm256_mask_dbsad_epu8() {
20809        let src = _mm256_set1_epi16(1);
20810        let a = _mm256_set1_epi8(2);
20811        let b = _mm256_set1_epi8(4);
20812        let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b);
20813        assert_eq_m256i(r, src);
20814        let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b);
20815        let e = _mm256_set1_epi16(8);
20816        assert_eq_m256i(r, e);
20817    }
20818
20819    #[simd_test(enable = "avx512bw,avx512vl")]
20820    unsafe fn test_mm256_maskz_dbsad_epu8() {
20821        let a = _mm256_set1_epi8(2);
20822        let b = _mm256_set1_epi8(4);
20823        let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
20824        assert_eq_m256i(r, _mm256_setzero_si256());
20825        let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b);
20826        let e = _mm256_set1_epi16(8);
20827        assert_eq_m256i(r, e);
20828    }
20829
20830    #[simd_test(enable = "avx512bw,avx512vl")]
20831    unsafe fn test_mm_dbsad_epu8() {
20832        let a = _mm_set1_epi8(2);
20833        let b = _mm_set1_epi8(4);
20834        let r = _mm_dbsad_epu8::<0>(a, b);
20835        let e = _mm_set1_epi16(8);
20836        assert_eq_m128i(r, e);
20837    }
20838
20839    #[simd_test(enable = "avx512bw,avx512vl")]
20840    unsafe fn test_mm_mask_dbsad_epu8() {
20841        let src = _mm_set1_epi16(1);
20842        let a = _mm_set1_epi8(2);
20843        let b = _mm_set1_epi8(4);
20844        let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b);
20845        assert_eq_m128i(r, src);
20846        let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b);
20847        let e = _mm_set1_epi16(8);
20848        assert_eq_m128i(r, e);
20849    }
20850
20851    #[simd_test(enable = "avx512bw,avx512vl")]
20852    unsafe fn test_mm_maskz_dbsad_epu8() {
20853        let a = _mm_set1_epi8(2);
20854        let b = _mm_set1_epi8(4);
20855        let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
20856        assert_eq_m128i(r, _mm_setzero_si128());
20857        let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b);
20858        let e = _mm_set1_epi16(8);
20859        assert_eq_m128i(r, e);
20860    }
20861
20862    #[simd_test(enable = "avx512bw")]
20863    const unsafe fn test_mm512_movepi16_mask() {
20864        let a = _mm512_set1_epi16(1 << 15);
20865        let r = _mm512_movepi16_mask(a);
20866        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20867        assert_eq!(r, e);
20868    }
20869
20870    #[simd_test(enable = "avx512bw,avx512vl")]
20871    const unsafe fn test_mm256_movepi16_mask() {
20872        let a = _mm256_set1_epi16(1 << 15);
20873        let r = _mm256_movepi16_mask(a);
20874        let e: __mmask16 = 0b11111111_11111111;
20875        assert_eq!(r, e);
20876    }
20877
20878    #[simd_test(enable = "avx512bw,avx512vl")]
20879    const unsafe fn test_mm_movepi16_mask() {
20880        let a = _mm_set1_epi16(1 << 15);
20881        let r = _mm_movepi16_mask(a);
20882        let e: __mmask8 = 0b11111111;
20883        assert_eq!(r, e);
20884    }
20885
20886    #[simd_test(enable = "avx512bw")]
20887    const unsafe fn test_mm512_movepi8_mask() {
20888        let a = _mm512_set1_epi8(1 << 7);
20889        let r = _mm512_movepi8_mask(a);
20890        let e: __mmask64 =
20891            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
20892        assert_eq!(r, e);
20893    }
20894
20895    #[simd_test(enable = "avx512bw,avx512vl")]
20896    const unsafe fn test_mm256_movepi8_mask() {
20897        let a = _mm256_set1_epi8(1 << 7);
20898        let r = _mm256_movepi8_mask(a);
20899        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
20900        assert_eq!(r, e);
20901    }
20902
20903    #[simd_test(enable = "avx512bw,avx512vl")]
20904    const unsafe fn test_mm_movepi8_mask() {
20905        let a = _mm_set1_epi8(1 << 7);
20906        let r = _mm_movepi8_mask(a);
20907        let e: __mmask16 = 0b11111111_11111111;
20908        assert_eq!(r, e);
20909    }
20910
20911    #[simd_test(enable = "avx512bw")]
20912    const unsafe fn test_mm512_movm_epi16() {
20913        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
20914        let r = _mm512_movm_epi16(a);
20915        let e = _mm512_set1_epi16(
20916            1 << 15
20917                | 1 << 14
20918                | 1 << 13
20919                | 1 << 12
20920                | 1 << 11
20921                | 1 << 10
20922                | 1 << 9
20923                | 1 << 8
20924                | 1 << 7
20925                | 1 << 6
20926                | 1 << 5
20927                | 1 << 4
20928                | 1 << 3
20929                | 1 << 2
20930                | 1 << 1
20931                | 1 << 0,
20932        );
20933        assert_eq_m512i(r, e);
20934    }
20935
20936    #[simd_test(enable = "avx512bw,avx512vl")]
20937    const unsafe fn test_mm256_movm_epi16() {
20938        let a: __mmask16 = 0b11111111_11111111;
20939        let r = _mm256_movm_epi16(a);
20940        let e = _mm256_set1_epi16(
20941            1 << 15
20942                | 1 << 14
20943                | 1 << 13
20944                | 1 << 12
20945                | 1 << 11
20946                | 1 << 10
20947                | 1 << 9
20948                | 1 << 8
20949                | 1 << 7
20950                | 1 << 6
20951                | 1 << 5
20952                | 1 << 4
20953                | 1 << 3
20954                | 1 << 2
20955                | 1 << 1
20956                | 1 << 0,
20957        );
20958        assert_eq_m256i(r, e);
20959    }
20960
20961    #[simd_test(enable = "avx512bw,avx512vl")]
20962    const unsafe fn test_mm_movm_epi16() {
20963        let a: __mmask8 = 0b11111111;
20964        let r = _mm_movm_epi16(a);
20965        let e = _mm_set1_epi16(
20966            1 << 15
20967                | 1 << 14
20968                | 1 << 13
20969                | 1 << 12
20970                | 1 << 11
20971                | 1 << 10
20972                | 1 << 9
20973                | 1 << 8
20974                | 1 << 7
20975                | 1 << 6
20976                | 1 << 5
20977                | 1 << 4
20978                | 1 << 3
20979                | 1 << 2
20980                | 1 << 1
20981                | 1 << 0,
20982        );
20983        assert_eq_m128i(r, e);
20984    }
20985
20986    #[simd_test(enable = "avx512bw")]
20987    const unsafe fn test_mm512_movm_epi8() {
20988        let a: __mmask64 =
20989            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
20990        let r = _mm512_movm_epi8(a);
20991        let e =
20992            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
20993        assert_eq_m512i(r, e);
20994    }
20995
20996    #[simd_test(enable = "avx512bw,avx512vl")]
20997    const unsafe fn test_mm256_movm_epi8() {
20998        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
20999        let r = _mm256_movm_epi8(a);
21000        let e =
21001            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
21002        assert_eq_m256i(r, e);
21003    }
21004
21005    #[simd_test(enable = "avx512bw,avx512vl")]
21006    const unsafe fn test_mm_movm_epi8() {
21007        let a: __mmask16 = 0b11111111_11111111;
21008        let r = _mm_movm_epi8(a);
21009        let e =
21010            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
21011        assert_eq_m128i(r, e);
21012    }
21013
21014    #[simd_test(enable = "avx512bw")]
21015    const unsafe fn test_cvtmask32_u32() {
21016        let a: __mmask32 = 0b11001100_00110011_01100110_10011001;
21017        let r = _cvtmask32_u32(a);
21018        let e: u32 = 0b11001100_00110011_01100110_10011001;
21019        assert_eq!(r, e);
21020    }
21021
21022    #[simd_test(enable = "avx512bw")]
21023    const unsafe fn test_cvtu32_mask32() {
21024        let a: u32 = 0b11001100_00110011_01100110_10011001;
21025        let r = _cvtu32_mask32(a);
21026        let e: __mmask32 = 0b11001100_00110011_01100110_10011001;
21027        assert_eq!(r, e);
21028    }
21029
21030    #[simd_test(enable = "avx512bw")]
21031    const unsafe fn test_kadd_mask32() {
21032        let a: __mmask32 = 11;
21033        let b: __mmask32 = 22;
21034        let r = _kadd_mask32(a, b);
21035        let e: __mmask32 = 33;
21036        assert_eq!(r, e);
21037    }
21038
21039    #[simd_test(enable = "avx512bw")]
21040    const unsafe fn test_kadd_mask64() {
21041        let a: __mmask64 = 11;
21042        let b: __mmask64 = 22;
21043        let r = _kadd_mask64(a, b);
21044        let e: __mmask64 = 33;
21045        assert_eq!(r, e);
21046    }
21047
21048    #[simd_test(enable = "avx512bw")]
21049    const unsafe fn test_kand_mask32() {
21050        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
21051        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21052        let r = _kand_mask32(a, b);
21053        let e: __mmask32 = 0b11001100_00110011_11001100_00110011;
21054        assert_eq!(r, e);
21055    }
21056
21057    #[simd_test(enable = "avx512bw")]
21058    const unsafe fn test_kand_mask64() {
21059        let a: __mmask64 =
21060            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21061        let b: __mmask64 =
21062            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21063        let r = _kand_mask64(a, b);
21064        let e: __mmask64 =
21065            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21066        assert_eq!(r, e);
21067    }
21068
21069    #[simd_test(enable = "avx512bw")]
21070    const unsafe fn test_knot_mask32() {
21071        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
21072        let r = _knot_mask32(a);
21073        let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
21074        assert_eq!(r, e);
21075    }
21076
21077    #[simd_test(enable = "avx512bw")]
21078    const unsafe fn test_knot_mask64() {
21079        let a: __mmask64 =
21080            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21081        let r = _knot_mask64(a);
21082        let e: __mmask64 =
21083            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
21084        assert_eq!(r, e);
21085    }
21086
21087    #[simd_test(enable = "avx512bw")]
21088    const unsafe fn test_kandn_mask32() {
21089        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
21090        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21091        let r = _kandn_mask32(a, b);
21092        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
21093        assert_eq!(r, e);
21094    }
21095
21096    #[simd_test(enable = "avx512bw")]
21097    const unsafe fn test_kandn_mask64() {
21098        let a: __mmask64 =
21099            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21100        let b: __mmask64 =
21101            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21102        let r = _kandn_mask64(a, b);
21103        let e: __mmask64 =
21104            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
21105        assert_eq!(r, e);
21106    }
21107
21108    #[simd_test(enable = "avx512bw")]
21109    const unsafe fn test_kor_mask32() {
21110        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
21111        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21112        let r = _kor_mask32(a, b);
21113        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
21114        assert_eq!(r, e);
21115    }
21116
21117    #[simd_test(enable = "avx512bw")]
21118    const unsafe fn test_kor_mask64() {
21119        let a: __mmask64 =
21120            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
21121        let b: __mmask64 =
21122            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21123        let r = _kor_mask64(a, b);
21124        let e: __mmask64 =
21125            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
21126        assert_eq!(r, e);
21127    }
21128
21129    #[simd_test(enable = "avx512bw")]
21130    const unsafe fn test_kxor_mask32() {
21131        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
21132        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21133        let r = _kxor_mask32(a, b);
21134        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
21135        assert_eq!(r, e);
21136    }
21137
21138    #[simd_test(enable = "avx512bw")]
21139    const unsafe fn test_kxor_mask64() {
21140        let a: __mmask64 =
21141            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
21142        let b: __mmask64 =
21143            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21144        let r = _kxor_mask64(a, b);
21145        let e: __mmask64 =
21146            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
21147        assert_eq!(r, e);
21148    }
21149
21150    #[simd_test(enable = "avx512bw")]
21151    const unsafe fn test_kxnor_mask32() {
21152        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
21153        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
21154        let r = _kxnor_mask32(a, b);
21155        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
21156        assert_eq!(r, e);
21157    }
21158
21159    #[simd_test(enable = "avx512bw")]
21160    const unsafe fn test_kxnor_mask64() {
21161        let a: __mmask64 =
21162            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
21163        let b: __mmask64 =
21164            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
21165        let r = _kxnor_mask64(a, b);
21166        let e: __mmask64 =
21167            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
21168        assert_eq!(r, e);
21169    }
21170
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_kortest_mask32_u8() {
        // `a | b` covers every bit of the 32-bit mask, so the carry output
        // (`all_ones`) is set while the zero-flag return value is 0.
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask32_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 1);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_kortest_mask64_u8() {
        // Only the low 32 bits of each 64-bit mask are populated, so `a | b`
        // is neither zero nor all-ones: both outputs are 0.
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask64_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 0);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_kortestc_mask32_u8() {
        // `a | b` is all-ones, so the carry-flag result is 1.
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_kortestc_mask64_u8() {
        // Upper 32 bits of both operands are zero, so the OR is not all-ones.
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_kortestz_mask32_u8() {
        // `a | b` is nonzero, so the zero-flag result is 0.
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let b: __mmask32 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_kortestz_mask64_u8() {
        // `a | b` is nonzero, so the zero-flag result is 0.
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let b: __mmask64 = 0b1011011010110110_1011011010110110;
        let r = _kortestz_mask64_u8(a, b);
        assert_eq!(r, 0);
    }
21222
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_kshiftli_mask32() {
        // Left-shift of a 32-bit mask: in-range counts shift in zeros from
        // the right; counts >= 32 must produce zero rather than wrapping.
        let a: __mmask32 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask32::<3>(a);
        let e: __mmask32 = 0b0100101101001011_0100101101001000;
        assert_eq!(r, e);

        let r = _kshiftli_mask32::<31>(a);
        let e: __mmask32 = 0b1000000000000000_0000000000000000;
        assert_eq!(r, e);

        // Count equal to the mask width: everything shifted out.
        let r = _kshiftli_mask32::<32>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);

        // Count beyond the mask width: still zero (no modulo behavior).
        let r = _kshiftli_mask32::<33>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_kshiftli_mask64() {
        // Same checks for the 64-bit mask; only the low 32 bits of `a` are
        // populated, so <<3 yields a 35-bit pattern.
        let a: __mmask64 = 0b0110100101101001_0110100101101001;
        let r = _kshiftli_mask64::<3>(a);
        let e: __mmask64 = 0b0110100101101001011_0100101101001000;
        assert_eq!(r, e);

        // Shifting by 63 leaves only bit 0 of `a` (which is 1) at the top.
        let r = _kshiftli_mask64::<63>(a);
        let e: __mmask64 = 0b1000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        // Counts >= 64 must produce zero, not wrap.
        let r = _kshiftli_mask64::<64>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask64::<65>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_kshiftri_mask32() {
        // Right-shift of a 32-bit mask: zeros shift in from the left;
        // counts >= 32 must produce zero.
        let a: __mmask32 = 0b1010100101101001_0110100101101001;
        let r = _kshiftri_mask32::<3>(a);
        let e: __mmask32 = 0b0001010100101101_0010110100101101;
        assert_eq!(r, e);

        // Shifting by 31 keeps only the (set) top bit of `a`.
        let r = _kshiftri_mask32::<31>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000001;
        assert_eq!(r, e);

        let r = _kshiftri_mask32::<32>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask32::<33>(a);
        let e: __mmask32 = 0b0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_kshiftri_mask64() {
        // `a` is a 35-bit pattern; >>3 recovers its upper 32 bits.
        let a: __mmask64 = 0b1010100101101001011_0100101101001000;
        let r = _kshiftri_mask64::<3>(a);
        let e: __mmask64 = 0b1010100101101001_0110100101101001;
        assert_eq!(r, e);

        // Shifting by 34 leaves only the topmost (set) bit of the pattern.
        let r = _kshiftri_mask64::<34>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000001;
        assert_eq!(r, e);

        // One past the highest set bit: result is zero.
        let r = _kshiftri_mask64::<35>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<64>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask64::<65>(a);
        let e: __mmask64 = 0b0000000000000000_0000000000000000_0000000000000000_0000000000000000;
        assert_eq!(r, e);
    }
21306
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_ktest_mask32_u8() {
        // `a` and `b` are bitwise complements: `a & b == 0` (zero-flag result
        // is 1) while `!a & b == b != 0` (the `and_not` carry output stays 0).
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask32_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_ktestc_mask32_u8() {
        // `!a & b` is nonzero, so the carry-flag result is 0.
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_ktestz_mask32_u8() {
        // `a & b == 0`, so the zero-flag result is 1.
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_ktest_mask64_u8() {
        // Same complementary patterns in the low 32 bits of 64-bit masks.
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask64_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_ktestc_mask64_u8() {
        // `!a & b` is nonzero, so the carry-flag result is 0.
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_ktestz_mask64_u8() {
        // `a & b == 0`, so the zero-flag result is 1.
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask64_u8(a, b);
        assert_eq!(r, 1);
    }
21358
21359    #[simd_test(enable = "avx512bw")]
21360    const unsafe fn test_mm512_kunpackw() {
21361        let a: u32 = 0x00110011;
21362        let b: u32 = 0x00001011;
21363        let r = _mm512_kunpackw(a, b);
21364        let e: u32 = 0x00111011;
21365        assert_eq!(r, e);
21366    }
21367
21368    #[simd_test(enable = "avx512bw")]
21369    const unsafe fn test_mm512_kunpackd() {
21370        let a: u64 = 0x11001100_00110011;
21371        let b: u64 = 0x00101110_00001011;
21372        let r = _mm512_kunpackd(a, b);
21373        let e: u64 = 0x00110011_00001011;
21374        assert_eq!(r, e);
21375    }
21376
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cvtepi16_epi8() {
        // Truncating narrow of 32 x i16 -> 32 x i8; small values are unchanged.
        let a = _mm512_set1_epi16(2);
        let r = _mm512_cvtepi16_epi8(a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cvtepi16_epi8() {
        // Zero mask keeps `src`; all-ones mask converts every lane.
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_cvtepi16_epi8() {
        // Zero mask zeroes the result; all-ones mask converts every lane.
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_cvtepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_cvtepi16_epi8() {
        // 256-bit form: 16 x i16 -> 16 x i8.
        let a = _mm256_set1_epi16(2);
        let r = _mm256_cvtepi16_epi8(a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cvtepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_cvtepi16_epi8() {
        // 128-bit form writes 8 converted bytes; the upper half is zeroed.
        let a = _mm_set1_epi16(2);
        let r = _mm_cvtepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cvtepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }
21463
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtsepi16_epi8() {
        // Signed-saturating narrow: i16::MAX clamps to i8::MAX.
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_cvtsepi16_epi8(a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_epi8() {
        // Zero mask keeps `src`; all-ones mask saturates every lane.
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_cvtsepi16_epi8(a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtsepi16_epi8() {
        // 128-bit form: 8 saturated bytes in the low half, upper half zeroed.
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_cvtsepi16_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    // NOTE(review): this 512-bit maskz test sits after the 128-bit group,
    // unlike the other families which keep 512/256/128 together.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }
21553
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtusepi16_epi8() {
        // Unsigned-saturating narrow: the i16::MIN bit pattern (0x8000) is
        // treated as unsigned 32768, which saturates to 0xFF (-1 as i8).
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_cvtusepi16_epi8(a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtusepi16_epi8() {
        // Zero mask keeps `src`; all-ones mask saturates every lane.
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_cvtusepi16_epi8(a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtusepi16_epi8() {
        // 128-bit form: 8 saturated bytes in the low half, upper half zeroed.
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_cvtusepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }
21640
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cvtepi8_epi16() {
        // Sign-extending widen of 32 x i8 -> 32 x i16.
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepi8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cvtepi8_epi16() {
        // Zero mask keeps `src`; all-ones mask widens every lane.
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepi8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cvtepi8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepi8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cvtepi8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepi8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
21711
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_cvtepu8_epi16() {
        // Zero-extending widen of 32 x u8 -> 32 x i16.
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepu8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_cvtepu8_epi16() {
        // Zero mask keeps `src`; all-ones mask widens every lane.
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepu8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_cvtepu8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepu8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_cvtepu8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepu8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }
21782
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_bslli_epi128() {
        // Byte-shift left by 9 within each 128-bit lane independently;
        // zeros shift in at the low end of every lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let r = _mm512_bslli_epi128::<9>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_bsrli_epi128() {
        // Byte-shift right by 3 within each 128-bit lane independently;
        // zeros shift in at the high end of every lane.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        );
        let r = _mm512_bsrli_epi128::<3>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
            0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
            0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        );
        assert_eq_m512i(r, e);
    }
21822
    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_alignr_epi8() {
        // Per 128-bit lane, concatenate a:b and shift right by 14 bytes,
        // so the two lowest result bytes come from the top of `b`.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_alignr_epi8::<14>(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_mask_alignr_epi8() {
        // Zero mask keeps the `src` operand (here `a`); all-ones mask
        // produces the full alignr result.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi8::<14>(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    const unsafe fn test_mm512_maskz_alignr_epi8() {
        // Zero mask zeroes the result; all-ones mask gives the alignr result.
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi8::<14>(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm256_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_mask_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    const unsafe fn test_mm_maskz_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }
21958
21959    #[simd_test(enable = "avx512bw")]
21960    unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
21961        let a = _mm512_set1_epi16(i16::MAX);
21962        let mut r = _mm256_undefined_si256();
21963        _mm512_mask_cvtsepi16_storeu_epi8(
21964            &mut r as *mut _ as *mut i8,
21965            0b11111111_11111111_11111111_11111111,
21966            a,
21967        );
21968        let e = _mm256_set1_epi8(i8::MAX);
21969        assert_eq_m256i(r, e);
21970    }
21971
21972    #[simd_test(enable = "avx512bw,avx512vl")]
21973    unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
21974        let a = _mm256_set1_epi16(i16::MAX);
21975        let mut r = _mm_undefined_si128();
21976        _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
21977        let e = _mm_set1_epi8(i8::MAX);
21978        assert_eq_m128i(r, e);
21979    }
21980
21981    #[simd_test(enable = "avx512bw,avx512vl")]
21982    unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
21983        let a = _mm_set1_epi16(i16::MAX);
21984        let mut r = _mm_set1_epi8(0);
21985        _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
21986        #[rustfmt::skip]
21987        let e = _mm_set_epi8(
21988            0, 0, 0, 0, 0, 0, 0, 0,
21989            i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
21990        );
21991        assert_eq_m128i(r, e);
21992    }
21993
21994    #[simd_test(enable = "avx512bw")]
21995    unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
21996        let a = _mm512_set1_epi16(8);
21997        let mut r = _mm256_undefined_si256();
21998        _mm512_mask_cvtepi16_storeu_epi8(
21999            &mut r as *mut _ as *mut i8,
22000            0b11111111_11111111_11111111_11111111,
22001            a,
22002        );
22003        let e = _mm256_set1_epi8(8);
22004        assert_eq_m256i(r, e);
22005    }
22006
22007    #[simd_test(enable = "avx512bw,avx512vl")]
22008    unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
22009        let a = _mm256_set1_epi16(8);
22010        let mut r = _mm_undefined_si128();
22011        _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
22012        let e = _mm_set1_epi8(8);
22013        assert_eq_m128i(r, e);
22014    }
22015
22016    #[simd_test(enable = "avx512bw,avx512vl")]
22017    unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
22018        let a = _mm_set1_epi16(8);
22019        let mut r = _mm_set1_epi8(0);
22020        _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
22021        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
22022        assert_eq_m128i(r, e);
22023    }
22024
22025    #[simd_test(enable = "avx512bw")]
22026    unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
22027        let a = _mm512_set1_epi16(i16::MAX);
22028        let mut r = _mm256_undefined_si256();
22029        _mm512_mask_cvtusepi16_storeu_epi8(
22030            &mut r as *mut _ as *mut i8,
22031            0b11111111_11111111_11111111_11111111,
22032            a,
22033        );
22034        let e = _mm256_set1_epi8(u8::MAX as i8);
22035        assert_eq_m256i(r, e);
22036    }
22037
22038    #[simd_test(enable = "avx512bw,avx512vl")]
22039    unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
22040        let a = _mm256_set1_epi16(i16::MAX);
22041        let mut r = _mm_undefined_si128();
22042        _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
22043        let e = _mm_set1_epi8(u8::MAX as i8);
22044        assert_eq_m128i(r, e);
22045    }
22046
22047    #[simd_test(enable = "avx512bw,avx512vl")]
22048    unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
22049        let a = _mm_set1_epi16(i16::MAX);
22050        let mut r = _mm_set1_epi8(0);
22051        _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
22052        #[rustfmt::skip]
22053        let e = _mm_set_epi8(
22054            0, 0, 0, 0,
22055            0, 0, 0, 0,
22056            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
22057            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
22058        );
22059        assert_eq_m128i(r, e);
22060    }
22061}