core/stdarch/crates/core_arch/src/x86/
avx512dq.rs

1use crate::{
2    core_arch::{simd::*, x86::*},
3    intrinsics::simd::*,
4    mem::transmute,
5};
6
// And
8
9/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
10/// and store the results in dst using writemask k (elements are copied from src if the corresponding
11/// bit is not set).
12///
13/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_pd&ig_expand=288)
14#[inline]
15#[target_feature(enable = "avx512dq,avx512vl")]
16#[cfg_attr(test, assert_instr(vandpd))]
17#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
19pub const fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
20    unsafe {
21        let and = _mm_and_pd(a, b).as_f64x2();
22        transmute(simd_select_bitmask(k, and, src.as_f64x2()))
23    }
24}
25
26/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
27/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
28///
29/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_pd&ig_expand=289)
30#[inline]
31#[target_feature(enable = "avx512dq,avx512vl")]
32#[cfg_attr(test, assert_instr(vandpd))]
33#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
35pub const fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36    unsafe {
37        let and = _mm_and_pd(a, b).as_f64x2();
38        transmute(simd_select_bitmask(k, and, f64x2::ZERO))
39    }
40}
41
42/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
43/// and store the results in dst using writemask k (elements are copied from src if the corresponding
44/// bit is not set).
45///
46/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_pd&ig_expand=291)
47#[inline]
48#[target_feature(enable = "avx512dq,avx512vl")]
49#[cfg_attr(test, assert_instr(vandpd))]
50#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
51#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
52pub const fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
53    unsafe {
54        let and = _mm256_and_pd(a, b).as_f64x4();
55        transmute(simd_select_bitmask(k, and, src.as_f64x4()))
56    }
57}
58
59/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
60/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
61///
62/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_pd&ig_expand=292)
63#[inline]
64#[target_feature(enable = "avx512dq,avx512vl")]
65#[cfg_attr(test, assert_instr(vandpd))]
66#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
67#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
68pub const fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
69    unsafe {
70        let and = _mm256_and_pd(a, b).as_f64x4();
71        transmute(simd_select_bitmask(k, and, f64x4::ZERO))
72    }
73}
74
75/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
76/// and store the results in dst.
77///
78/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_pd&ig_expand=293)
79#[inline]
80#[target_feature(enable = "avx512dq")]
81#[cfg_attr(test, assert_instr(vandp))]
82#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
83#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
84pub const fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d {
85    unsafe { transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
86}
87
88/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
89/// and store the results in dst using writemask k (elements are copied from src if the corresponding
90/// bit is not set).
91///
92/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_pd&ig_expand=294)
93#[inline]
94#[target_feature(enable = "avx512dq")]
95#[cfg_attr(test, assert_instr(vandpd))]
96#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
97#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
98pub const fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
99    unsafe {
100        let and = _mm512_and_pd(a, b).as_f64x8();
101        transmute(simd_select_bitmask(k, and, src.as_f64x8()))
102    }
103}
104
105/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
106/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
107///
108/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_pd&ig_expand=295)
109#[inline]
110#[target_feature(enable = "avx512dq")]
111#[cfg_attr(test, assert_instr(vandpd))]
112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
113#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
114pub const fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
115    unsafe {
116        let and = _mm512_and_pd(a, b).as_f64x8();
117        transmute(simd_select_bitmask(k, and, f64x8::ZERO))
118    }
119}
120
121/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
122/// and store the results in dst using writemask k (elements are copied from src if the corresponding
123/// bit is not set).
124///
125/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_ps&ig_expand=297)
126#[inline]
127#[target_feature(enable = "avx512dq,avx512vl")]
128#[cfg_attr(test, assert_instr(vandps))]
129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
130#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
131pub const fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
132    unsafe {
133        let and = _mm_and_ps(a, b).as_f32x4();
134        transmute(simd_select_bitmask(k, and, src.as_f32x4()))
135    }
136}
137
138/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
139/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
140///
141/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_ps&ig_expand=298)
142#[inline]
143#[target_feature(enable = "avx512dq,avx512vl")]
144#[cfg_attr(test, assert_instr(vandps))]
145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
146#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
147pub const fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
148    unsafe {
149        let and = _mm_and_ps(a, b).as_f32x4();
150        transmute(simd_select_bitmask(k, and, f32x4::ZERO))
151    }
152}
153
154/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
155/// and store the results in dst using writemask k (elements are copied from src if the corresponding
156/// bit is not set).
157///
158/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_ps&ig_expand=300)
159#[inline]
160#[target_feature(enable = "avx512dq,avx512vl")]
161#[cfg_attr(test, assert_instr(vandps))]
162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
163#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
164pub const fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
165    unsafe {
166        let and = _mm256_and_ps(a, b).as_f32x8();
167        transmute(simd_select_bitmask(k, and, src.as_f32x8()))
168    }
169}
170
171/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
172/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
173///
174/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_ps&ig_expand=301)
175#[inline]
176#[target_feature(enable = "avx512dq,avx512vl")]
177#[cfg_attr(test, assert_instr(vandps))]
178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
179#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
180pub const fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
181    unsafe {
182        let and = _mm256_and_ps(a, b).as_f32x8();
183        transmute(simd_select_bitmask(k, and, f32x8::ZERO))
184    }
185}
186
187/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
188/// and store the results in dst.
189///
190/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_ps&ig_expand=303)
191#[inline]
192#[target_feature(enable = "avx512dq")]
193#[cfg_attr(test, assert_instr(vandps))]
194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
195#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
196pub const fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 {
197    unsafe {
198        transmute(simd_and(
199            transmute::<_, u32x16>(a),
200            transmute::<_, u32x16>(b),
201        ))
202    }
203}
204
205/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
206/// and store the results in dst using writemask k (elements are copied from src if the corresponding
207/// bit is not set).
208///
209/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_ps&ig_expand=304)
210#[inline]
211#[target_feature(enable = "avx512dq")]
212#[cfg_attr(test, assert_instr(vandps))]
213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
214#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
215pub const fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
216    unsafe {
217        let and = _mm512_and_ps(a, b).as_f32x16();
218        transmute(simd_select_bitmask(k, and, src.as_f32x16()))
219    }
220}
221
222/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
223/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
224///
225/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_ps&ig_expand=305)
226#[inline]
227#[target_feature(enable = "avx512dq")]
228#[cfg_attr(test, assert_instr(vandps))]
229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
230#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
231pub const fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
232    unsafe {
233        let and = _mm512_and_ps(a, b).as_f32x16();
234        transmute(simd_select_bitmask(k, and, f32x16::ZERO))
235    }
236}
237
238// Andnot
239
240/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
241/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
242/// corresponding bit is not set).
243///
244/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_pd&ig_expand=326)
245#[inline]
246#[target_feature(enable = "avx512dq,avx512vl")]
247#[cfg_attr(test, assert_instr(vandnpd))]
248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
249#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
250pub const fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
251    unsafe {
252        let andnot = _mm_andnot_pd(a, b).as_f64x2();
253        transmute(simd_select_bitmask(k, andnot, src.as_f64x2()))
254    }
255}
256
257/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
258/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
259/// corresponding bit is not set).
260///
261/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_pd&ig_expand=327)
262#[inline]
263#[target_feature(enable = "avx512dq,avx512vl")]
264#[cfg_attr(test, assert_instr(vandnpd))]
265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
266#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
267pub const fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
268    unsafe {
269        let andnot = _mm_andnot_pd(a, b).as_f64x2();
270        transmute(simd_select_bitmask(k, andnot, f64x2::ZERO))
271    }
272}
273
274/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
275/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
276/// corresponding bit is not set).
277///
278/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_pd&ig_expand=329)
279#[inline]
280#[target_feature(enable = "avx512dq,avx512vl")]
281#[cfg_attr(test, assert_instr(vandnpd))]
282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
283#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
284pub const fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
285    unsafe {
286        let andnot = _mm256_andnot_pd(a, b).as_f64x4();
287        transmute(simd_select_bitmask(k, andnot, src.as_f64x4()))
288    }
289}
290
291/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
292/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
293/// corresponding bit is not set).
294///
295/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_pd&ig_expand=330)
296#[inline]
297#[target_feature(enable = "avx512dq,avx512vl")]
298#[cfg_attr(test, assert_instr(vandnpd))]
299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
300#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
301pub const fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
302    unsafe {
303        let andnot = _mm256_andnot_pd(a, b).as_f64x4();
304        transmute(simd_select_bitmask(k, andnot, f64x4::ZERO))
305    }
306}
307
308/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
309/// bitwise AND with b and store the results in dst.
310///
311/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_pd&ig_expand=331)
312#[inline]
313#[target_feature(enable = "avx512dq")]
314#[cfg_attr(test, assert_instr(vandnp))]
315#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
316#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
317pub const fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d {
318    unsafe { _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) }
319}
320
321/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
322/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
323/// corresponding bit is not set).
324///
325/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_pd&ig_expand=332)
326#[inline]
327#[target_feature(enable = "avx512dq")]
328#[cfg_attr(test, assert_instr(vandnpd))]
329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
330#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
331pub const fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
332    unsafe {
333        let andnot = _mm512_andnot_pd(a, b).as_f64x8();
334        transmute(simd_select_bitmask(k, andnot, src.as_f64x8()))
335    }
336}
337
338/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
339/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
340/// corresponding bit is not set).
341///
342/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_pd&ig_expand=333)
343#[inline]
344#[target_feature(enable = "avx512dq")]
345#[cfg_attr(test, assert_instr(vandnpd))]
346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
347#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
348pub const fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
349    unsafe {
350        let andnot = _mm512_andnot_pd(a, b).as_f64x8();
351        transmute(simd_select_bitmask(k, andnot, f64x8::ZERO))
352    }
353}
354
355/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
356/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
357/// corresponding bit is not set).
358///
359/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_ps&ig_expand=335)
360#[inline]
361#[target_feature(enable = "avx512dq,avx512vl")]
362#[cfg_attr(test, assert_instr(vandnps))]
363#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
364#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
365pub const fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
366    unsafe {
367        let andnot = _mm_andnot_ps(a, b).as_f32x4();
368        transmute(simd_select_bitmask(k, andnot, src.as_f32x4()))
369    }
370}
371
372/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
373/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
374/// corresponding bit is not set).
375///
376/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_ps&ig_expand=336)
377#[inline]
378#[target_feature(enable = "avx512dq,avx512vl")]
379#[cfg_attr(test, assert_instr(vandnps))]
380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
381#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
382pub const fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
383    unsafe {
384        let andnot = _mm_andnot_ps(a, b).as_f32x4();
385        transmute(simd_select_bitmask(k, andnot, f32x4::ZERO))
386    }
387}
388
389/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
390/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
391/// corresponding bit is not set).
392///
393/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_ps&ig_expand=338)
394#[inline]
395#[target_feature(enable = "avx512dq,avx512vl")]
396#[cfg_attr(test, assert_instr(vandnps))]
397#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
398#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
399pub const fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
400    unsafe {
401        let andnot = _mm256_andnot_ps(a, b).as_f32x8();
402        transmute(simd_select_bitmask(k, andnot, src.as_f32x8()))
403    }
404}
405
406/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
407/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
408/// corresponding bit is not set).
409///
410/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_ps&ig_expand=339)
411#[inline]
412#[target_feature(enable = "avx512dq,avx512vl")]
413#[cfg_attr(test, assert_instr(vandnps))]
414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
415#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
416pub const fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
417    unsafe {
418        let andnot = _mm256_andnot_ps(a, b).as_f32x8();
419        transmute(simd_select_bitmask(k, andnot, f32x8::ZERO))
420    }
421}
422
423/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
424/// bitwise AND with b and store the results in dst.
425///
426/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_ps&ig_expand=340)
427#[inline]
428#[target_feature(enable = "avx512dq")]
429#[cfg_attr(test, assert_instr(vandnps))]
430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
431#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
432pub const fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 {
433    unsafe { _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b) }
434}
435
436/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
437/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
438/// corresponding bit is not set).
439///
440/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_ps&ig_expand=341)
441#[inline]
442#[target_feature(enable = "avx512dq")]
443#[cfg_attr(test, assert_instr(vandnps))]
444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
445#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
446pub const fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
447    unsafe {
448        let andnot = _mm512_andnot_ps(a, b).as_f32x16();
449        transmute(simd_select_bitmask(k, andnot, src.as_f32x16()))
450    }
451}
452
453/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
454/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
455/// corresponding bit is not set).
456///
457/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_ps&ig_expand=342)
458#[inline]
459#[target_feature(enable = "avx512dq")]
460#[cfg_attr(test, assert_instr(vandnps))]
461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
462#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
463pub const fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
464    unsafe {
465        let andnot = _mm512_andnot_ps(a, b).as_f32x16();
466        transmute(simd_select_bitmask(k, andnot, f32x16::ZERO))
467    }
468}
469
470// Or
471
472/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
473/// and store the results in dst using writemask k (elements are copied from src if the corresponding
474/// bit is not set).
475///
476/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_pd&ig_expand=4824)
477#[inline]
478#[target_feature(enable = "avx512dq,avx512vl")]
479#[cfg_attr(test, assert_instr(vorpd))]
480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
481#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
482pub const fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
483    unsafe {
484        let or = _mm_or_pd(a, b).as_f64x2();
485        transmute(simd_select_bitmask(k, or, src.as_f64x2()))
486    }
487}
488
489/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
490/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
491///
492/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_pd&ig_expand=4825)
493#[inline]
494#[target_feature(enable = "avx512dq,avx512vl")]
495#[cfg_attr(test, assert_instr(vorpd))]
496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
497#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
498pub const fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
499    unsafe {
500        let or = _mm_or_pd(a, b).as_f64x2();
501        transmute(simd_select_bitmask(k, or, f64x2::ZERO))
502    }
503}
504
505/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
506/// and store the results in dst using writemask k (elements are copied from src if the corresponding
507/// bit is not set).
508///
509/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_pd&ig_expand=4827)
510#[inline]
511#[target_feature(enable = "avx512dq,avx512vl")]
512#[cfg_attr(test, assert_instr(vorpd))]
513#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
514#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
515pub const fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
516    unsafe {
517        let or = _mm256_or_pd(a, b).as_f64x4();
518        transmute(simd_select_bitmask(k, or, src.as_f64x4()))
519    }
520}
521
522/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
523/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
524///
525/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_pd&ig_expand=4828)
526#[inline]
527#[target_feature(enable = "avx512dq,avx512vl")]
528#[cfg_attr(test, assert_instr(vorpd))]
529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
530#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
531pub const fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
532    unsafe {
533        let or = _mm256_or_pd(a, b).as_f64x4();
534        transmute(simd_select_bitmask(k, or, f64x4::ZERO))
535    }
536}
537
538/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
539/// and store the results in dst.
540///
541/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_pd&ig_expand=4829)
542#[inline]
543#[target_feature(enable = "avx512dq")]
544#[cfg_attr(test, assert_instr(vorp))]
545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
546#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
547pub const fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d {
548    unsafe { transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
549}
550
551/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
552/// store the results in dst using writemask k (elements are copied from src if the corresponding
553/// bit is not set).
554///
555/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_pd&ig_expand=4830)
556#[inline]
557#[target_feature(enable = "avx512dq")]
558#[cfg_attr(test, assert_instr(vorpd))]
559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
560#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
561pub const fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
562    unsafe {
563        let or = _mm512_or_pd(a, b).as_f64x8();
564        transmute(simd_select_bitmask(k, or, src.as_f64x8()))
565    }
566}
567
568/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
569/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
570///
571/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_pd&ig_expand=4831)
572#[inline]
573#[target_feature(enable = "avx512dq")]
574#[cfg_attr(test, assert_instr(vorpd))]
575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
576#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
577pub const fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
578    unsafe {
579        let or = _mm512_or_pd(a, b).as_f64x8();
580        transmute(simd_select_bitmask(k, or, f64x8::ZERO))
581    }
582}
583
584/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
585/// and store the results in dst using writemask k (elements are copied from src if the corresponding
586/// bit is not set).
587///
588/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_ps&ig_expand=4833)
589#[inline]
590#[target_feature(enable = "avx512dq,avx512vl")]
591#[cfg_attr(test, assert_instr(vorps))]
592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
593#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
594pub const fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
595    unsafe {
596        let or = _mm_or_ps(a, b).as_f32x4();
597        transmute(simd_select_bitmask(k, or, src.as_f32x4()))
598    }
599}
600
601/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
602/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
603///
604/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_ps&ig_expand=4834)
605#[inline]
606#[target_feature(enable = "avx512dq,avx512vl")]
607#[cfg_attr(test, assert_instr(vorps))]
608#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
609#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
610pub const fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
611    unsafe {
612        let or = _mm_or_ps(a, b).as_f32x4();
613        transmute(simd_select_bitmask(k, or, f32x4::ZERO))
614    }
615}
616
617/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
618/// and store the results in dst using writemask k (elements are copied from src if the corresponding
619/// bit is not set).
620///
621/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_ps&ig_expand=4836)
622#[inline]
623#[target_feature(enable = "avx512dq,avx512vl")]
624#[cfg_attr(test, assert_instr(vorps))]
625#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
626#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
627pub const fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
628    unsafe {
629        let or = _mm256_or_ps(a, b).as_f32x8();
630        transmute(simd_select_bitmask(k, or, src.as_f32x8()))
631    }
632}
633
634/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
635/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
636///
637/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_ps&ig_expand=4837)
638#[inline]
639#[target_feature(enable = "avx512dq,avx512vl")]
640#[cfg_attr(test, assert_instr(vorps))]
641#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
642#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
643pub const fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
644    unsafe {
645        let or = _mm256_or_ps(a, b).as_f32x8();
646        transmute(simd_select_bitmask(k, or, f32x8::ZERO))
647    }
648}
649
650/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
651/// and store the results in dst.
652///
653/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_ps&ig_expand=4838)
654#[inline]
655#[target_feature(enable = "avx512dq")]
656#[cfg_attr(test, assert_instr(vorps))]
657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
658#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
659pub const fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 {
660    unsafe {
661        transmute(simd_or(
662            transmute::<_, u32x16>(a),
663            transmute::<_, u32x16>(b),
664        ))
665    }
666}
667
668/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
669/// store the results in dst using writemask k (elements are copied from src if the corresponding
670/// bit is not set).
671///
672/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_ps&ig_expand=4839)
673#[inline]
674#[target_feature(enable = "avx512dq")]
675#[cfg_attr(test, assert_instr(vorps))]
676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
677#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
678pub const fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
679    unsafe {
680        let or = _mm512_or_ps(a, b).as_f32x16();
681        transmute(simd_select_bitmask(k, or, src.as_f32x16()))
682    }
683}
684
685/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
686/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
687///
688/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_ps&ig_expand=4840)
689#[inline]
690#[target_feature(enable = "avx512dq")]
691#[cfg_attr(test, assert_instr(vorps))]
692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
693#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
694pub const fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
695    unsafe {
696        let or = _mm512_or_ps(a, b).as_f32x16();
697        transmute(simd_select_bitmask(k, or, f32x16::ZERO))
698    }
699}
700
701// Xor
702
703/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
704/// and store the results in dst using writemask k (elements are copied from src if the corresponding
705/// bit is not set).
706///
707/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_pd&ig_expand=7094)
708#[inline]
709#[target_feature(enable = "avx512dq,avx512vl")]
710#[cfg_attr(test, assert_instr(vxorpd))]
711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
712#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
713pub const fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
714    unsafe {
715        let xor = _mm_xor_pd(a, b).as_f64x2();
716        transmute(simd_select_bitmask(k, xor, src.as_f64x2()))
717    }
718}
719
720/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
721/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
722///
723/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_pd&ig_expand=7095)
724#[inline]
725#[target_feature(enable = "avx512dq,avx512vl")]
726#[cfg_attr(test, assert_instr(vxorpd))]
727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
728#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
729pub const fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
730    unsafe {
731        let xor = _mm_xor_pd(a, b).as_f64x2();
732        transmute(simd_select_bitmask(k, xor, f64x2::ZERO))
733    }
734}
735
736/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
737/// and store the results in dst using writemask k (elements are copied from src if the corresponding
738/// bit is not set).
739///
740/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_pd&ig_expand=7097)
741#[inline]
742#[target_feature(enable = "avx512dq,avx512vl")]
743#[cfg_attr(test, assert_instr(vxorpd))]
744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
745#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
746pub const fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
747    unsafe {
748        let xor = _mm256_xor_pd(a, b).as_f64x4();
749        transmute(simd_select_bitmask(k, xor, src.as_f64x4()))
750    }
751}
752
753/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
754/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
755///
756/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_pd&ig_expand=7098)
757#[inline]
758#[target_feature(enable = "avx512dq,avx512vl")]
759#[cfg_attr(test, assert_instr(vxorpd))]
760#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
761#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
762pub const fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
763    unsafe {
764        let xor = _mm256_xor_pd(a, b).as_f64x4();
765        transmute(simd_select_bitmask(k, xor, f64x4::ZERO))
766    }
767}
768
/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_pd&ig_expand=7102)
#[inline]
#[target_feature(enable = "avx512dq")]
// NOTE(review): the expected mnemonic is `vxorp` with no `s`/`d` suffix, unlike the `vxorpd`
// used by the masked variants of this intrinsic. This presumably relies on prefix matching to
// accept either vxorps or vxorpd from the code generator — confirm this is intentional.
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d {
    // Bitwise ops are defined on integer vectors: view the f64 lanes as u64,
    // XOR them, and transmute the result back to __m512d.
    unsafe { transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}
781
782/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
783/// store the results in dst using writemask k (elements are copied from src if the corresponding
784/// bit is not set).
785///
786/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_pd&ig_expand=7100)
787#[inline]
788#[target_feature(enable = "avx512dq")]
789#[cfg_attr(test, assert_instr(vxorpd))]
790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
791#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
792pub const fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
793    unsafe {
794        let xor = _mm512_xor_pd(a, b).as_f64x8();
795        transmute(simd_select_bitmask(k, xor, src.as_f64x8()))
796    }
797}
798
799/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
800/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
801///
802/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_pd&ig_expand=7101)
803#[inline]
804#[target_feature(enable = "avx512dq")]
805#[cfg_attr(test, assert_instr(vxorpd))]
806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
807#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
808pub const fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
809    unsafe {
810        let xor = _mm512_xor_pd(a, b).as_f64x8();
811        transmute(simd_select_bitmask(k, xor, f64x8::ZERO))
812    }
813}
814
815/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
816/// and store the results in dst using writemask k (elements are copied from src if the corresponding
817/// bit is not set).
818///
819/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_ps&ig_expand=7103)
820#[inline]
821#[target_feature(enable = "avx512dq,avx512vl")]
822#[cfg_attr(test, assert_instr(vxorps))]
823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
824#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
825pub const fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
826    unsafe {
827        let xor = _mm_xor_ps(a, b).as_f32x4();
828        transmute(simd_select_bitmask(k, xor, src.as_f32x4()))
829    }
830}
831
832/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
833/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
834///
835/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_ps&ig_expand=7104)
836#[inline]
837#[target_feature(enable = "avx512dq,avx512vl")]
838#[cfg_attr(test, assert_instr(vxorps))]
839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
840#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
841pub const fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
842    unsafe {
843        let xor = _mm_xor_ps(a, b).as_f32x4();
844        transmute(simd_select_bitmask(k, xor, f32x4::ZERO))
845    }
846}
847
848/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
849/// and store the results in dst using writemask k (elements are copied from src if the corresponding
850/// bit is not set).
851///
852/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_ps&ig_expand=7106)
853#[inline]
854#[target_feature(enable = "avx512dq,avx512vl")]
855#[cfg_attr(test, assert_instr(vxorps))]
856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
857#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
858pub const fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
859    unsafe {
860        let xor = _mm256_xor_ps(a, b).as_f32x8();
861        transmute(simd_select_bitmask(k, xor, src.as_f32x8()))
862    }
863}
864
865/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
866/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
867///
868/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_ps&ig_expand=7107)
869#[inline]
870#[target_feature(enable = "avx512dq,avx512vl")]
871#[cfg_attr(test, assert_instr(vxorps))]
872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
873#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
874pub const fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
875    unsafe {
876        let xor = _mm256_xor_ps(a, b).as_f32x8();
877        transmute(simd_select_bitmask(k, xor, f32x8::ZERO))
878    }
879}
880
881/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
882/// and store the results in dst.
883///
884/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_ps&ig_expand=7111)
885#[inline]
886#[target_feature(enable = "avx512dq")]
887#[cfg_attr(test, assert_instr(vxorps))]
888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
889#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
890pub const fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 {
891    unsafe {
892        transmute(simd_xor(
893            transmute::<_, u32x16>(a),
894            transmute::<_, u32x16>(b),
895        ))
896    }
897}
898
899/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
900/// store the results in dst using writemask k (elements are copied from src if the corresponding
901/// bit is not set).
902///
903/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_ps&ig_expand=7109)
904#[inline]
905#[target_feature(enable = "avx512dq")]
906#[cfg_attr(test, assert_instr(vxorps))]
907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
908#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
909pub const fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
910    unsafe {
911        let xor = _mm512_xor_ps(a, b).as_f32x16();
912        transmute(simd_select_bitmask(k, xor, src.as_f32x16()))
913    }
914}
915
916/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
917/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
918///
919/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_ps&ig_expand=7110)
920#[inline]
921#[target_feature(enable = "avx512dq")]
922#[cfg_attr(test, assert_instr(vxorps))]
923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
924#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
925pub const fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
926    unsafe {
927        let xor = _mm512_xor_ps(a, b).as_f32x16();
928        transmute(simd_select_bitmask(k, xor, f32x16::ZERO))
929    }
930}
931
932// Broadcast
933
934/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
935/// elements of dst.
936///
937/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x2&ig_expand=509)
938#[inline]
939#[target_feature(enable = "avx512dq,avx512vl")]
940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
941#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
942pub const fn _mm256_broadcast_f32x2(a: __m128) -> __m256 {
943    unsafe {
944        let b: f32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
945        transmute(b)
946    }
947}
948
949/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
950/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
951///
952/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x2&ig_expand=510)
953#[inline]
954#[target_feature(enable = "avx512dq,avx512vl")]
955#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
957#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
958pub const fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 {
959    unsafe {
960        let b = _mm256_broadcast_f32x2(a).as_f32x8();
961        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
962    }
963}
964
965/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
966/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
967///
968/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x2&ig_expand=511)
969#[inline]
970#[target_feature(enable = "avx512dq,avx512vl")]
971#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
973#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
974pub const fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 {
975    unsafe {
976        let b = _mm256_broadcast_f32x2(a).as_f32x8();
977        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
978    }
979}
980
981/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
982/// elements of dst.
983///
984/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x2&ig_expand=512)
985#[inline]
986#[target_feature(enable = "avx512dq")]
987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
989pub const fn _mm512_broadcast_f32x2(a: __m128) -> __m512 {
990    unsafe {
991        let b: f32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
992        transmute(b)
993    }
994}
995
996/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
997/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
998///
999/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x2&ig_expand=513)
1000#[inline]
1001#[target_feature(enable = "avx512dq")]
1002#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
1003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1004#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1005pub const fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 {
1006    unsafe {
1007        let b = _mm512_broadcast_f32x2(a).as_f32x16();
1008        transmute(simd_select_bitmask(k, b, src.as_f32x16()))
1009    }
1010}
1011
1012/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
1013/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1014///
1015/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x2&ig_expand=514)
1016#[inline]
1017#[target_feature(enable = "avx512dq")]
1018#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
1019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1021pub const fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 {
1022    unsafe {
1023        let b = _mm512_broadcast_f32x2(a).as_f32x16();
1024        transmute(simd_select_bitmask(k, b, f32x16::ZERO))
1025    }
1026}
1027
1028/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
1029/// elements of dst.
1030///
1031/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x8&ig_expand=521)
1032#[inline]
1033#[target_feature(enable = "avx512dq")]
1034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1036pub const fn _mm512_broadcast_f32x8(a: __m256) -> __m512 {
1037    unsafe {
1038        let b: f32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
1039        transmute(b)
1040    }
1041}
1042
1043/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
1044/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
1045///
1046/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x8&ig_expand=522)
1047#[inline]
1048#[target_feature(enable = "avx512dq")]
1049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1051pub const fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 {
1052    unsafe {
1053        let b = _mm512_broadcast_f32x8(a).as_f32x16();
1054        transmute(simd_select_bitmask(k, b, src.as_f32x16()))
1055    }
1056}
1057
1058/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
1059/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1060///
1061/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x8&ig_expand=523)
1062#[inline]
1063#[target_feature(enable = "avx512dq")]
1064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1065#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1066pub const fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 {
1067    unsafe {
1068        let b = _mm512_broadcast_f32x8(a).as_f32x16();
1069        transmute(simd_select_bitmask(k, b, f32x16::ZERO))
1070    }
1071}
1072
1073/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
1074/// elements of dst.
1075///
1076/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f64x2&ig_expand=524)
1077#[inline]
1078#[target_feature(enable = "avx512dq,avx512vl")]
1079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1081pub const fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d {
1082    unsafe {
1083        let b: f64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
1084        transmute(b)
1085    }
1086}
1087
1088/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
1089/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
1090///
1091/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f64x2&ig_expand=525)
1092#[inline]
1093#[target_feature(enable = "avx512dq,avx512vl")]
1094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1095#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1096pub const fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
1097    unsafe {
1098        let b = _mm256_broadcast_f64x2(a).as_f64x4();
1099        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
1100    }
1101}
1102
1103/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
1104/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1105///
1106/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f64x2&ig_expand=526)
1107#[inline]
1108#[target_feature(enable = "avx512dq,avx512vl")]
1109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1110#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1111pub const fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d {
1112    unsafe {
1113        let b = _mm256_broadcast_f64x2(a).as_f64x4();
1114        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
1115    }
1116}
1117
1118/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
1119/// elements of dst.
1120///
1121/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x2&ig_expand=527)
1122#[inline]
1123#[target_feature(enable = "avx512dq")]
1124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1125#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1126pub const fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d {
1127    unsafe {
1128        let b: f64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
1129        transmute(b)
1130    }
1131}
1132
1133/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
1134/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
1135///
1136/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x2&ig_expand=528)
1137#[inline]
1138#[target_feature(enable = "avx512dq")]
1139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1140#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1141pub const fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
1142    unsafe {
1143        let b = _mm512_broadcast_f64x2(a).as_f64x8();
1144        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
1145    }
1146}
1147
1148/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
1149/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1150///
1151/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x2&ig_expand=529)
1152#[inline]
1153#[target_feature(enable = "avx512dq")]
1154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1155#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1156pub const fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d {
1157    unsafe {
1158        let b = _mm512_broadcast_f64x2(a).as_f64x8();
1159        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
1160    }
1161}
1162
1163/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
1164///
1165/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_i32x2&ig_expand=533)
1166#[inline]
1167#[target_feature(enable = "avx512dq,avx512vl")]
1168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1169#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1170pub const fn _mm_broadcast_i32x2(a: __m128i) -> __m128i {
1171    unsafe {
1172        let a = a.as_i32x4();
1173        let b: i32x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
1174        transmute(b)
1175    }
1176}
1177
1178/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
1179/// (elements are copied from src if the corresponding bit is not set).
1180///
1181/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcast_i32x2&ig_expand=534)
1182#[inline]
1183#[target_feature(enable = "avx512dq,avx512vl")]
1184#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
1185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1186#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1187pub const fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
1188    unsafe {
1189        let b = _mm_broadcast_i32x2(a).as_i32x4();
1190        transmute(simd_select_bitmask(k, b, src.as_i32x4()))
1191    }
1192}
1193
1194/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
1195/// (elements are zeroed out if the corresponding bit is not set).
1196///
1197/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcast_i32x2&ig_expand=535)
1198#[inline]
1199#[target_feature(enable = "avx512dq,avx512vl")]
1200#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
1201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1202#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1203pub const fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i {
1204    unsafe {
1205        let b = _mm_broadcast_i32x2(a).as_i32x4();
1206        transmute(simd_select_bitmask(k, b, i32x4::ZERO))
1207    }
1208}
1209
1210/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
1211///
1212/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x2&ig_expand=536)
1213#[inline]
1214#[target_feature(enable = "avx512dq,avx512vl")]
1215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1216#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1217pub const fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i {
1218    unsafe {
1219        let a = a.as_i32x4();
1220        let b: i32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
1221        transmute(b)
1222    }
1223}
1224
1225/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
1226/// (elements are copied from src if the corresponding bit is not set).
1227///
1228/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x2&ig_expand=537)
1229#[inline]
1230#[target_feature(enable = "avx512dq,avx512vl")]
1231#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
1232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1233#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1234pub const fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
1235    unsafe {
1236        let b = _mm256_broadcast_i32x2(a).as_i32x8();
1237        transmute(simd_select_bitmask(k, b, src.as_i32x8()))
1238    }
1239}
1240
1241/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
1242/// (elements are zeroed out if the corresponding bit is not set).
1243///
1244/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x2&ig_expand=538)
1245#[inline]
1246#[target_feature(enable = "avx512dq,avx512vl")]
1247#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
1248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1249#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1250pub const fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i {
1251    unsafe {
1252        let b = _mm256_broadcast_i32x2(a).as_i32x8();
1253        transmute(simd_select_bitmask(k, b, i32x8::ZERO))
1254    }
1255}
1256
/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x2&ig_expand=539)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i32x4();
        // Repeat source lanes {0, 1} across all eight 64-bit pairs of the
        // 512-bit destination.
        let b: i32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}
1271
1272/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
1273/// (elements are copied from src if the corresponding bit is not set).
1274///
1275/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x2&ig_expand=540)
1276#[inline]
1277#[target_feature(enable = "avx512dq")]
1278#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
1279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1280#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1281pub const fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
1282    unsafe {
1283        let b = _mm512_broadcast_i32x2(a).as_i32x16();
1284        transmute(simd_select_bitmask(k, b, src.as_i32x16()))
1285    }
1286}
1287
1288/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
1289/// (elements are zeroed out if the corresponding bit is not set).
1290///
1291/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x2&ig_expand=541)
1292#[inline]
1293#[target_feature(enable = "avx512dq")]
1294#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
1295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1297pub const fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i {
1298    unsafe {
1299        let b = _mm512_broadcast_i32x2(a).as_i32x16();
1300        transmute(simd_select_bitmask(k, b, i32x16::ZERO))
1301    }
1302}
1303
/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x8&ig_expand=548)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_i32x8();
        // Duplicate the whole 256-bit source into both halves of the
        // 512-bit destination (lanes 0..=7 repeated twice).
        let b: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
        transmute(b)
    }
}
1318
1319/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using writemask k
1320/// (elements are copied from src if the corresponding bit is not set).
1321///
1322/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x8&ig_expand=549)
1323#[inline]
1324#[target_feature(enable = "avx512dq")]
1325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1326#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1327pub const fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
1328    unsafe {
1329        let b = _mm512_broadcast_i32x8(a).as_i32x16();
1330        transmute(simd_select_bitmask(k, b, src.as_i32x16()))
1331    }
1332}
1333
1334/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using zeromask k
1335/// (elements are zeroed out if the corresponding bit is not set).
1336///
1337/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x8&ig_expand=550)
1338#[inline]
1339#[target_feature(enable = "avx512dq")]
1340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1341#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1342pub const fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i {
1343    unsafe {
1344        let b = _mm512_broadcast_i32x8(a).as_i32x16();
1345        transmute(simd_select_bitmask(k, b, i32x16::ZERO))
1346    }
1347}
1348
/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i64x2&ig_expand=551)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i64x2();
        // Duplicate the 128-bit source into both halves of the 256-bit
        // destination (lanes {0, 1} repeated).
        let b: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
        transmute(b)
    }
}
1363
1364/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
1365/// (elements are copied from src if the corresponding bit is not set).
1366///
1367/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i64x2&ig_expand=552)
1368#[inline]
1369#[target_feature(enable = "avx512dq,avx512vl")]
1370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1371#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1372pub const fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
1373    unsafe {
1374        let b = _mm256_broadcast_i64x2(a).as_i64x4();
1375        transmute(simd_select_bitmask(k, b, src.as_i64x4()))
1376    }
1377}
1378
1379/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
1380/// (elements are zeroed out if the corresponding bit is not set).
1381///
1382/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i64x2&ig_expand=553)
1383#[inline]
1384#[target_feature(enable = "avx512dq,avx512vl")]
1385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1386#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1387pub const fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i {
1388    unsafe {
1389        let b = _mm256_broadcast_i64x2(a).as_i64x4();
1390        transmute(simd_select_bitmask(k, b, i64x4::ZERO))
1391    }
1392}
1393
/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x2&ig_expand=554)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i64x2();
        // Duplicate the 128-bit source into all four 128-bit quarters of the
        // 512-bit destination (lanes {0, 1} repeated four times).
        let b: i64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}
1408
1409/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
1410/// (elements are copied from src if the corresponding bit is not set).
1411///
1412/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x2&ig_expand=555)
1413#[inline]
1414#[target_feature(enable = "avx512dq")]
1415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1416#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1417pub const fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
1418    unsafe {
1419        let b = _mm512_broadcast_i64x2(a).as_i64x8();
1420        transmute(simd_select_bitmask(k, b, src.as_i64x8()))
1421    }
1422}
1423
1424/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
1425/// (elements are zeroed out if the corresponding bit is not set).
1426///
1427/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x2&ig_expand=556)
1428#[inline]
1429#[target_feature(enable = "avx512dq")]
1430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1431#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1432pub const fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i {
1433    unsafe {
1434        let b = _mm512_broadcast_i64x2(a).as_i64x8();
1435        transmute(simd_select_bitmask(k, b, i64x8::ZERO))
1436    }
1437}
1438
1439// Extract
1440
/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x8_ps&ig_expand=2946)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf32x8_ps<const IMM8: i32>(a: __m512) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // IMM8 selects which 256-bit half of `a` to extract:
        // 0 => lanes 0..=7 (low half), 1 => lanes 8..=15 (high half).
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        }
    }
}
1459
1460/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
1461/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
1462/// if the corresponding bit is not set).
1463///
1464/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x8_ps&ig_expand=2947)
1465#[inline]
1466#[target_feature(enable = "avx512dq")]
1467#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
1468#[rustc_legacy_const_generics(3)]
1469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1470#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1471pub const fn _mm512_mask_extractf32x8_ps<const IMM8: i32>(
1472    src: __m256,
1473    k: __mmask8,
1474    a: __m512,
1475) -> __m256 {
1476    unsafe {
1477        static_assert_uimm_bits!(IMM8, 1);
1478        let b = _mm512_extractf32x8_ps::<IMM8>(a);
1479        transmute(simd_select_bitmask(k, b.as_f32x8(), src.as_f32x8()))
1480    }
1481}
1482
1483/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
1484/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
1485/// corresponding bit is not set).
1486///
1487/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x8_ps&ig_expand=2948)
1488#[inline]
1489#[target_feature(enable = "avx512dq")]
1490#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
1491#[rustc_legacy_const_generics(2)]
1492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1493#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1494pub const fn _mm512_maskz_extractf32x8_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m256 {
1495    unsafe {
1496        static_assert_uimm_bits!(IMM8, 1);
1497        let b = _mm512_extractf32x8_ps::<IMM8>(a);
1498        transmute(simd_select_bitmask(k, b.as_f32x8(), f32x8::ZERO))
1499    }
1500}
1501
/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf64x2_pd&ig_expand=2949)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extractf64x2_pd<const IMM8: i32>(a: __m256d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // IMM8 selects which 128-bit half of `a` to extract:
        // 0 => lanes {0, 1} (low half), 1 => lanes {2, 3} (high half).
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1]),
            _ => simd_shuffle!(a, a, [2, 3]),
        }
    }
}
1520
1521/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
1522/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
1523/// if the corresponding bit is not set).
1524///
1525/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf64x2_pd&ig_expand=2950)
1526#[inline]
1527#[target_feature(enable = "avx512dq,avx512vl")]
1528#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
1529#[rustc_legacy_const_generics(3)]
1530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1531#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1532pub const fn _mm256_mask_extractf64x2_pd<const IMM8: i32>(
1533    src: __m128d,
1534    k: __mmask8,
1535    a: __m256d,
1536) -> __m128d {
1537    unsafe {
1538        static_assert_uimm_bits!(IMM8, 1);
1539        let b = _mm256_extractf64x2_pd::<IMM8>(a);
1540        transmute(simd_select_bitmask(k, b.as_f64x2(), src.as_f64x2()))
1541    }
1542}
1543
1544/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
1545/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
1546/// corresponding bit is not set).
1547///
1548/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf64x2_pd&ig_expand=2951)
1549#[inline]
1550#[target_feature(enable = "avx512dq,avx512vl")]
1551#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
1552#[rustc_legacy_const_generics(2)]
1553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1554#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1555pub const fn _mm256_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m128d {
1556    unsafe {
1557        static_assert_uimm_bits!(IMM8, 1);
1558        let b = _mm256_extractf64x2_pd::<IMM8>(a);
1559        transmute(simd_select_bitmask(k, b.as_f64x2(), f64x2::ZERO))
1560    }
1561}
1562
/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x2_pd&ig_expand=2952)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf64x2_pd<const IMM8: i32>(a: __m512d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        // IMM8 (two bits) selects one of the four 128-bit quarters of `a`;
        // quarter q corresponds to lanes {2q, 2q + 1}.
        match IMM8 & 3 {
            0 => simd_shuffle!(a, a, [0, 1]),
            1 => simd_shuffle!(a, a, [2, 3]),
            2 => simd_shuffle!(a, a, [4, 5]),
            _ => simd_shuffle!(a, a, [6, 7]),
        }
    }
}
1583
1584/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
1585/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
1586/// if the corresponding bit is not set).
1587///
1588/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x2_pd&ig_expand=2953)
1589#[inline]
1590#[target_feature(enable = "avx512dq")]
1591#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
1592#[rustc_legacy_const_generics(3)]
1593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1594#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1595pub const fn _mm512_mask_extractf64x2_pd<const IMM8: i32>(
1596    src: __m128d,
1597    k: __mmask8,
1598    a: __m512d,
1599) -> __m128d {
1600    unsafe {
1601        static_assert_uimm_bits!(IMM8, 2);
1602        let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
1603        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
1604    }
1605}
1606
1607/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
1608/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
1609/// corresponding bit is not set).
1610///
1611/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x2_pd&ig_expand=2954)
1612#[inline]
1613#[target_feature(enable = "avx512dq")]
1614#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
1615#[rustc_legacy_const_generics(2)]
1616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1617#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1618pub const fn _mm512_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m128d {
1619    unsafe {
1620        static_assert_uimm_bits!(IMM8, 2);
1621        let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
1622        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
1623    }
1624}
1625
/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x8_epi32&ig_expand=2965)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extracti32x8_epi32<const IMM8: i32>(a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i32x16();
        // IMM8 selects which 256-bit half of `a` to extract:
        // 0 => lanes 0..=7 (low half), 1 => lanes 8..=15 (high half).
        let b: i32x8 = match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        };
        transmute(b)
    }
}
1646
1647/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
1648/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
1649///
1650/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x8_epi32&ig_expand=2966)
1651#[inline]
1652#[target_feature(enable = "avx512dq")]
1653#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
1654#[rustc_legacy_const_generics(3)]
1655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1656#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1657pub const fn _mm512_mask_extracti32x8_epi32<const IMM8: i32>(
1658    src: __m256i,
1659    k: __mmask8,
1660    a: __m512i,
1661) -> __m256i {
1662    unsafe {
1663        static_assert_uimm_bits!(IMM8, 1);
1664        let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
1665        transmute(simd_select_bitmask(k, b, src.as_i32x8()))
1666    }
1667}
1668
1669/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
1670/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1671///
1672/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x8_epi32&ig_expand=2967)
1673#[inline]
1674#[target_feature(enable = "avx512dq")]
1675#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
1676#[rustc_legacy_const_generics(2)]
1677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1678#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1679pub const fn _mm512_maskz_extracti32x8_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i {
1680    unsafe {
1681        static_assert_uimm_bits!(IMM8, 1);
1682        let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
1683        transmute(simd_select_bitmask(k, b, i32x8::ZERO))
1684    }
1685}
1686
/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti64x2_epi64&ig_expand=2968)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extracti64x2_epi64<const IMM8: i32>(a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i64x4();
        // IMM8 selects which 128-bit half of `a` to extract:
        // 0 => lanes {0, 1} (low half), 1 => lanes {2, 3} (high half).
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1]),
            _ => simd_shuffle!(a, a, [2, 3]),
        }
    }
}
1706
1707/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
1708/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
1709///
1710/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti64x2_epi64&ig_expand=2969)
1711#[inline]
1712#[target_feature(enable = "avx512dq,avx512vl")]
1713#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
1714#[rustc_legacy_const_generics(3)]
1715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1716#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1717pub const fn _mm256_mask_extracti64x2_epi64<const IMM8: i32>(
1718    src: __m128i,
1719    k: __mmask8,
1720    a: __m256i,
1721) -> __m128i {
1722    unsafe {
1723        static_assert_uimm_bits!(IMM8, 1);
1724        let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
1725        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
1726    }
1727}
1728
1729/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
1730/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1731///
1732/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti64x2_epi64&ig_expand=2970)
1733#[inline]
1734#[target_feature(enable = "avx512dq,avx512vl")]
1735#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
1736#[rustc_legacy_const_generics(2)]
1737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1738#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1739pub const fn _mm256_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i {
1740    unsafe {
1741        static_assert_uimm_bits!(IMM8, 1);
1742        let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
1743        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
1744    }
1745}
1746
/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x2_epi64&ig_expand=2971)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extracti64x2_epi64<const IMM8: i32>(a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i64x8();
        // IMM8 (two bits) selects one of the four 128-bit quarters of `a`;
        // quarter q corresponds to lanes {2q, 2q + 1}.
        match IMM8 & 3 {
            0 => simd_shuffle!(a, a, [0, 1]),
            1 => simd_shuffle!(a, a, [2, 3]),
            2 => simd_shuffle!(a, a, [4, 5]),
            _ => simd_shuffle!(a, a, [6, 7]),
        }
    }
}
1768
1769/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
1770/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
1771///
1772/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x2_epi64&ig_expand=2972)
1773#[inline]
1774#[target_feature(enable = "avx512dq")]
1775#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
1776#[rustc_legacy_const_generics(3)]
1777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1778#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1779pub const fn _mm512_mask_extracti64x2_epi64<const IMM8: i32>(
1780    src: __m128i,
1781    k: __mmask8,
1782    a: __m512i,
1783) -> __m128i {
1784    unsafe {
1785        static_assert_uimm_bits!(IMM8, 2);
1786        let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
1787        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
1788    }
1789}
1790
1791/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
1792/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1793///
1794/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x2_epi64&ig_expand=2973)
1795#[inline]
1796#[target_feature(enable = "avx512dq")]
1797#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
1798#[rustc_legacy_const_generics(2)]
1799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1800#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1801pub const fn _mm512_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i {
1802    unsafe {
1803        static_assert_uimm_bits!(IMM8, 2);
1804        let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
1805        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
1806    }
1807}
1808
1809// Insert
1810
/// Copy a to dst, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x8&ig_expand=3850)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf32x8<const IMM8: i32>(a: __m512, b: __m256) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Widen `b` to 512 bits so both shuffle operands have 16 lanes; in
        // the two-input shuffle, indices 0..=15 address `a` and 16..=31
        // address (the widened) `b`, whose payload sits in lanes 16..=23.
        let b = _mm512_castps256_ps512(b);
        match IMM8 & 1 {
            // IMM8 == 0: replace the low 256-bit half of `a` with `b`.
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
                )
            }
            // IMM8 == 1: replace the high 256-bit half of `a` with `b`.
            _ => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
                )
            }
        }
    }
}
1842
1843/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
1844/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
1845/// (elements are copied from src if the corresponding bit is not set).
1846///
1847/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x8&ig_expand=3851)
1848#[inline]
1849#[target_feature(enable = "avx512dq")]
1850#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
1851#[rustc_legacy_const_generics(4)]
1852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1853#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1854pub const fn _mm512_mask_insertf32x8<const IMM8: i32>(
1855    src: __m512,
1856    k: __mmask16,
1857    a: __m512,
1858    b: __m256,
1859) -> __m512 {
1860    unsafe {
1861        static_assert_uimm_bits!(IMM8, 1);
1862        let c = _mm512_insertf32x8::<IMM8>(a, b);
1863        transmute(simd_select_bitmask(k, c.as_f32x16(), src.as_f32x16()))
1864    }
1865}
1866
1867/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
1868/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
1869/// (elements are zeroed out if the corresponding bit is not set).
1870///
1871/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x8&ig_expand=3852)
1872#[inline]
1873#[target_feature(enable = "avx512dq")]
1874#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
1875#[rustc_legacy_const_generics(3)]
1876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1877#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1878pub const fn _mm512_maskz_insertf32x8<const IMM8: i32>(
1879    k: __mmask16,
1880    a: __m512,
1881    b: __m256,
1882) -> __m512 {
1883    unsafe {
1884        static_assert_uimm_bits!(IMM8, 1);
1885        let c = _mm512_insertf32x8::<IMM8>(a, b).as_f32x16();
1886        transmute(simd_select_bitmask(k, c, f32x16::ZERO))
1887    }
1888}
1889
/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf64x2&ig_expand=3853)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_insertf64x2<const IMM8: i32>(a: __m256d, b: __m128d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        // Widen `b` to 256 bits; in the two-input shuffle, indices 0..=3
        // address `a` and 4..=5 address `b`'s two payload lanes.
        let b = _mm256_castpd128_pd256(b);
        match IMM8 & 1 {
            // IMM8 == 0: replace the low 128-bit half of `a` with `b`.
            0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
            // IMM8 == 1: replace the high 128-bit half of `a` with `b`.
            _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
        }
    }
}
1909
1910/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
1911/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
1912/// (elements are copied from src if the corresponding bit is not set).
1913///
1914/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf64x2&ig_expand=3854)
1915#[inline]
1916#[target_feature(enable = "avx512dq,avx512vl")]
1917#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
1918#[rustc_legacy_const_generics(4)]
1919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1920#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1921pub const fn _mm256_mask_insertf64x2<const IMM8: i32>(
1922    src: __m256d,
1923    k: __mmask8,
1924    a: __m256d,
1925    b: __m128d,
1926) -> __m256d {
1927    unsafe {
1928        static_assert_uimm_bits!(IMM8, 1);
1929        let c = _mm256_insertf64x2::<IMM8>(a, b);
1930        transmute(simd_select_bitmask(k, c.as_f64x4(), src.as_f64x4()))
1931    }
1932}
1933
1934/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
1935/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
1936/// (elements are zeroed out if the corresponding bit is not set).
1937///
1938/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf64x2&ig_expand=3855)
1939#[inline]
1940#[target_feature(enable = "avx512dq,avx512vl")]
1941#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
1942#[rustc_legacy_const_generics(3)]
1943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1944#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1945pub const fn _mm256_maskz_insertf64x2<const IMM8: i32>(
1946    k: __mmask8,
1947    a: __m256d,
1948    b: __m128d,
1949) -> __m256d {
1950    unsafe {
1951        static_assert_uimm_bits!(IMM8, 1);
1952        let c = _mm256_insertf64x2::<IMM8>(a, b).as_f64x4();
1953        transmute(simd_select_bitmask(k, c, f64x4::ZERO))
1954    }
1955}
1956
/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x2&ig_expand=3856)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf64x2<const IMM8: i32>(a: __m512d, b: __m128d) -> __m512d {
    unsafe {
        // Two bits of IMM8 select one of four 128-bit positions in the 512-bit vector.
        static_assert_uimm_bits!(IMM8, 2);
        // Widen `b` to 512 bits; in the combined shuffle index space `a` is
        // lanes 0..=7 and `b`'s two doubles are lanes 8..=9.
        let b = _mm512_castpd128_pd512(b);
        // Each arm overwrites a different 128-bit (2-element) slot of `a` with `b`.
        match IMM8 & 3 {
            0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
            2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
        }
    }
}
1978
1979/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
1980/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
1981/// (elements are copied from src if the corresponding bit is not set).
1982///
1983/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x2&ig_expand=3857)
1984#[inline]
1985#[target_feature(enable = "avx512dq")]
1986#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
1987#[rustc_legacy_const_generics(4)]
1988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1989#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1990pub const fn _mm512_mask_insertf64x2<const IMM8: i32>(
1991    src: __m512d,
1992    k: __mmask8,
1993    a: __m512d,
1994    b: __m128d,
1995) -> __m512d {
1996    unsafe {
1997        static_assert_uimm_bits!(IMM8, 2);
1998        let c = _mm512_insertf64x2::<IMM8>(a, b);
1999        transmute(simd_select_bitmask(k, c.as_f64x8(), src.as_f64x8()))
2000    }
2001}
2002
2003/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
2004/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
2005/// (elements are zeroed out if the corresponding bit is not set).
2006///
2007/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x2&ig_expand=3858)
2008#[inline]
2009#[target_feature(enable = "avx512dq")]
2010#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
2011#[rustc_legacy_const_generics(3)]
2012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2013#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2014pub const fn _mm512_maskz_insertf64x2<const IMM8: i32>(
2015    k: __mmask8,
2016    a: __m512d,
2017    b: __m128d,
2018) -> __m512d {
2019    unsafe {
2020        static_assert_uimm_bits!(IMM8, 2);
2021        let c = _mm512_insertf64x2::<IMM8>(a, b).as_f64x8();
2022        transmute(simd_select_bitmask(k, c, f64x8::ZERO))
2023    }
2024}
2025
/// Copy a to dst, then insert 256 bits (composed of 8 packed 32-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x8&ig_expand=3869)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti32x8<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
    unsafe {
        // Only the lowest bit of IMM8 is meaningful (two possible 256-bit positions).
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i32x16();
        // Widen `b` to 512 bits; in the combined shuffle index space `a` is
        // lanes 0..=15 and `b`'s eight i32s are lanes 16..=23.
        let b = _mm512_castsi256_si512(b).as_i32x16();
        let r: i32x16 = match IMM8 & 1 {
            // IMM8 == 0: replace the low 256-bit half of `a` with `b`.
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
                )
            }
            // IMM8 == 1: replace the high 256-bit half of `a` with `b`.
            _ => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
                )
            }
        };
        transmute(r)
    }
}
2059
2060/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
2061/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
2062/// the corresponding bit is not set).
2063///
2064/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x8&ig_expand=3870)
2065#[inline]
2066#[target_feature(enable = "avx512dq")]
2067#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
2068#[rustc_legacy_const_generics(4)]
2069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2070#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2071pub const fn _mm512_mask_inserti32x8<const IMM8: i32>(
2072    src: __m512i,
2073    k: __mmask16,
2074    a: __m512i,
2075    b: __m256i,
2076) -> __m512i {
2077    unsafe {
2078        static_assert_uimm_bits!(IMM8, 1);
2079        let c = _mm512_inserti32x8::<IMM8>(a, b);
2080        transmute(simd_select_bitmask(k, c.as_i32x16(), src.as_i32x16()))
2081    }
2082}
2083
2084/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
2085/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
2086/// corresponding bit is not set).
2087///
2088/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x8&ig_expand=3871)
2089#[inline]
2090#[target_feature(enable = "avx512dq")]
2091#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
2092#[rustc_legacy_const_generics(3)]
2093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2094#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2095pub const fn _mm512_maskz_inserti32x8<const IMM8: i32>(
2096    k: __mmask16,
2097    a: __m512i,
2098    b: __m256i,
2099) -> __m512i {
2100    unsafe {
2101        static_assert_uimm_bits!(IMM8, 1);
2102        let c = _mm512_inserti32x8::<IMM8>(a, b).as_i32x16();
2103        transmute(simd_select_bitmask(k, c, i32x16::ZERO))
2104    }
2105}
2106
/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti64x2&ig_expand=3872)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_inserti64x2<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
    unsafe {
        // Only the lowest bit of IMM8 is meaningful (two possible 128-bit positions).
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i64x4();
        // Widen `b` to 256 bits; in the combined shuffle index space `a` is
        // lanes 0..=3 and `b`'s two i64s are lanes 4..=5.
        let b = _mm256_castsi128_si256(b).as_i64x4();
        match IMM8 & 1 {
            // IMM8 == 0: replace the low 128-bit half of `a` with `b`.
            0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
            // IMM8 == 1: replace the high 128-bit half of `a` with `b`.
            _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
        }
    }
}
2127
2128/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
2129/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
2130/// the corresponding bit is not set).
2131///
2132/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti64x2&ig_expand=3873)
2133#[inline]
2134#[target_feature(enable = "avx512dq,avx512vl")]
2135#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
2136#[rustc_legacy_const_generics(4)]
2137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2138#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2139pub const fn _mm256_mask_inserti64x2<const IMM8: i32>(
2140    src: __m256i,
2141    k: __mmask8,
2142    a: __m256i,
2143    b: __m128i,
2144) -> __m256i {
2145    unsafe {
2146        static_assert_uimm_bits!(IMM8, 1);
2147        let c = _mm256_inserti64x2::<IMM8>(a, b);
2148        transmute(simd_select_bitmask(k, c.as_i64x4(), src.as_i64x4()))
2149    }
2150}
2151
2152/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
2153/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
2154/// corresponding bit is not set).
2155///
2156/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti64x2&ig_expand=3874)
2157#[inline]
2158#[target_feature(enable = "avx512dq,avx512vl")]
2159#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
2160#[rustc_legacy_const_generics(3)]
2161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2162#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2163pub const fn _mm256_maskz_inserti64x2<const IMM8: i32>(
2164    k: __mmask8,
2165    a: __m256i,
2166    b: __m128i,
2167) -> __m256i {
2168    unsafe {
2169        static_assert_uimm_bits!(IMM8, 1);
2170        let c = _mm256_inserti64x2::<IMM8>(a, b).as_i64x4();
2171        transmute(simd_select_bitmask(k, c, i64x4::ZERO))
2172    }
2173}
2174
/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x2&ig_expand=3875)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti64x2<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
    unsafe {
        // Two bits of IMM8 select one of four 128-bit positions in the 512-bit vector.
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i64x8();
        // Widen `b` to 512 bits; in the combined shuffle index space `a` is
        // lanes 0..=7 and `b`'s two i64s are lanes 8..=9.
        let b = _mm512_castsi128_si512(b).as_i64x8();
        // Each arm overwrites a different 128-bit (2-element) slot of `a` with `b`.
        match IMM8 & 3 {
            0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
            2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
        }
    }
}
2197
2198/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
2199/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
2200/// the corresponding bit is not set).
2201///
2202/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x2&ig_expand=3876)
2203#[inline]
2204#[target_feature(enable = "avx512dq")]
2205#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
2206#[rustc_legacy_const_generics(4)]
2207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2208#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2209pub const fn _mm512_mask_inserti64x2<const IMM8: i32>(
2210    src: __m512i,
2211    k: __mmask8,
2212    a: __m512i,
2213    b: __m128i,
2214) -> __m512i {
2215    unsafe {
2216        static_assert_uimm_bits!(IMM8, 2);
2217        let c = _mm512_inserti64x2::<IMM8>(a, b);
2218        transmute(simd_select_bitmask(k, c.as_i64x8(), src.as_i64x8()))
2219    }
2220}
2221
2222/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
2223/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
2224/// corresponding bit is not set).
2225///
2226/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x2&ig_expand=3877)
2227#[inline]
2228#[target_feature(enable = "avx512dq")]
2229#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
2230#[rustc_legacy_const_generics(3)]
2231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2232#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2233pub const fn _mm512_maskz_inserti64x2<const IMM8: i32>(
2234    k: __mmask8,
2235    a: __m512i,
2236    b: __m128i,
2237) -> __m512i {
2238    unsafe {
2239        static_assert_uimm_bits!(IMM8, 2);
2240        let c = _mm512_inserti64x2::<IMM8>(a, b).as_i64x8();
2241        transmute(simd_select_bitmask(k, c, i64x8::ZERO))
2242    }
2243}
2244
// Convert //
2246
2247/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2248/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2249///
2250/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2251/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2252/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2253/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2254/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2255///
2256/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_pd&ig_expand=1437)
2257#[inline]
2258#[target_feature(enable = "avx512dq")]
2259#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
2260#[rustc_legacy_const_generics(1)]
2261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2262pub fn _mm512_cvt_roundepi64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d {
2263    unsafe {
2264        static_assert_rounding!(ROUNDING);
2265        transmute(vcvtqq2pd_512(a.as_i64x8(), ROUNDING))
2266    }
2267}
2268
2269/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2270/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2271/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2272///
2273/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2274/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2275/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2276/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2277/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2278///
2279/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_pd&ig_expand=1438)
2280#[inline]
2281#[target_feature(enable = "avx512dq")]
2282#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
2283#[rustc_legacy_const_generics(3)]
2284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2285pub fn _mm512_mask_cvt_roundepi64_pd<const ROUNDING: i32>(
2286    src: __m512d,
2287    k: __mmask8,
2288    a: __m512i,
2289) -> __m512d {
2290    unsafe {
2291        static_assert_rounding!(ROUNDING);
2292        let b = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8();
2293        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2294    }
2295}
2296
2297/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2298/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2299/// Rounding is done according to the ROUNDING parameter, which can be one of:
2300///
2301/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2302/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2303/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2304/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2305/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2306///
2307/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_pd&ig_expand=1439)
2308#[inline]
2309#[target_feature(enable = "avx512dq")]
2310#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
2311#[rustc_legacy_const_generics(2)]
2312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2313pub fn _mm512_maskz_cvt_roundepi64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d {
2314    unsafe {
2315        static_assert_rounding!(ROUNDING);
2316        let b = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8();
2317        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2318    }
2319}
2320
2321/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2322/// and store the results in dst.
2323///
2324/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_pd&ig_expand=1705)
2325#[inline]
2326#[target_feature(enable = "avx512dq,avx512vl")]
2327#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2329pub fn _mm_cvtepi64_pd(a: __m128i) -> __m128d {
2330    unsafe { transmute(vcvtqq2pd_128(a.as_i64x2(), _MM_FROUND_CUR_DIRECTION)) }
2331}
2332
2333/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2334/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2335/// not set).
2336///
2337/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_pd&ig_expand=1706)
2338#[inline]
2339#[target_feature(enable = "avx512dq,avx512vl")]
2340#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2342pub fn _mm_mask_cvtepi64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
2343    unsafe {
2344        let b = _mm_cvtepi64_pd(a).as_f64x2();
2345        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
2346    }
2347}
2348
2349/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2350/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2351///
2352/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_pd&ig_expand=1707)
2353#[inline]
2354#[target_feature(enable = "avx512dq,avx512vl")]
2355#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2357pub fn _mm_maskz_cvtepi64_pd(k: __mmask8, a: __m128i) -> __m128d {
2358    unsafe {
2359        let b = _mm_cvtepi64_pd(a).as_f64x2();
2360        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
2361    }
2362}
2363
2364/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2365/// and store the results in dst.
2366///
2367/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_pd&ig_expand=1708)
2368#[inline]
2369#[target_feature(enable = "avx512dq,avx512vl")]
2370#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2372pub fn _mm256_cvtepi64_pd(a: __m256i) -> __m256d {
2373    unsafe { transmute(vcvtqq2pd_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) }
2374}
2375
2376/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2377/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2378/// not set).
2379///
2380/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_pd&ig_expand=1709)
2381#[inline]
2382#[target_feature(enable = "avx512dq,avx512vl")]
2383#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2385pub fn _mm256_mask_cvtepi64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d {
2386    unsafe {
2387        let b = _mm256_cvtepi64_pd(a).as_f64x4();
2388        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
2389    }
2390}
2391
2392/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2393/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2394///
2395/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_pd&ig_expand=1710)
2396#[inline]
2397#[target_feature(enable = "avx512dq,avx512vl")]
2398#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2400pub fn _mm256_maskz_cvtepi64_pd(k: __mmask8, a: __m256i) -> __m256d {
2401    unsafe {
2402        let b = _mm256_cvtepi64_pd(a).as_f64x4();
2403        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
2404    }
2405}
2406
2407/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2408/// and store the results in dst.
2409///
2410/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_pd&ig_expand=1711)
2411#[inline]
2412#[target_feature(enable = "avx512dq")]
2413#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2415pub fn _mm512_cvtepi64_pd(a: __m512i) -> __m512d {
2416    unsafe { transmute(vcvtqq2pd_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) }
2417}
2418
2419/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2420/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2421/// not set).
2422///
2423/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_pd&ig_expand=1712)
2424#[inline]
2425#[target_feature(enable = "avx512dq")]
2426#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2428pub fn _mm512_mask_cvtepi64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d {
2429    unsafe {
2430        let b = _mm512_cvtepi64_pd(a).as_f64x8();
2431        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2432    }
2433}
2434
2435/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2436/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2437///
2438/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_pd&ig_expand=1713)
2439#[inline]
2440#[target_feature(enable = "avx512dq")]
2441#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2443pub fn _mm512_maskz_cvtepi64_pd(k: __mmask8, a: __m512i) -> __m512d {
2444    unsafe {
2445        let b = _mm512_cvtepi64_pd(a).as_f64x8();
2446        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2447    }
2448}
2449
2450/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2451/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2452///
2453/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2454/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2455/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2456/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2457/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2458///
2459/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_ps&ig_expand=1443)
2460#[inline]
2461#[target_feature(enable = "avx512dq")]
2462#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
2463#[rustc_legacy_const_generics(1)]
2464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2465pub fn _mm512_cvt_roundepi64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 {
2466    unsafe {
2467        static_assert_rounding!(ROUNDING);
2468        transmute(vcvtqq2ps_512(a.as_i64x8(), ROUNDING))
2469    }
2470}
2471
2472/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2473/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2474/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2475///
2476/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2477/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2478/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2479/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2480/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2481///
2482/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_ps&ig_expand=1444)
2483#[inline]
2484#[target_feature(enable = "avx512dq")]
2485#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
2486#[rustc_legacy_const_generics(3)]
2487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2488pub fn _mm512_mask_cvt_roundepi64_ps<const ROUNDING: i32>(
2489    src: __m256,
2490    k: __mmask8,
2491    a: __m512i,
2492) -> __m256 {
2493    unsafe {
2494        static_assert_rounding!(ROUNDING);
2495        let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8();
2496        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2497    }
2498}
2499
2500/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2501/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2502/// Rounding is done according to the ROUNDING parameter, which can be one of:
2503///
2504/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2505/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2506/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2507/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2508/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2509///
2510/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_ps&ig_expand=1445)
2511#[inline]
2512#[target_feature(enable = "avx512dq")]
2513#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
2514#[rustc_legacy_const_generics(2)]
2515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2516pub fn _mm512_maskz_cvt_roundepi64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 {
2517    unsafe {
2518        static_assert_rounding!(ROUNDING);
2519        let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8();
2520        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2521    }
2522}
2523
2524/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2525/// and store the results in dst.
2526///
2527/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_ps&ig_expand=1723)
2528#[inline]
2529#[target_feature(enable = "avx512dq,avx512vl")]
2530#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2532pub fn _mm_cvtepi64_ps(a: __m128i) -> __m128 {
2533    _mm_mask_cvtepi64_ps(_mm_undefined_ps(), 0xff, a)
2534}
2535
2536/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2537/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2538/// not set).
2539///
2540/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_ps&ig_expand=1724)
2541#[inline]
2542#[target_feature(enable = "avx512dq,avx512vl")]
2543#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2545pub fn _mm_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
2546    unsafe { transmute(vcvtqq2ps_128(a.as_i64x2(), src.as_f32x4(), k)) }
2547}
2548
/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_ps&ig_expand=1725)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtepi64_ps(k: __mmask8, a: __m128i) -> __m128 {
    // Zero-masking is the merge-masking form with an all-zeros `src`.
    _mm_mask_cvtepi64_ps(_mm_setzero_ps(), k, a)
}
2560
/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_ps&ig_expand=1726)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtepi64_ps(a: __m256i) -> __m128 {
    // Unmasked conversion; rounding follows the current MXCSR.RC setting.
    unsafe { transmute(vcvtqq2ps_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) }
}
2572
2573/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2574/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2575/// not set).
2576///
2577/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_ps&ig_expand=1727)
2578#[inline]
2579#[target_feature(enable = "avx512dq,avx512vl")]
2580#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2582pub fn _mm256_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 {
2583    unsafe {
2584        let b = _mm256_cvtepi64_ps(a).as_f32x4();
2585        transmute(simd_select_bitmask(k, b, src.as_f32x4()))
2586    }
2587}
2588
2589/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2590/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2591///
2592/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_ps&ig_expand=1728)
2593#[inline]
2594#[target_feature(enable = "avx512dq,avx512vl")]
2595#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2597pub fn _mm256_maskz_cvtepi64_ps(k: __mmask8, a: __m256i) -> __m128 {
2598    unsafe {
2599        let b = _mm256_cvtepi64_ps(a).as_f32x4();
2600        transmute(simd_select_bitmask(k, b, f32x4::ZERO))
2601    }
2602}
2603
/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_ps&ig_expand=1729)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtepi64_ps(a: __m512i) -> __m256 {
    // Unmasked conversion; rounding follows the current MXCSR.RC setting.
    unsafe { transmute(vcvtqq2ps_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) }
}
2615
2616/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2617/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2618/// not set).
2619///
2620/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_ps&ig_expand=1730)
2621#[inline]
2622#[target_feature(enable = "avx512dq")]
2623#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2625pub fn _mm512_mask_cvtepi64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 {
2626    unsafe {
2627        let b = _mm512_cvtepi64_ps(a).as_f32x8();
2628        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2629    }
2630}
2631
2632/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2633/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2634///
2635/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_ps&ig_expand=1731)
2636#[inline]
2637#[target_feature(enable = "avx512dq")]
2638#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2640pub fn _mm512_maskz_cvtepi64_ps(k: __mmask8, a: __m512i) -> __m256 {
2641    unsafe {
2642        let b = _mm512_cvtepi64_ps(a).as_f32x8();
2643        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2644    }
2645}
2646
2647/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2648/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2649///
2650/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2651/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2652/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2653/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2654/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2655///
2656/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_pd&ig_expand=1455)
2657#[inline]
2658#[target_feature(enable = "avx512dq")]
2659#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
2660#[rustc_legacy_const_generics(1)]
2661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2662pub fn _mm512_cvt_roundepu64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d {
2663    unsafe {
2664        static_assert_rounding!(ROUNDING);
2665        transmute(vcvtuqq2pd_512(a.as_u64x8(), ROUNDING))
2666    }
2667}
2668
2669/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2670/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2671/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2672///
2673/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2674/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2675/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2676/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2677/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2678///
2679/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_pd&ig_expand=1456)
2680#[inline]
2681#[target_feature(enable = "avx512dq")]
2682#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
2683#[rustc_legacy_const_generics(3)]
2684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2685pub fn _mm512_mask_cvt_roundepu64_pd<const ROUNDING: i32>(
2686    src: __m512d,
2687    k: __mmask8,
2688    a: __m512i,
2689) -> __m512d {
2690    unsafe {
2691        static_assert_rounding!(ROUNDING);
2692        let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8();
2693        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2694    }
2695}
2696
2697/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2698/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2699/// Rounding is done according to the ROUNDING parameter, which can be one of:
2700///
2701/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2702/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2703/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2704/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2705/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2706///
2707/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_pd&ig_expand=1457)
2708#[inline]
2709#[target_feature(enable = "avx512dq")]
2710#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
2711#[rustc_legacy_const_generics(2)]
2712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2713pub fn _mm512_maskz_cvt_roundepu64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d {
2714    unsafe {
2715        static_assert_rounding!(ROUNDING);
2716        let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8();
2717        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2718    }
2719}
2720
/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_pd&ig_expand=1827)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtepu64_pd(a: __m128i) -> __m128d {
    // Unmasked conversion; rounding follows the current MXCSR.RC setting.
    unsafe { transmute(vcvtuqq2pd_128(a.as_u64x2(), _MM_FROUND_CUR_DIRECTION)) }
}
2732
2733/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2734/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2735/// not set).
2736///
2737/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_pd&ig_expand=1828)
2738#[inline]
2739#[target_feature(enable = "avx512dq,avx512vl")]
2740#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2742pub fn _mm_mask_cvtepu64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
2743    unsafe {
2744        let b = _mm_cvtepu64_pd(a).as_f64x2();
2745        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
2746    }
2747}
2748
2749/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2750/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2751///
2752/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_pd&ig_expand=1829)
2753#[inline]
2754#[target_feature(enable = "avx512dq,avx512vl")]
2755#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2757pub fn _mm_maskz_cvtepu64_pd(k: __mmask8, a: __m128i) -> __m128d {
2758    unsafe {
2759        let b = _mm_cvtepu64_pd(a).as_f64x2();
2760        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
2761    }
2762}
2763
/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_pd&ig_expand=1830)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtepu64_pd(a: __m256i) -> __m256d {
    // Unmasked conversion; rounding follows the current MXCSR.RC setting.
    unsafe { transmute(vcvtuqq2pd_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) }
}
2775
2776/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2777/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2778/// not set).
2779///
2780/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_pd&ig_expand=1831)
2781#[inline]
2782#[target_feature(enable = "avx512dq,avx512vl")]
2783#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2785pub fn _mm256_mask_cvtepu64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d {
2786    unsafe {
2787        let b = _mm256_cvtepu64_pd(a).as_f64x4();
2788        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
2789    }
2790}
2791
2792/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2793/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2794///
2795/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_pd&ig_expand=1832)
2796#[inline]
2797#[target_feature(enable = "avx512dq,avx512vl")]
2798#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2800pub fn _mm256_maskz_cvtepu64_pd(k: __mmask8, a: __m256i) -> __m256d {
2801    unsafe {
2802        let b = _mm256_cvtepu64_pd(a).as_f64x4();
2803        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
2804    }
2805}
2806
/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_pd&ig_expand=1833)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtepu64_pd(a: __m512i) -> __m512d {
    // Unmasked conversion; rounding follows the current MXCSR.RC setting.
    unsafe { transmute(vcvtuqq2pd_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) }
}
2818
2819/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2820/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2821/// not set).
2822///
2823/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_pd&ig_expand=1834)
2824#[inline]
2825#[target_feature(enable = "avx512dq")]
2826#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2828pub fn _mm512_mask_cvtepu64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d {
2829    unsafe {
2830        let b = _mm512_cvtepu64_pd(a).as_f64x8();
2831        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2832    }
2833}
2834
2835/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2836/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2837///
2838/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_pd&ig_expand=1835)
2839#[inline]
2840#[target_feature(enable = "avx512dq")]
2841#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2843pub fn _mm512_maskz_cvtepu64_pd(k: __mmask8, a: __m512i) -> __m512d {
2844    unsafe {
2845        let b = _mm512_cvtepu64_pd(a).as_f64x8();
2846        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2847    }
2848}
2849
2850/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2851/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2852///
2853/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2854/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2855/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2856/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2857/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2858///
2859/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_ps&ig_expand=1461)
2860#[inline]
2861#[target_feature(enable = "avx512dq")]
2862#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
2863#[rustc_legacy_const_generics(1)]
2864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2865pub fn _mm512_cvt_roundepu64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 {
2866    unsafe {
2867        static_assert_rounding!(ROUNDING);
2868        transmute(vcvtuqq2ps_512(a.as_u64x8(), ROUNDING))
2869    }
2870}
2871
2872/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2873/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2874/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2875///
2876/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2877/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2878/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2879/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2881///
2882/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_ps&ig_expand=1462)
2883#[inline]
2884#[target_feature(enable = "avx512dq")]
2885#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
2886#[rustc_legacy_const_generics(3)]
2887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2888pub fn _mm512_mask_cvt_roundepu64_ps<const ROUNDING: i32>(
2889    src: __m256,
2890    k: __mmask8,
2891    a: __m512i,
2892) -> __m256 {
2893    unsafe {
2894        static_assert_rounding!(ROUNDING);
2895        let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8();
2896        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2897    }
2898}
2899
2900/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2901/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2902/// Rounding is done according to the ROUNDING parameter, which can be one of:
2903///
2904/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2905/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2906/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2907/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2908/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2909///
2910/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_ps&ig_expand=1463)
2911#[inline]
2912#[target_feature(enable = "avx512dq")]
2913#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
2914#[rustc_legacy_const_generics(2)]
2915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2916pub fn _mm512_maskz_cvt_roundepu64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 {
2917    unsafe {
2918        static_assert_rounding!(ROUNDING);
2919        let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8();
2920        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2921    }
2922}
2923
/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_ps&ig_expand=1845)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtepu64_ps(a: __m128i) -> __m128 {
    // The 128-bit form is implemented via the inherently-masked intrinsic, so
    // delegate with an all-ones mask; `src` is never selected, which makes an
    // undefined register acceptable.
    _mm_mask_cvtepu64_ps(_mm_undefined_ps(), 0xff, a)
}
2935
/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_ps&ig_expand=1846)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    // The underlying intrinsic performs the merge itself: it takes `src` and the
    // mask `k` directly, so no separate select step is needed here.
    unsafe { transmute(vcvtuqq2ps_128(a.as_u64x2(), src.as_f32x4(), k)) }
}
2948
/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_ps&ig_expand=1847)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtepu64_ps(k: __mmask8, a: __m128i) -> __m128 {
    // Zero-masking is the merge-masking form with an all-zeros `src`.
    _mm_mask_cvtepu64_ps(_mm_setzero_ps(), k, a)
}
2960
/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_ps&ig_expand=1848)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtepu64_ps(a: __m256i) -> __m128 {
    // Unmasked conversion; rounding follows the current MXCSR.RC setting.
    unsafe { transmute(vcvtuqq2ps_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) }
}
2972
2973/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2974/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2975/// not set).
2976///
2977/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_ps&ig_expand=1849)
2978#[inline]
2979#[target_feature(enable = "avx512dq,avx512vl")]
2980#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2982pub fn _mm256_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 {
2983    unsafe {
2984        let b = _mm256_cvtepu64_ps(a).as_f32x4();
2985        transmute(simd_select_bitmask(k, b, src.as_f32x4()))
2986    }
2987}
2988
2989/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2990/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2991///
2992/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_ps&ig_expand=1850)
2993#[inline]
2994#[target_feature(enable = "avx512dq,avx512vl")]
2995#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2997pub fn _mm256_maskz_cvtepu64_ps(k: __mmask8, a: __m256i) -> __m128 {
2998    unsafe {
2999        let b = _mm256_cvtepu64_ps(a).as_f32x4();
3000        transmute(simd_select_bitmask(k, b, f32x4::ZERO))
3001    }
3002}
3003
/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_ps&ig_expand=1851)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtepu64_ps(a: __m512i) -> __m256 {
    // Unmasked conversion; rounding follows the current MXCSR.RC setting.
    unsafe { transmute(vcvtuqq2ps_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) }
}
3015
3016/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
3017/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3018/// not set).
3019///
3020/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_ps&ig_expand=1852)
3021#[inline]
3022#[target_feature(enable = "avx512dq")]
3023#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
3024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3025pub fn _mm512_mask_cvtepu64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 {
3026    unsafe {
3027        let b = _mm512_cvtepu64_ps(a).as_f32x8();
3028        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
3029    }
3030}
3031
3032/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
3033/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3034///
3035/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_ps&ig_expand=1853)
3036#[inline]
3037#[target_feature(enable = "avx512dq")]
3038#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
3039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3040pub fn _mm512_maskz_cvtepu64_ps(k: __mmask8, a: __m512i) -> __m256 {
3041    unsafe {
3042        let b = _mm512_cvtepu64_ps(a).as_f32x8();
3043        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
3044    }
3045}
3046
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi64&ig_expand=1472)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundpd_epi64<const ROUNDING: i32>(a: __m512d) -> __m512i {
    static_assert_rounding!(ROUNDING);
    // Delegate to the masked form with an all-ones mask; `src` is never
    // selected, so an undefined register is acceptable.
    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}
3066
3067/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3068/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3069/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
3070///
3071/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3072/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3073/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3074/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3075/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3076///
3077/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi64&ig_expand=1473)
3078#[inline]
3079#[target_feature(enable = "avx512dq")]
3080#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
3081#[rustc_legacy_const_generics(3)]
3082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3083pub fn _mm512_mask_cvt_roundpd_epi64<const ROUNDING: i32>(
3084    src: __m512i,
3085    k: __mmask8,
3086    a: __m512d,
3087) -> __m512i {
3088    unsafe {
3089        static_assert_rounding!(ROUNDING);
3090        transmute(vcvtpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, ROUNDING))
3091    }
3092}
3093
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi64&ig_expand=1474)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundpd_epi64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_rounding!(ROUNDING);
    // Zero-masking is the merge-masking form with an all-zeros `src`.
    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}
3114
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi64&ig_expand=1941)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtpd_epi64(a: __m128d) -> __m128i {
    // Delegate to the masked form with an all-ones mask; `src` is never
    // selected, so an undefined register is acceptable.
    _mm_mask_cvtpd_epi64(_mm_undefined_si128(), 0xff, a)
}
3126
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi64&ig_expand=1942)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    // The underlying intrinsic performs the merge itself: it takes `src` and the
    // mask `k` directly, so no separate select step is needed here.
    unsafe { transmute(vcvtpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) }
}
3139
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi64&ig_expand=1943)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtpd_epi64(k: __mmask8, a: __m128d) -> __m128i {
    // Zero-masking is the merge-masking form with an all-zeros `src`.
    _mm_mask_cvtpd_epi64(_mm_setzero_si128(), k, a)
}
3151
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi64&ig_expand=1944)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtpd_epi64(a: __m256d) -> __m256i {
    // All-ones mask selects every lane, so the undefined source is never observed.
    _mm256_mask_cvtpd_epi64(_mm256_undefined_si256(), 0xff, a)
}
3163
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi64&ig_expand=1945)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    // The compiler intrinsic performs the conversion and the masked merge
    // with `src` in a single operation; only the low 4 mask bits are used.
    unsafe { transmute(vcvtpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) }
}
3176
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi64&ig_expand=1946)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtpd_epi64(k: __mmask8, a: __m256d) -> __m256i {
    // Zero-masking: reuse the write-masked variant with an all-zero source.
    _mm256_mask_cvtpd_epi64(_mm256_setzero_si256(), k, a)
}
3188
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi64&ig_expand=1947)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtpd_epi64(a: __m512d) -> __m512i {
    // All-ones mask selects every lane, so the undefined source is never observed.
    _mm512_mask_cvtpd_epi64(_mm512_undefined_epi32(), 0xff, a)
}
3200
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi64&ig_expand=1948)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvtpd2qq_512(
            a.as_f64x8(),
            src.as_i64x8(),
            k,
            // Round using the mode currently set in MXCSR.RC.
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3220
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi64&ig_expand=1949)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtpd_epi64(k: __mmask8, a: __m512d) -> __m512i {
    // Zero-masking: reuse the write-masked variant with an all-zero source.
    _mm512_mask_cvtpd_epi64(_mm512_setzero_si512(), k, a)
}
3232
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi64&ig_expand=1514)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundps_epi64<const ROUNDING: i32>(a: __m256) -> __m512i {
    // Validate ROUNDING at compile time; all-ones mask means the undefined
    // source is never observed.
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}
3252
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi64&ig_expand=1515)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundps_epi64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        // ROUNDING is checked at compile time, then forwarded verbatim to the
        // compiler intrinsic, which also merges with `src` under mask `k`.
        static_assert_rounding!(ROUNDING);
        transmute(vcvtps2qq_512(a.as_f32x8(), src.as_i64x8(), k, ROUNDING))
    }
}
3279
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi64&ig_expand=1516)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundps_epi64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i {
    // Validate ROUNDING at compile time, then delegate to the write-masked
    // variant with an all-zero source so unselected lanes come out zero.
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}
3300
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi64&ig_expand=2075)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtps_epi64(a: __m128) -> __m128i {
    // All-ones mask selects every lane, so the undefined source is never observed.
    _mm_mask_cvtps_epi64(_mm_undefined_si128(), 0xff, a)
}
3312
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi64&ig_expand=2076)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    // Widening conversion: 4 f32 lanes in, but only 2 i64 lanes out, so the
    // intrinsic consumes the lower half of `a`; merge with `src` is masked by `k`.
    unsafe { transmute(vcvtps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) }
}
3325
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi64&ig_expand=2077)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m128i {
    // Zero-masking: reuse the write-masked variant with an all-zero source.
    _mm_mask_cvtps_epi64(_mm_setzero_si128(), k, a)
}
3337
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi64&ig_expand=2078)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtps_epi64(a: __m128) -> __m256i {
    // All-ones mask selects every lane, so the undefined source is never observed.
    _mm256_mask_cvtps_epi64(_mm256_undefined_si256(), 0xff, a)
}
3349
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi64&ig_expand=2079)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    // Widening conversion: 4 f32 lanes become 4 i64 lanes (128-bit input,
    // 256-bit result); merge with `src` is masked by `k`.
    unsafe { transmute(vcvtps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) }
}
3362
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi64&ig_expand=2080)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m256i {
    // Zero-masking: reuse the write-masked variant with an all-zero source.
    _mm256_mask_cvtps_epi64(_mm256_setzero_si256(), k, a)
}
3374
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi64&ig_expand=2081)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtps_epi64(a: __m256) -> __m512i {
    // All-ones mask selects every lane, so the undefined source is never observed.
    _mm512_mask_cvtps_epi64(_mm512_undefined_epi32(), 0xff, a)
}
3386
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi64&ig_expand=2082)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvtps2qq_512(
            a.as_f32x8(),
            src.as_i64x8(),
            k,
            // Round using the mode currently set in MXCSR.RC.
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3406
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi64&ig_expand=2083)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtps_epi64(k: __mmask8, a: __m256) -> __m512i {
    // Zero-masking: reuse the write-masked variant with an all-zero source.
    _mm512_mask_cvtps_epi64(_mm512_setzero_si512(), k, a)
}
3418
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu64&ig_expand=1478)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundpd_epu64<const ROUNDING: i32>(a: __m512d) -> __m512i {
    // Validate ROUNDING at compile time; all-ones mask means the undefined
    // source is never observed.
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}
3438
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu64&ig_expand=1479)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundpd_epu64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        // ROUNDING is checked at compile time, then forwarded verbatim to the
        // compiler intrinsic, which also merges with `src` under mask `k`.
        static_assert_rounding!(ROUNDING);
        transmute(vcvtpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, ROUNDING))
    }
}
3465
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu64&ig_expand=1480)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundpd_epu64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i {
    // Validate ROUNDING at compile time, then delegate to the write-masked
    // variant with an all-zero source so unselected lanes come out zero.
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}
3486
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu64&ig_expand=1959)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtpd_epu64(a: __m128d) -> __m128i {
    // All-ones mask selects every lane, so the undefined source is never observed.
    _mm_mask_cvtpd_epu64(_mm_undefined_si128(), 0xff, a)
}
3498
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu64&ig_expand=1960)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    // The compiler intrinsic performs the conversion and the masked merge
    // with `src` in a single operation; only the low 2 mask bits are used.
    unsafe { transmute(vcvtpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) }
}
3511
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu64&ig_expand=1961)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtpd_epu64(k: __mmask8, a: __m128d) -> __m128i {
    // Zero-masking: reuse the write-masked variant with an all-zero source.
    _mm_mask_cvtpd_epu64(_mm_setzero_si128(), k, a)
}
3523
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu64&ig_expand=1962)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtpd_epu64(a: __m256d) -> __m256i {
    // All-ones mask selects every lane, so the undefined source is never observed.
    _mm256_mask_cvtpd_epu64(_mm256_undefined_si256(), 0xff, a)
}
3535
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu64&ig_expand=1963)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    // The compiler intrinsic performs the conversion and the masked merge
    // with `src` in a single operation; only the low 4 mask bits are used.
    unsafe { transmute(vcvtpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) }
}
3548
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu64&ig_expand=1964)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtpd_epu64(k: __mmask8, a: __m256d) -> __m256i {
    // Zero-masking: reuse the write-masked variant with an all-zero source.
    _mm256_mask_cvtpd_epu64(_mm256_setzero_si256(), k, a)
}
3560
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu64&ig_expand=1965)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtpd_epu64(a: __m512d) -> __m512i {
    // All-ones mask selects every lane, so the undefined source is never observed.
    _mm512_mask_cvtpd_epu64(_mm512_undefined_epi32(), 0xff, a)
}
3572
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu64&ig_expand=1966)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvtpd2uqq_512(
            a.as_f64x8(),
            src.as_u64x8(),
            k,
            // Round using the mode currently set in MXCSR.RC.
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3592
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu64&ig_expand=1967)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
    // Zero-masking: reuse the write-masked variant with an all-zero source.
    _mm512_mask_cvtpd_epu64(_mm512_setzero_si512(), k, a)
}
3604
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu64&ig_expand=1520)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundps_epu64<const ROUNDING: i32>(a: __m256) -> __m512i {
    // Validate ROUNDING at compile time; all-ones mask means the undefined
    // source is never observed.
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}
3624
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu64&ig_expand=1521)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundps_epu64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        // ROUNDING is checked at compile time, then forwarded verbatim to the
        // compiler intrinsic, which also merges with `src` under mask `k`.
        static_assert_rounding!(ROUNDING);
        transmute(vcvtps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, ROUNDING))
    }
}
3651
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
/// Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu64&ig_expand=1522)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundps_epu64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i {
    // Validate ROUNDING at compile time, then delegate to the write-masked
    // variant with an all-zero source so unselected lanes come out zero.
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}
3672
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu64&ig_expand=2093)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtps_epu64(a: __m128) -> __m128i {
    // All-ones mask selects every lane, so the undefined source is never observed.
    _mm_mask_cvtps_epu64(_mm_undefined_si128(), 0xff, a)
}
3684
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu64&ig_expand=2094)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    // Widening conversion: 4 f32 lanes in, but only 2 u64 lanes out, so the
    // intrinsic consumes the lower half of `a`; merge with `src` is masked by `k`.
    unsafe { transmute(vcvtps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) }
}
3697
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu64&ig_expand=2095)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m128i {
    // Zero-masking: reuse the write-masked variant with an all-zero source.
    _mm_mask_cvtps_epu64(_mm_setzero_si128(), k, a)
}
3709
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu64&ig_expand=2096)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtps_epu64(a: __m128) -> __m256i {
    // Unmasked form: all-ones mask with an undefined source, so every lane is written.
    _mm256_mask_cvtps_epu64(_mm256_undefined_si256(), 0xff, a)
}
3721
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu64&ig_expand=2097)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    // SAFETY: requires avx512dq+avx512vl, enabled by #[target_feature] above.
    unsafe { transmute(vcvtps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) }
}
3734
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu64&ig_expand=2098)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m256i {
    // Zeromask form: writemask variant with an all-zeros source vector.
    _mm256_mask_cvtps_epu64(_mm256_setzero_si256(), k, a)
}
3746
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu64&ig_expand=2099)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtps_epu64(a: __m256) -> __m512i {
    // Unmasked form: all-ones mask with an undefined source, so every lane is written.
    _mm512_mask_cvtps_epu64(_mm512_undefined_epi32(), 0xff, a)
}
3758
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
/// not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu64&ig_expand=2100)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    // SAFETY: requires avx512dq, enabled by #[target_feature] above.
    unsafe {
        // _MM_FROUND_CUR_DIRECTION: no static rounding override; use the current rounding mode.
        transmute(vcvtps2uqq_512(
            a.as_f32x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3778
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu64&ig_expand=2101)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtps_epu64(k: __mmask8, a: __m256) -> __m512i {
    // Zeromask form: writemask variant with an all-zeros source vector.
    _mm512_mask_cvtps_epu64(_mm512_setzero_si512(), k, a)
}
3790
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi64&ig_expand=2264)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundpd_epi64<const SAE: i32>(a: __m512d) -> __m512i {
    // Compile-time check that SAE is a valid suppress-all-exceptions constant.
    static_assert_sae!(SAE);
    // Unmasked form: all-ones mask with an undefined source, so every lane is written.
    _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
3805
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi64&ig_expand=2265)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundpd_epi64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    // SAFETY: requires avx512dq, enabled by #[target_feature] above.
    unsafe {
        // Compile-time check that SAE is a valid suppress-all-exceptions constant.
        static_assert_sae!(SAE);
        transmute(vcvttpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, SAE))
    }
}
3826
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi64&ig_expand=2266)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundpd_epi64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
    // Compile-time check that SAE is a valid suppress-all-exceptions constant.
    static_assert_sae!(SAE);
    // Zeromask form: writemask variant with an all-zeros source vector.
    _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_setzero_si512(), k, a)
}
3841
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi64&ig_expand=2329)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttpd_epi64(a: __m128d) -> __m128i {
    // Unmasked form: all-ones mask with an undefined source, so every lane is written.
    _mm_mask_cvttpd_epi64(_mm_undefined_si128(), 0xff, a)
}
3853
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi64&ig_expand=2330)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    // SAFETY: requires avx512dq+avx512vl, enabled by #[target_feature] above.
    unsafe { transmute(vcvttpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) }
}
3866
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi64&ig_expand=2331)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttpd_epi64(k: __mmask8, a: __m128d) -> __m128i {
    // Zeromask form: writemask variant with an all-zeros source vector.
    _mm_mask_cvttpd_epi64(_mm_setzero_si128(), k, a)
}
3879
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi64&ig_expand=2332)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttpd_epi64(a: __m256d) -> __m256i {
    // Unmasked form: all-ones mask with an undefined source, so every lane is written.
    _mm256_mask_cvttpd_epi64(_mm256_undefined_si256(), 0xff, a)
}
3891
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi64&ig_expand=2333)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvttpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    // SAFETY: requires avx512dq+avx512vl, enabled by #[target_feature] above.
    unsafe { transmute(vcvttpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) }
}
3904
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi64&ig_expand=2334)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvttpd_epi64(k: __mmask8, a: __m256d) -> __m256i {
    // Zeromask form: writemask variant with an all-zeros source vector.
    _mm256_mask_cvttpd_epi64(_mm256_setzero_si256(), k, a)
}
3917
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi64&ig_expand=2335)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvttpd_epi64(a: __m512d) -> __m512i {
    // Unmasked form: all-ones mask with an undefined source, so every lane is written.
    _mm512_mask_cvttpd_epi64(_mm512_undefined_epi32(), 0xff, a)
}
3929
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi64&ig_expand=2336)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvttpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    // SAFETY: requires avx512dq, enabled by #[target_feature] above.
    unsafe {
        // Truncating conversion; _MM_FROUND_CUR_DIRECTION means no SAE override here.
        transmute(vcvttpd2qq_512(
            a.as_f64x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3949
/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi64&ig_expand=2337)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvttpd_epi64(k: __mmask8, a: __m512d) -> __m512i {
    // Zeromask form: writemask variant with an all-zeros source vector.
    _mm512_mask_cvttpd_epi64(_mm512_setzero_si512(), k, a)
}
3962
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi64&ig_expand=2294)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundps_epi64<const SAE: i32>(a: __m256) -> __m512i {
    // Compile-time check that SAE is a valid suppress-all-exceptions constant.
    static_assert_sae!(SAE);
    // Unmasked form: all-ones mask with an undefined source, so every lane is written.
    _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
3977
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi64&ig_expand=2295)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundps_epi64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    // SAFETY: requires avx512dq, enabled by #[target_feature] above.
    unsafe {
        // Compile-time check that SAE is a valid suppress-all-exceptions constant.
        static_assert_sae!(SAE);
        transmute(vcvttps2qq_512(a.as_f32x8(), src.as_i64x8(), k, SAE))
    }
}
3998
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi64&ig_expand=2296)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundps_epi64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i {
    // Compile-time check that SAE is a valid suppress-all-exceptions constant.
    static_assert_sae!(SAE);
    // Zeromask form: writemask variant with an all-zeros source vector.
    _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_setzero_si512(), k, a)
}
4013
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi64&ig_expand=2420)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttps_epi64(a: __m128) -> __m128i {
    // Unmasked form: all-ones mask with an undefined source, so every lane is written.
    _mm_mask_cvttps_epi64(_mm_undefined_si128(), 0xff, a)
}
4025
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi64&ig_expand=2421)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    // SAFETY: requires avx512dq+avx512vl, enabled by #[target_feature] above.
    // Only the low two f32 lanes of `a` are converted into the two i64 lanes.
    unsafe { transmute(vcvttps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) }
}
4038
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi64&ig_expand=2422)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m128i {
    // Zeromask form: writemask variant with an all-zeros source vector.
    _mm_mask_cvttps_epi64(_mm_setzero_si128(), k, a)
}
4051
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi64&ig_expand=2423)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttps_epi64(a: __m128) -> __m256i {
    // Unmasked form: all-ones mask with an undefined source, so every lane is written.
    _mm256_mask_cvttps_epi64(_mm256_undefined_si256(), 0xff, a)
}
4063
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi64&ig_expand=2424)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvttps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    // SAFETY: requires avx512dq+avx512vl, enabled by #[target_feature] above.
    unsafe { transmute(vcvttps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) }
}
4076
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi64&ig_expand=2425)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m256i {
    // Zeromask form: writemask variant with an all-zeros source vector.
    _mm256_mask_cvttps_epi64(_mm256_setzero_si256(), k, a)
}
4089
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi64&ig_expand=2426)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvttps_epi64(a: __m256) -> __m512i {
    // Unmasked form: all-ones mask with an undefined source, so every lane is written.
    _mm512_mask_cvttps_epi64(_mm512_undefined_epi32(), 0xff, a)
}
4101
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi64&ig_expand=2427)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvttps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    // SAFETY: requires avx512dq, enabled by #[target_feature] above.
    unsafe {
        // Truncating conversion; _MM_FROUND_CUR_DIRECTION means no SAE override here.
        transmute(vcvttps2qq_512(
            a.as_f32x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
4121
/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi64&ig_expand=2428)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2qq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvttps_epi64(k: __mmask8, a: __m256) -> __m512i {
    // Zeromask form: writemask variant with an all-zeros source vector.
    _mm512_mask_cvttps_epi64(_mm512_setzero_si512(), k, a)
}
4134
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu64&ig_expand=1965)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundpd_epu64<const SAE: i32>(a: __m512d) -> __m512i {
    // Compile-time check that SAE is a valid suppress-all-exceptions constant.
    static_assert_sae!(SAE);
    // Unmasked form: all-ones mask with an undefined source, so every lane is written.
    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
4149
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu64&ig_expand=1966)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundpd_epu64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    // SAFETY: requires avx512dq, enabled by #[target_feature] above.
    unsafe {
        // Compile-time check that SAE is a valid suppress-all-exceptions constant.
        static_assert_sae!(SAE);
        transmute(vcvttpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, SAE))
    }
}
4170
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu64&ig_expand=1967)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundpd_epu64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
    // Compile-time check that SAE is a valid suppress-all-exceptions constant.
    static_assert_sae!(SAE);
    // Zeromask form: writemask variant with an all-zeros source vector.
    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_setzero_si512(), k, a)
}
4185
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu64&ig_expand=2347)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttpd_epu64(a: __m128d) -> __m128i {
    // Unmasked form: all-ones mask with an undefined source, so every lane is written.
    _mm_mask_cvttpd_epu64(_mm_undefined_si128(), 0xff, a)
}
4197
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu64&ig_expand=2348)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    // SAFETY: requires avx512dq+avx512vl, enabled by #[target_feature] above.
    unsafe { transmute(vcvttpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) }
}
4210
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu64&ig_expand=2349)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttpd_epu64(k: __mmask8, a: __m128d) -> __m128i {
    // Zeromask form: writemask variant with an all-zeros source vector.
    _mm_mask_cvttpd_epu64(_mm_setzero_si128(), k, a)
}
4223
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu64&ig_expand=2350)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttpd_epu64(a: __m256d) -> __m256i {
    // Unmasked form: all-ones mask with an undefined source, so every lane is written.
    _mm256_mask_cvttpd_epu64(_mm256_undefined_si256(), 0xff, a)
}
4235
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu64&ig_expand=2351)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvttpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    // SAFETY: requires avx512dq+avx512vl, enabled by #[target_feature] above.
    unsafe { transmute(vcvttpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) }
}
4248
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu64&ig_expand=2352)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvttpd_epu64(k: __mmask8, a: __m256d) -> __m256i {
    // Zeromask form: writemask variant with an all-zeros source vector.
    _mm256_mask_cvttpd_epu64(_mm256_setzero_si256(), k, a)
}
4261
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu64&ig_expand=2353)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvttpd_epu64(a: __m512d) -> __m512i {
    // Unmasked form: all-ones mask with an undefined source, so every lane is written.
    _mm512_mask_cvttpd_epu64(_mm512_undefined_epi32(), 0xff, a)
}
4273
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu64&ig_expand=2354)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvttpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    // SAFETY: requires avx512dq, enabled by #[target_feature] above.
    unsafe {
        // Truncating conversion; _MM_FROUND_CUR_DIRECTION means no SAE override here.
        transmute(vcvttpd2uqq_512(
            a.as_f64x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
4293
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu64&ig_expand=2355)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvttpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
    // Zeromask form: writemask variant with an all-zeros source vector.
    _mm512_mask_cvttpd_epu64(_mm512_setzero_si512(), k, a)
}
4306
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
/// to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu64&ig_expand=2300)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundps_epu64<const SAE: i32>(a: __m256) -> __m512i {
    // Validate SAE at compile time, then delegate to the write-masked form
    // with an all-ones mask (every destination lane is written).
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
4321
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu64&ig_expand=2301)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundps_epu64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        // Forward the caller's SAE value straight to the LLVM intrinsic.
        transmute(vcvttps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, SAE))
    }
}
4342
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu64&ig_expand=2302)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundps_epu64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i {
    // Zeromasking: an all-zeros source makes unselected lanes come out as 0.
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_setzero_si512(), k, a)
}
4357
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu64&ig_expand=2438)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttps_epu64(a: __m128) -> __m128i {
    // Delegate with an all-ones mask; the undefined source is never observed
    // because every destination lane is written. Only the low two floats of
    // `a` contribute (two u64 lanes fit in a 128-bit destination).
    _mm_mask_cvttps_epu64(_mm_undefined_si128(), 0xff, a)
}
4369
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu64&ig_expand=2439)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    // The 128-bit LLVM intrinsic takes pass-through lanes and the mask; no
    // rounding argument exists at this width (truncation is implied).
    unsafe { transmute(vcvttps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) }
}
4382
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu64&ig_expand=2440)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m128i {
    // Zeromasking: an all-zeros source makes unselected lanes come out as 0.
    _mm_mask_cvttps_epu64(_mm_setzero_si128(), k, a)
}
4395
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu64&ig_expand=2441)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttps_epu64(a: __m128) -> __m256i {
    // Four f32 inputs widen to four u64 lanes, hence the 128-bit source and
    // 256-bit destination. All-ones mask: every destination lane is written.
    _mm256_mask_cvttps_epu64(_mm256_undefined_si256(), 0xff, a)
}
4407
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu64&ig_expand=2442)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvttps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    // The 256-bit LLVM intrinsic takes pass-through lanes and the mask; no
    // rounding argument exists at this width (truncation is implied).
    unsafe { transmute(vcvttps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) }
}
4420
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu64&ig_expand=2443)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m256i {
    // Zeromasking: an all-zeros source makes unselected lanes come out as 0.
    _mm256_mask_cvttps_epu64(_mm256_setzero_si256(), k, a)
}
4433
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu64&ig_expand=2444)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvttps_epu64(a: __m256) -> __m512i {
    // Eight f32 inputs widen to eight u64 lanes, hence the 256-bit source
    // and 512-bit destination. All-ones mask: every lane is written.
    _mm512_mask_cvttps_epu64(_mm512_undefined_epi32(), 0xff, a)
}
4445
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu64&ig_expand=2445)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvttps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        // _MM_FROUND_CUR_DIRECTION requests default exception behavior; the
        // _round_ variants forward the caller's SAE value here instead.
        transmute(vcvttps2uqq_512(
            a.as_f32x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
4465
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu64&ig_expand=2446)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvttps2uqq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvttps_epu64(k: __mmask8, a: __m256) -> __m512i {
    // Zeromasking: an all-zeros source makes unselected lanes come out as 0.
    _mm512_mask_cvttps_epu64(_mm512_setzero_si512(), k, a)
}
4478
4479// Multiply-Low
4480
4481/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4482/// the low 64 bits of the intermediate integers in `dst`.
4483///
4484/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi64&ig_expand=4778)
4485#[inline]
4486#[target_feature(enable = "avx512dq,avx512vl")]
4487#[cfg_attr(test, assert_instr(vpmullq))]
4488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4489#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4490pub const fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i {
4491    unsafe { transmute(simd_mul(a.as_i64x2(), b.as_i64x2())) }
4492}
4493
4494/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4495/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4496/// `src` if the corresponding bit is not set).
4497///
4498/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi64&ig_expand=4776)
4499#[inline]
4500#[target_feature(enable = "avx512dq,avx512vl")]
4501#[cfg_attr(test, assert_instr(vpmullq))]
4502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4503#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4504pub const fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
4505    unsafe {
4506        let b = _mm_mullo_epi64(a, b).as_i64x2();
4507        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
4508    }
4509}
4510
4511/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4512/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4513/// the corresponding bit is not set).
4514///
4515/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi64&ig_expand=4777)
4516#[inline]
4517#[target_feature(enable = "avx512dq,avx512vl")]
4518#[cfg_attr(test, assert_instr(vpmullq))]
4519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4521pub const fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
4522    unsafe {
4523        let b = _mm_mullo_epi64(a, b).as_i64x2();
4524        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
4525    }
4526}
4527
4528/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4529/// the low 64 bits of the intermediate integers in `dst`.
4530///
4531/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi64&ig_expand=4781)
4532#[inline]
4533#[target_feature(enable = "avx512dq,avx512vl")]
4534#[cfg_attr(test, assert_instr(vpmullq))]
4535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4536#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4537pub const fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i {
4538    unsafe { transmute(simd_mul(a.as_i64x4(), b.as_i64x4())) }
4539}
4540
4541/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4542/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4543/// `src` if the corresponding bit is not set).
4544///
4545/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi64&ig_expand=4779)
4546#[inline]
4547#[target_feature(enable = "avx512dq,avx512vl")]
4548#[cfg_attr(test, assert_instr(vpmullq))]
4549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4550#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4551pub const fn _mm256_mask_mullo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
4552    unsafe {
4553        let b = _mm256_mullo_epi64(a, b).as_i64x4();
4554        transmute(simd_select_bitmask(k, b, src.as_i64x4()))
4555    }
4556}
4557
4558/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4559/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4560/// the corresponding bit is not set).
4561///
4562/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi64&ig_expand=4780)
4563#[inline]
4564#[target_feature(enable = "avx512dq,avx512vl")]
4565#[cfg_attr(test, assert_instr(vpmullq))]
4566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4567#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4568pub const fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
4569    unsafe {
4570        let b = _mm256_mullo_epi64(a, b).as_i64x4();
4571        transmute(simd_select_bitmask(k, b, i64x4::ZERO))
4572    }
4573}
4574
4575/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4576/// the low 64 bits of the intermediate integers in `dst`.
4577///
4578/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi64&ig_expand=4784)
4579#[inline]
4580#[target_feature(enable = "avx512dq")]
4581#[cfg_attr(test, assert_instr(vpmullq))]
4582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4584pub const fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i {
4585    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
4586}
4587
4588/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4589/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4590/// `src` if the corresponding bit is not set).
4591///
4592/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi64&ig_expand=4782)
4593#[inline]
4594#[target_feature(enable = "avx512dq")]
4595#[cfg_attr(test, assert_instr(vpmullq))]
4596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4597#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4598pub const fn _mm512_mask_mullo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
4599    unsafe {
4600        let b = _mm512_mullo_epi64(a, b).as_i64x8();
4601        transmute(simd_select_bitmask(k, b, src.as_i64x8()))
4602    }
4603}
4604
4605/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4606/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4607/// the corresponding bit is not set).
4608///
4609/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi64&ig_expand=4783)
4610#[inline]
4611#[target_feature(enable = "avx512dq")]
4612#[cfg_attr(test, assert_instr(vpmullq))]
4613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4614#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4615pub const fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
4616    unsafe {
4617        let b = _mm512_mullo_epi64(a, b).as_i64x8();
4618        transmute(simd_select_bitmask(k, b, i64x8::ZERO))
4619    }
4620}
4621
4622// Mask Registers
4623
/// Convert 8-bit mask a to a 32-bit integer value and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask8_u32&ig_expand=1891)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtmask8_u32(a: __mmask8) -> u32 {
    // Plain zero-extending widening cast; no instruction semantics beyond it.
    a as u32
}
4634
/// Convert 32-bit integer value a to an 8-bit mask and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask8&ig_expand=2467)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtu32_mask8(a: u32) -> __mmask8 {
    // Truncating cast: only the low 8 bits of `a` form the mask.
    a as __mmask8
}
4645
/// Add 16-bit masks a and b, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask16&ig_expand=3903)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    // Modular addition: overflow wraps, matching the kadd mask operation.
    a.wrapping_add(b)
}
4656
/// Add 8-bit masks a and b, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask8&ig_expand=3906)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
    // Modular addition: overflow wraps, matching the kadd mask operation.
    a.wrapping_add(b)
}
4667
/// Bitwise AND of 8-bit masks a and b, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask8&ig_expand=3911)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
    a & b
}
4678
4679/// Bitwise AND NOT of 8-bit masks a and b, and store the result in dst.
4680///
4681/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask8&ig_expand=3916)
4682#[inline]
4683#[target_feature(enable = "avx512dq")]
4684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4685#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4686pub const fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4687    _knot_mask8(a) & b
4688}
4689
4690/// Bitwise NOT of 8-bit mask a, and store the result in dst.
4691///
4692/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask8&ig_expand=3922)
4693#[inline]
4694#[target_feature(enable = "avx512dq")]
4695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4696#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4697pub const fn _knot_mask8(a: __mmask8) -> __mmask8 {
4698    a ^ 0b11111111
4699}
4700
/// Bitwise OR of 8-bit masks a and b, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask8&ig_expand=3927)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
    a | b
}
4711
4712/// Bitwise XNOR of 8-bit masks a and b, and store the result in dst.
4713///
4714/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask8&ig_expand=3969)
4715#[inline]
4716#[target_feature(enable = "avx512dq")]
4717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4718#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4719pub const fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4720    _knot_mask8(_kxor_mask8(a, b))
4721}
4722
/// Bitwise XOR of 8-bit masks a and b, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask8&ig_expand=3974)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
    a ^ b
}
4733
/// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// # Safety
///
/// `all_ones` must be a valid pointer for a single `u8` write.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask8_u8&ig_expand=3931)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _kortest_mask8_u8(a: __mmask8, b: __mmask8, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask8(a, b);
    // all-ones check goes through the out-pointer; the all-zeros check is
    // returned directly.
    *all_ones = (tmp == 0xff) as u8;
    (tmp == 0) as u8
}
4747
4748/// Compute the bitwise OR of 8-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
4749/// store 0 in dst.
4750///
4751/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask8_u8&ig_expand=3936)
4752#[inline]
4753#[target_feature(enable = "avx512dq")]
4754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4755#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4756pub const fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4757    (_kor_mask8(a, b) == 0xff) as u8
4758}
4759
4760/// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
4761/// store 0 in dst.
4762///
4763/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask8_u8&ig_expand=3941)
4764#[inline]
4765#[target_feature(enable = "avx512dq")]
4766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4767#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4768pub const fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4769    (_kor_mask8(a, b) == 0) as u8
4770}
4771
/// Shift 8-bit mask a left by count bits while shifting in zeros, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask8&ig_expand=3945)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftli_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
    // `unbounded_shl` yields 0 for COUNT >= 8 instead of wrapping the shift
    // amount or being UB, which matches shifting out every bit.
    a.unbounded_shl(COUNT)
}
4783
/// Shift 8-bit mask a right by count bits while shifting in zeros, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask8&ig_expand=3949)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftri_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
    // `unbounded_shr` yields 0 for COUNT >= 8 instead of wrapping the shift
    // amount or being UB, which matches shifting out every bit.
    a.unbounded_shr(COUNT)
}
4795
/// Compute the bitwise AND of 16-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// # Safety
///
/// `and_not` must be a valid pointer for a single `u8` write.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask16_u8&ig_expand=3950)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _ktest_mask16_u8(a: __mmask16, b: __mmask16, and_not: *mut u8) -> u8 {
    // andn result goes through the out-pointer; the and result is returned.
    *and_not = (_kandn_mask16(a, b) == 0) as u8;
    (_kand_mask16(a, b) == 0) as u8
}
4809
/// Compute the bitwise AND of 8-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// # Safety
///
/// `and_not` must be a valid pointer for a single `u8` write.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask8_u8&ig_expand=3953)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _ktest_mask8_u8(a: __mmask8, b: __mmask8, and_not: *mut u8) -> u8 {
    // andn result goes through the out-pointer; the and result is returned.
    *and_not = (_kandn_mask8(a, b) == 0) as u8;
    (_kand_mask8(a, b) == 0) as u8
}
4823
/// Compute the bitwise NOT of 16-bit mask a and then AND with 16-bit mask b, if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask16_u8&ig_expand=3954)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
    // The "carry" condition of ktest: (~a & b) == 0, i.e. b is a subset of a.
    (_kandn_mask16(a, b) == 0) as u8
}
4835
/// Compute the bitwise NOT of 8-bit mask a and then AND with 8-bit mask b, if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask8_u8&ig_expand=3957)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
    // The "carry" condition of ktest: (~a & b) == 0, i.e. b is a subset of a.
    (_kandn_mask8(a, b) == 0) as u8
}
4847
/// Compute the bitwise AND of 16-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask16_u8&ig_expand=3958)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
    // The "zero" condition of ktest: (a & b) == 0, i.e. the masks are disjoint.
    (_kand_mask16(a, b) == 0) as u8
}
4859
/// Compute the bitwise AND of 8-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask8_u8&ig_expand=3961)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
    // The "zero" condition of ktest: (a & b) == 0, i.e. the masks are disjoint.
    (_kand_mask8(a, b) == 0) as u8
}
4871
/// Load 8-bit mask from memory
///
/// # Safety
///
/// `mem_addr` must be a valid pointer for a single `__mmask8` read.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask8&ig_expand=3999)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _load_mask8(mem_addr: *const __mmask8) -> __mmask8 {
    *mem_addr
}
4882
/// Store 8-bit mask to memory
///
/// # Safety
///
/// `mem_addr` must be a valid pointer for a single `__mmask8` write.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask8&ig_expand=6468)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _store_mask8(mem_addr: *mut __mmask8, a: __mmask8) {
    *mem_addr = a;
}
4893
4894/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4895/// integer in a.
4896///
4897/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi32_mask&ig_expand=4612)
4898#[inline]
4899#[target_feature(enable = "avx512dq,avx512vl")]
4900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4901#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4902pub const fn _mm_movepi32_mask(a: __m128i) -> __mmask8 {
4903    let zero = _mm_setzero_si128();
4904    _mm_cmplt_epi32_mask(a, zero)
4905}
4906
4907/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4908/// integer in a.
4909///
4910/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi32_mask&ig_expand=4613)
4911#[inline]
4912#[target_feature(enable = "avx512dq,avx512vl")]
4913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4914#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4915pub const fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 {
4916    let zero = _mm256_setzero_si256();
4917    _mm256_cmplt_epi32_mask(a, zero)
4918}
4919
4920/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4921/// integer in a.
4922///
4923/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi32_mask&ig_expand=4614)
4924#[inline]
4925#[target_feature(enable = "avx512dq")]
4926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4927#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4928pub const fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 {
4929    let zero = _mm512_setzero_si512();
4930    _mm512_cmplt_epi32_mask(a, zero)
4931}
4932
4933/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4934/// integer in a.
4935///
4936/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi64_mask&ig_expand=4615)
4937#[inline]
4938#[target_feature(enable = "avx512dq,avx512vl")]
4939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4940#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4941pub const fn _mm_movepi64_mask(a: __m128i) -> __mmask8 {
4942    let zero = _mm_setzero_si128();
4943    _mm_cmplt_epi64_mask(a, zero)
4944}
4945
4946/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4947/// integer in a.
4948///
4949/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi64_mask&ig_expand=4616)
4950#[inline]
4951#[target_feature(enable = "avx512dq,avx512vl")]
4952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4953#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4954pub const fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 {
4955    let zero = _mm256_setzero_si256();
4956    _mm256_cmplt_epi64_mask(a, zero)
4957}
4958
4959/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4960/// integer in a.
4961///
4962/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi64_mask&ig_expand=4617)
4963#[inline]
4964#[target_feature(enable = "avx512dq")]
4965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4967pub const fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 {
4968    let zero = _mm512_setzero_si512();
4969    _mm512_cmplt_epi64_mask(a, zero)
4970}
4971
4972/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
4973/// bit in k.
4974///
4975/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi32&ig_expand=4625)
4976#[inline]
4977#[target_feature(enable = "avx512dq,avx512vl")]
4978#[cfg_attr(test, assert_instr(vpmovm2d))]
4979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4980#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4981pub const fn _mm_movm_epi32(k: __mmask8) -> __m128i {
4982    let ones = _mm_set1_epi32(-1);
4983    _mm_maskz_mov_epi32(k, ones)
4984}
4985
4986/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
4987/// bit in k.
4988///
4989/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi32&ig_expand=4626)
4990#[inline]
4991#[target_feature(enable = "avx512dq,avx512vl")]
4992#[cfg_attr(test, assert_instr(vpmovm2d))]
4993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4995pub const fn _mm256_movm_epi32(k: __mmask8) -> __m256i {
4996    let ones = _mm256_set1_epi32(-1);
4997    _mm256_maskz_mov_epi32(k, ones)
4998}
4999
5000/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
5001/// bit in k.
5002///
5003/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi32&ig_expand=4627)
5004#[inline]
5005#[target_feature(enable = "avx512dq")]
5006#[cfg_attr(test, assert_instr(vpmovm2d))]
5007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5009pub const fn _mm512_movm_epi32(k: __mmask16) -> __m512i {
5010    let ones = _mm512_set1_epi32(-1);
5011    _mm512_maskz_mov_epi32(k, ones)
5012}
5013
5014/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
5015/// bit in k.
5016///
5017/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi64&ig_expand=4628)
5018#[inline]
5019#[target_feature(enable = "avx512dq,avx512vl")]
5020#[cfg_attr(test, assert_instr(vpmovm2q))]
5021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5022#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5023pub const fn _mm_movm_epi64(k: __mmask8) -> __m128i {
5024    let ones = _mm_set1_epi64x(-1);
5025    _mm_maskz_mov_epi64(k, ones)
5026}
5027
5028/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
5029/// bit in k.
5030///
5031/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi64&ig_expand=4629)
5032#[inline]
5033#[target_feature(enable = "avx512dq,avx512vl")]
5034#[cfg_attr(test, assert_instr(vpmovm2q))]
5035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5037pub const fn _mm256_movm_epi64(k: __mmask8) -> __m256i {
5038    let ones = _mm256_set1_epi64x(-1);
5039    _mm256_maskz_mov_epi64(k, ones)
5040}
5041
5042/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
5043/// bit in k.
5044///
5045/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi64&ig_expand=4630)
5046#[inline]
5047#[target_feature(enable = "avx512dq")]
5048#[cfg_attr(test, assert_instr(vpmovm2q))]
5049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5051pub const fn _mm512_movm_epi64(k: __mmask8) -> __m512i {
5052    let ones = _mm512_set1_epi64(-1);
5053    _mm512_maskz_mov_epi64(k, ones)
5054}
5055
5056// Range
5057
5058/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5059/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5060/// Lower 2 bits of IMM8 specifies the operation control:
5061///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5062/// Upper 2 bits of IMM8 specifies the sign control:
5063///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5064/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5065///
5066/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_pd&ig_expand=5210)
5067#[inline]
5068#[target_feature(enable = "avx512dq")]
5069#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
5070#[rustc_legacy_const_generics(2, 3)]
5071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5072pub fn _mm512_range_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
5073    static_assert_uimm_bits!(IMM8, 4);
5074    static_assert_sae!(SAE);
5075    _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), 0xff, a, b)
5076}
5077
5078/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5079/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5080/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5081/// Lower 2 bits of IMM8 specifies the operation control:
5082///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5083/// Upper 2 bits of IMM8 specifies the sign control:
5084///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5085/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5086///
5087/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_pd&ig_expand=5208)
5088#[inline]
5089#[target_feature(enable = "avx512dq")]
5090#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
5091#[rustc_legacy_const_generics(4, 5)]
5092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5093pub fn _mm512_mask_range_round_pd<const IMM8: i32, const SAE: i32>(
5094    src: __m512d,
5095    k: __mmask8,
5096    a: __m512d,
5097    b: __m512d,
5098) -> __m512d {
5099    unsafe {
5100        static_assert_uimm_bits!(IMM8, 4);
5101        static_assert_sae!(SAE);
5102        transmute(vrangepd_512(
5103            a.as_f64x8(),
5104            b.as_f64x8(),
5105            IMM8,
5106            src.as_f64x8(),
5107            k,
5108            SAE,
5109        ))
5110    }
5111}
5112
5113/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5114/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5115/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5116/// Lower 2 bits of IMM8 specifies the operation control:
5117///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5118/// Upper 2 bits of IMM8 specifies the sign control:
5119///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5120/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5121///
5122/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_pd&ig_expand=5209)
5123#[inline]
5124#[target_feature(enable = "avx512dq")]
5125#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
5126#[rustc_legacy_const_generics(3, 4)]
5127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5128pub fn _mm512_maskz_range_round_pd<const IMM8: i32, const SAE: i32>(
5129    k: __mmask8,
5130    a: __m512d,
5131    b: __m512d,
5132) -> __m512d {
5133    static_assert_uimm_bits!(IMM8, 4);
5134    static_assert_sae!(SAE);
5135    _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a, b)
5136}
5137
5138/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5139/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5140/// Lower 2 bits of IMM8 specifies the operation control:
5141///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5142/// Upper 2 bits of IMM8 specifies the sign control:
5143///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5144///
5145/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_pd&ig_expand=5192)
5146#[inline]
5147#[target_feature(enable = "avx512dq,avx512vl")]
5148#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5149#[rustc_legacy_const_generics(2)]
5150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5151pub fn _mm_range_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
5152    static_assert_uimm_bits!(IMM8, 4);
5153    _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), 0xff, a, b)
5154}
5155
5156/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5157/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5158/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5159/// Lower 2 bits of IMM8 specifies the operation control:
5160///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5161/// Upper 2 bits of IMM8 specifies the sign control:
5162///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5163///
5164/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_pd&ig_expand=5190)
5165#[inline]
5166#[target_feature(enable = "avx512dq,avx512vl")]
5167#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5168#[rustc_legacy_const_generics(4)]
5169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5170pub fn _mm_mask_range_pd<const IMM8: i32>(
5171    src: __m128d,
5172    k: __mmask8,
5173    a: __m128d,
5174    b: __m128d,
5175) -> __m128d {
5176    unsafe {
5177        static_assert_uimm_bits!(IMM8, 4);
5178        transmute(vrangepd_128(
5179            a.as_f64x2(),
5180            b.as_f64x2(),
5181            IMM8,
5182            src.as_f64x2(),
5183            k,
5184        ))
5185    }
5186}
5187
5188/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5189/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5190/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5191/// Lower 2 bits of IMM8 specifies the operation control:
5192///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5193/// Upper 2 bits of IMM8 specifies the sign control:
5194///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5195///
5196/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_pd&ig_expand=5191)
5197#[inline]
5198#[target_feature(enable = "avx512dq,avx512vl")]
5199#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5200#[rustc_legacy_const_generics(3)]
5201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5202pub fn _mm_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
5203    static_assert_uimm_bits!(IMM8, 4);
5204    _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), k, a, b)
5205}
5206
5207/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5208/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5209/// Lower 2 bits of IMM8 specifies the operation control:
5210///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5211/// Upper 2 bits of IMM8 specifies the sign control:
5212///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5213///
5214/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_pd&ig_expand=5195)
5215#[inline]
5216#[target_feature(enable = "avx512dq,avx512vl")]
5217#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5218#[rustc_legacy_const_generics(2)]
5219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5220pub fn _mm256_range_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
5221    static_assert_uimm_bits!(IMM8, 4);
5222    _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), 0xff, a, b)
5223}
5224
5225/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5226/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5227/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5228/// Lower 2 bits of IMM8 specifies the operation control:
5229///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5230/// Upper 2 bits of IMM8 specifies the sign control:
5231///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5232///
5233/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_pd&ig_expand=5193)
5234#[inline]
5235#[target_feature(enable = "avx512dq,avx512vl")]
5236#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5237#[rustc_legacy_const_generics(4)]
5238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5239pub fn _mm256_mask_range_pd<const IMM8: i32>(
5240    src: __m256d,
5241    k: __mmask8,
5242    a: __m256d,
5243    b: __m256d,
5244) -> __m256d {
5245    unsafe {
5246        static_assert_uimm_bits!(IMM8, 4);
5247        transmute(vrangepd_256(
5248            a.as_f64x4(),
5249            b.as_f64x4(),
5250            IMM8,
5251            src.as_f64x4(),
5252            k,
5253        ))
5254    }
5255}
5256
5257/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5258/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5259/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5260/// Lower 2 bits of IMM8 specifies the operation control:
5261///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5262/// Upper 2 bits of IMM8 specifies the sign control:
5263///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5264///
5265/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_pd&ig_expand=5194)
5266#[inline]
5267#[target_feature(enable = "avx512dq,avx512vl")]
5268#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5269#[rustc_legacy_const_generics(3)]
5270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5271pub fn _mm256_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
5272    static_assert_uimm_bits!(IMM8, 4);
5273    _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), k, a, b)
5274}
5275
5276/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5277/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5278/// Lower 2 bits of IMM8 specifies the operation control:
5279///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5280/// Upper 2 bits of IMM8 specifies the sign control:
5281///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5282///
5283/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_pd&ig_expand=5198)
5284#[inline]
5285#[target_feature(enable = "avx512dq")]
5286#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5287#[rustc_legacy_const_generics(2)]
5288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5289pub fn _mm512_range_pd<const IMM8: i32>(a: __m512d, b: __m512d) -> __m512d {
5290    static_assert_uimm_bits!(IMM8, 4);
5291    _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), 0xff, a, b)
5292}
5293
5294/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5295/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5296/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5297/// Lower 2 bits of IMM8 specifies the operation control:
5298///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5299/// Upper 2 bits of IMM8 specifies the sign control:
5300///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5301///
5302/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_pd&ig_expand=5196)
5303#[inline]
5304#[target_feature(enable = "avx512dq")]
5305#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5306#[rustc_legacy_const_generics(4)]
5307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5308pub fn _mm512_mask_range_pd<const IMM8: i32>(
5309    src: __m512d,
5310    k: __mmask8,
5311    a: __m512d,
5312    b: __m512d,
5313) -> __m512d {
5314    unsafe {
5315        static_assert_uimm_bits!(IMM8, 4);
5316        transmute(vrangepd_512(
5317            a.as_f64x8(),
5318            b.as_f64x8(),
5319            IMM8,
5320            src.as_f64x8(),
5321            k,
5322            _MM_FROUND_CUR_DIRECTION,
5323        ))
5324    }
5325}
5326
5327/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5328/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5329/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5330/// Lower 2 bits of IMM8 specifies the operation control:
5331///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5332/// Upper 2 bits of IMM8 specifies the sign control:
5333///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5334///
5335/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_pd&ig_expand=5197)
5336#[inline]
5337#[target_feature(enable = "avx512dq")]
5338#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5339#[rustc_legacy_const_generics(3)]
5340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5341pub fn _mm512_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
5342    static_assert_uimm_bits!(IMM8, 4);
5343    _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), k, a, b)
5344}
5345
5346/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5347/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5348/// Lower 2 bits of IMM8 specifies the operation control:
5349///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5350/// Upper 2 bits of IMM8 specifies the sign control:
5351///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5352/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5353///
5354/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_ps&ig_expand=5213)
5355#[inline]
5356#[target_feature(enable = "avx512dq")]
5357#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
5358#[rustc_legacy_const_generics(2, 3)]
5359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5360pub fn _mm512_range_round_ps<const IMM8: i32, const SAE: i32>(a: __m512, b: __m512) -> __m512 {
5361    static_assert_uimm_bits!(IMM8, 4);
5362    static_assert_sae!(SAE);
5363    _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), 0xffff, a, b)
5364}
5365
/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
/// Lower 2 bits of IMM8 specifies the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specifies the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_ps&ig_expand=5211)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_range_round_ps<const IMM8: i32, const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        static_assert_sae!(SAE);
        // `src` supplies the values for lanes whose mask bit is clear.
        transmute(vrangeps_512(
            a.as_f32x16(),
            b.as_f32x16(),
            IMM8,
            src.as_f32x16(),
            k,
            SAE,
        ))
    }
}
5399
/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
/// Lower 2 bits of IMM8 specifies the operation control:
///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specifies the sign control:
///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_ps&ig_expand=5212)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_range_round_ps<const IMM8: i32, const SAE: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    static_assert_uimm_bits!(IMM8, 4);
    static_assert_sae!(SAE);
    // Zero source: lanes with a clear mask bit come out as 0.0.
    _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a, b)
}
5423
5424/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5425/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5426/// Lower 2 bits of IMM8 specifies the operation control:
5427///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5428/// Upper 2 bits of IMM8 specifies the sign control:
5429///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5430///
5431/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_ps&ig_expand=5201)
5432#[inline]
5433#[target_feature(enable = "avx512dq,avx512vl")]
5434#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5435#[rustc_legacy_const_generics(2)]
5436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5437pub fn _mm_range_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
5438    static_assert_uimm_bits!(IMM8, 4);
5439    _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), 0xff, a, b)
5440}
5441
5442/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5443/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5444/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5445/// Lower 2 bits of IMM8 specifies the operation control:
5446///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5447/// Upper 2 bits of IMM8 specifies the sign control:
5448///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5449///
5450/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ps&ig_expand=5199)
5451#[inline]
5452#[target_feature(enable = "avx512dq,avx512vl")]
5453#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5454#[rustc_legacy_const_generics(4)]
5455#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5456pub fn _mm_mask_range_ps<const IMM8: i32>(
5457    src: __m128,
5458    k: __mmask8,
5459    a: __m128,
5460    b: __m128,
5461) -> __m128 {
5462    unsafe {
5463        static_assert_uimm_bits!(IMM8, 4);
5464        transmute(vrangeps_128(
5465            a.as_f32x4(),
5466            b.as_f32x4(),
5467            IMM8,
5468            src.as_f32x4(),
5469            k,
5470        ))
5471    }
5472}
5473
5474/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5475/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5476/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5477/// Lower 2 bits of IMM8 specifies the operation control:
5478///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5479/// Upper 2 bits of IMM8 specifies the sign control:
5480///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5481///
5482/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ps&ig_expand=5200)
5483#[inline]
5484#[target_feature(enable = "avx512dq,avx512vl")]
5485#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5486#[rustc_legacy_const_generics(3)]
5487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5488pub fn _mm_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
5489    static_assert_uimm_bits!(IMM8, 4);
5490    _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), k, a, b)
5491}
5492
5493/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5494/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5495/// Lower 2 bits of IMM8 specifies the operation control:
5496///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5497/// Upper 2 bits of IMM8 specifies the sign control:
5498///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5499///
5500/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_ps&ig_expand=5204)
5501#[inline]
5502#[target_feature(enable = "avx512dq,avx512vl")]
5503#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5504#[rustc_legacy_const_generics(2)]
5505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5506pub fn _mm256_range_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
5507    static_assert_uimm_bits!(IMM8, 4);
5508    _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), 0xff, a, b)
5509}
5510
5511/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5512/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5513/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5514/// Lower 2 bits of IMM8 specifies the operation control:
5515///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5516/// Upper 2 bits of IMM8 specifies the sign control:
5517///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5518///
5519/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_ps&ig_expand=5202)
5520#[inline]
5521#[target_feature(enable = "avx512dq,avx512vl")]
5522#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5523#[rustc_legacy_const_generics(4)]
5524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5525pub fn _mm256_mask_range_ps<const IMM8: i32>(
5526    src: __m256,
5527    k: __mmask8,
5528    a: __m256,
5529    b: __m256,
5530) -> __m256 {
5531    unsafe {
5532        static_assert_uimm_bits!(IMM8, 4);
5533        transmute(vrangeps_256(
5534            a.as_f32x8(),
5535            b.as_f32x8(),
5536            IMM8,
5537            src.as_f32x8(),
5538            k,
5539        ))
5540    }
5541}
5542
5543/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5544/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5545/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5546/// Lower 2 bits of IMM8 specifies the operation control:
5547///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5548/// Upper 2 bits of IMM8 specifies the sign control:
5549///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5550///
5551/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_ps&ig_expand=5203)
5552#[inline]
5553#[target_feature(enable = "avx512dq,avx512vl")]
5554#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5555#[rustc_legacy_const_generics(3)]
5556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5557pub fn _mm256_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
5558    static_assert_uimm_bits!(IMM8, 4);
5559    _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), k, a, b)
5560}
5561
5562/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5563/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5564/// Lower 2 bits of IMM8 specifies the operation control:
5565///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5566/// Upper 2 bits of IMM8 specifies the sign control:
5567///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5568///
5569/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_ps&ig_expand=5207)
5570#[inline]
5571#[target_feature(enable = "avx512dq")]
5572#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5573#[rustc_legacy_const_generics(2)]
5574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5575pub fn _mm512_range_ps<const IMM8: i32>(a: __m512, b: __m512) -> __m512 {
5576    static_assert_uimm_bits!(IMM8, 4);
5577    _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), 0xffff, a, b)
5578}
5579
5580/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5581/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5582/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5583/// Lower 2 bits of IMM8 specifies the operation control:
5584///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5585/// Upper 2 bits of IMM8 specifies the sign control:
5586///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5587///
5588/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_ps&ig_expand=5205)
5589#[inline]
5590#[target_feature(enable = "avx512dq")]
5591#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5592#[rustc_legacy_const_generics(4)]
5593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5594pub fn _mm512_mask_range_ps<const IMM8: i32>(
5595    src: __m512,
5596    k: __mmask16,
5597    a: __m512,
5598    b: __m512,
5599) -> __m512 {
5600    unsafe {
5601        static_assert_uimm_bits!(IMM8, 4);
5602        transmute(vrangeps_512(
5603            a.as_f32x16(),
5604            b.as_f32x16(),
5605            IMM8,
5606            src.as_f32x16(),
5607            k,
5608            _MM_FROUND_CUR_DIRECTION,
5609        ))
5610    }
5611}
5612
5613/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5614/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5615/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5616/// Lower 2 bits of IMM8 specifies the operation control:
5617///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5618/// Upper 2 bits of IMM8 specifies the sign control:
5619///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5620///
5621/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_ps&ig_expand=5206)
5622#[inline]
5623#[target_feature(enable = "avx512dq")]
5624#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5625#[rustc_legacy_const_generics(3)]
5626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5627pub fn _mm512_maskz_range_ps<const IMM8: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
5628    static_assert_uimm_bits!(IMM8, 4);
5629    _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), k, a, b)
5630}
5631
5632/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5633/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5634/// of dst, and copy the upper element from a to the upper element of dst.
5635/// Lower 2 bits of IMM8 specifies the operation control:
5636///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5637/// Upper 2 bits of IMM8 specifies the sign control:
5638///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5639/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5640///
5641/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_sd&ig_expand=5216)
5642#[inline]
5643#[target_feature(enable = "avx512dq")]
5644#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5645#[rustc_legacy_const_generics(2, 3)]
5646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5647pub fn _mm_range_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
5648    static_assert_uimm_bits!(IMM8, 4);
5649    static_assert_sae!(SAE);
5650    _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), 0xff, a, b)
5651}
5652
5653/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5654/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5655/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5656/// upper element from a to the upper element of dst.
5657/// Lower 2 bits of IMM8 specifies the operation control:
5658///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5659/// Upper 2 bits of IMM8 specifies the sign control:
5660///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5661/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5662///
5663/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_sd&ig_expand=5214)
5664#[inline]
5665#[target_feature(enable = "avx512dq")]
5666#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5667#[rustc_legacy_const_generics(4, 5)]
5668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5669pub fn _mm_mask_range_round_sd<const IMM8: i32, const SAE: i32>(
5670    src: __m128d,
5671    k: __mmask8,
5672    a: __m128d,
5673    b: __m128d,
5674) -> __m128d {
5675    unsafe {
5676        static_assert_uimm_bits!(IMM8, 4);
5677        static_assert_sae!(SAE);
5678        transmute(vrangesd(
5679            a.as_f64x2(),
5680            b.as_f64x2(),
5681            src.as_f64x2(),
5682            k,
5683            IMM8,
5684            SAE,
5685        ))
5686    }
5687}
5688
5689/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5690/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5691/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5692/// element from a to the upper element of dst.
5693/// Lower 2 bits of IMM8 specifies the operation control:
5694///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5695/// Upper 2 bits of IMM8 specifies the sign control:
5696///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5697/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5698///
5699/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_sd&ig_expand=5215)
5700#[inline]
5701#[target_feature(enable = "avx512dq")]
5702#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5703#[rustc_legacy_const_generics(3, 4)]
5704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5705pub fn _mm_maskz_range_round_sd<const IMM8: i32, const SAE: i32>(
5706    k: __mmask8,
5707    a: __m128d,
5708    b: __m128d,
5709) -> __m128d {
5710    static_assert_uimm_bits!(IMM8, 4);
5711    static_assert_sae!(SAE);
5712    _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b)
5713}
5714
5715/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5716/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5717/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5718/// upper element from a to the upper element of dst.
5719/// Lower 2 bits of IMM8 specifies the operation control:
5720///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5721/// Upper 2 bits of IMM8 specifies the sign control:
5722///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5723///
5724/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_sd&ig_expand=5220)
5725#[inline]
5726#[target_feature(enable = "avx512dq")]
5727#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))]
5728#[rustc_legacy_const_generics(4)]
5729#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5730pub fn _mm_mask_range_sd<const IMM8: i32>(
5731    src: __m128d,
5732    k: __mmask8,
5733    a: __m128d,
5734    b: __m128d,
5735) -> __m128d {
5736    unsafe {
5737        static_assert_uimm_bits!(IMM8, 4);
5738        transmute(vrangesd(
5739            a.as_f64x2(),
5740            b.as_f64x2(),
5741            src.as_f64x2(),
5742            k,
5743            IMM8,
5744            _MM_FROUND_CUR_DIRECTION,
5745        ))
5746    }
5747}
5748
5749/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5750/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5751/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5752/// element from a to the upper element of dst.
5753/// Lower 2 bits of IMM8 specifies the operation control:
5754///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5755/// Upper 2 bits of IMM8 specifies the sign control:
5756///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5757///
5758/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_sd&ig_expand=5221)
5759#[inline]
5760#[target_feature(enable = "avx512dq")]
5761#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))]
5762#[rustc_legacy_const_generics(3)]
5763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5764pub fn _mm_maskz_range_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
5765    static_assert_uimm_bits!(IMM8, 4);
5766    _mm_mask_range_sd::<IMM8>(_mm_setzero_pd(), k, a, b)
5767}
5768
5769/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5770/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5771/// of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
5772/// Lower 2 bits of IMM8 specifies the operation control:
5773///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5774/// Upper 2 bits of IMM8 specifies the sign control:
5775///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5776/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5777///
5778/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_ss&ig_expand=5219)
5779#[inline]
5780#[target_feature(enable = "avx512dq")]
5781#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5782#[rustc_legacy_const_generics(2, 3)]
5783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5784pub fn _mm_range_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
5785    static_assert_uimm_bits!(IMM8, 4);
5786    static_assert_sae!(SAE);
5787    _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), 0xff, a, b)
5788}
5789
5790/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5791/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5792/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5793/// upper 3 packed elements from a to the upper elements of dst.
5794/// Lower 2 bits of IMM8 specifies the operation control:
5795///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5796/// Upper 2 bits of IMM8 specifies the sign control:
5797///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5798/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5799///
5800/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_ss&ig_expand=5217)
5801#[inline]
5802#[target_feature(enable = "avx512dq")]
5803#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5804#[rustc_legacy_const_generics(4, 5)]
5805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5806pub fn _mm_mask_range_round_ss<const IMM8: i32, const SAE: i32>(
5807    src: __m128,
5808    k: __mmask8,
5809    a: __m128,
5810    b: __m128,
5811) -> __m128 {
5812    unsafe {
5813        static_assert_uimm_bits!(IMM8, 4);
5814        static_assert_sae!(SAE);
5815        transmute(vrangess(
5816            a.as_f32x4(),
5817            b.as_f32x4(),
5818            src.as_f32x4(),
5819            k,
5820            IMM8,
5821            SAE,
5822        ))
5823    }
5824}
5825
5826/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5827/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5828/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5829/// 3 packed elements from a to the upper elements of dst.
5830/// Lower 2 bits of IMM8 specifies the operation control:
5831///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5832/// Upper 2 bits of IMM8 specifies the sign control:
5833///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5834/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5835///
5836/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_ss&ig_expand=5218)
5837#[inline]
5838#[target_feature(enable = "avx512dq")]
5839#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5840#[rustc_legacy_const_generics(3, 4)]
5841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5842pub fn _mm_maskz_range_round_ss<const IMM8: i32, const SAE: i32>(
5843    k: __mmask8,
5844    a: __m128,
5845    b: __m128,
5846) -> __m128 {
5847    static_assert_uimm_bits!(IMM8, 4);
5848    static_assert_sae!(SAE);
5849    _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b)
5850}
5851
5852/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5853/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5854/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5855/// upper 3 packed elements from a to the upper elements of dst.
5856/// Lower 2 bits of IMM8 specifies the operation control:
5857///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5858/// Upper 2 bits of IMM8 specifies the sign control:
5859///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5860///
5861/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ss&ig_expand=5222)
5862#[inline]
5863#[target_feature(enable = "avx512dq")]
5864#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))]
5865#[rustc_legacy_const_generics(4)]
5866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5867pub fn _mm_mask_range_ss<const IMM8: i32>(
5868    src: __m128,
5869    k: __mmask8,
5870    a: __m128,
5871    b: __m128,
5872) -> __m128 {
5873    unsafe {
5874        static_assert_uimm_bits!(IMM8, 4);
5875        transmute(vrangess(
5876            a.as_f32x4(),
5877            b.as_f32x4(),
5878            src.as_f32x4(),
5879            k,
5880            IMM8,
5881            _MM_FROUND_CUR_DIRECTION,
5882        ))
5883    }
5884}
5885
5886/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5887/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5888/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5889/// 3 packed elements from a to the upper elements of dst.
5890/// Lower 2 bits of IMM8 specifies the operation control:
5891///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5892/// Upper 2 bits of IMM8 specifies the sign control:
5893///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5894///
5895/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ss&ig_expand=5223)
5896#[inline]
5897#[target_feature(enable = "avx512dq")]
5898#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))]
5899#[rustc_legacy_const_generics(3)]
5900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5901pub fn _mm_maskz_range_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
5902    static_assert_uimm_bits!(IMM8, 4);
5903    _mm_mask_range_ss::<IMM8>(_mm_setzero_ps(), k, a, b)
5904}
5905
// Reduce //
5907
5908/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5909/// the number of bits specified by imm8, and store the results in dst.
5910/// Rounding is done according to the imm8 parameter, which can be one of:
5911///
5912/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5913/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5914/// * [`_MM_FROUND_TO_POS_INF`] : round up
5915/// * [`_MM_FROUND_TO_ZERO`] : truncate
5916/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5917///
5918/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5919///
5920/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_pd&ig_expand=5438)
5921#[inline]
5922#[target_feature(enable = "avx512dq")]
5923#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5924#[rustc_legacy_const_generics(1, 2)]
5925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5926pub fn _mm512_reduce_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
5927    static_assert_uimm_bits!(IMM8, 8);
5928    static_assert_sae!(SAE);
5929    _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_undefined_pd(), 0xff, a)
5930}
5931
5932/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5933/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
5934/// copied from src to dst if the corresponding mask bit is not set).
5935/// Rounding is done according to the imm8 parameter, which can be one of:
5936///
5937/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5938/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5939/// * [`_MM_FROUND_TO_POS_INF`] : round up
5940/// * [`_MM_FROUND_TO_ZERO`] : truncate
5941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5942///
5943/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5944///
5945/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_pd&ig_expand=5436)
5946#[inline]
5947#[target_feature(enable = "avx512dq")]
5948#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5949#[rustc_legacy_const_generics(3, 4)]
5950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5951pub fn _mm512_mask_reduce_round_pd<const IMM8: i32, const SAE: i32>(
5952    src: __m512d,
5953    k: __mmask8,
5954    a: __m512d,
5955) -> __m512d {
5956    unsafe {
5957        static_assert_uimm_bits!(IMM8, 8);
5958        static_assert_sae!(SAE);
5959        transmute(vreducepd_512(a.as_f64x8(), IMM8, src.as_f64x8(), k, SAE))
5960    }
5961}
5962
5963/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5964/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
5965/// zeroed out if the corresponding mask bit is not set).
5966/// Rounding is done according to the imm8 parameter, which can be one of:
5967///
5968/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5969/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5970/// * [`_MM_FROUND_TO_POS_INF`] : round up
5971/// * [`_MM_FROUND_TO_ZERO`] : truncate
5972/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5973///
5974/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5975///
5976/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_pd&ig_expand=5437)
5977#[inline]
5978#[target_feature(enable = "avx512dq")]
5979#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5980#[rustc_legacy_const_generics(2, 3)]
5981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5982pub fn _mm512_maskz_reduce_round_pd<const IMM8: i32, const SAE: i32>(
5983    k: __mmask8,
5984    a: __m512d,
5985) -> __m512d {
5986    static_assert_uimm_bits!(IMM8, 8);
5987    static_assert_sae!(SAE);
5988    _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a)
5989}
5990
5991/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5992/// the number of bits specified by imm8, and store the results in dst.
5993/// Rounding is done according to the imm8 parameter, which can be one of:
5994///
5995/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5996/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5997/// * [`_MM_FROUND_TO_POS_INF`] : round up
5998/// * [`_MM_FROUND_TO_ZERO`] : truncate
5999/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6000///
6001/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_pd&ig_expand=5411)
6002#[inline]
6003#[target_feature(enable = "avx512dq,avx512vl")]
6004#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6005#[rustc_legacy_const_generics(1)]
6006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6007pub fn _mm_reduce_pd<const IMM8: i32>(a: __m128d) -> __m128d {
6008    static_assert_uimm_bits!(IMM8, 8);
6009    _mm_mask_reduce_pd::<IMM8>(_mm_undefined_pd(), 0xff, a)
6010}
6011
6012/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6013/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6014/// copied from src to dst if the corresponding mask bit is not set).
6015/// Rounding is done according to the imm8 parameter, which can be one of:
6016///
6017/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6018/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6019/// * [`_MM_FROUND_TO_POS_INF`] : round up
6020/// * [`_MM_FROUND_TO_ZERO`] : truncate
6021/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6022///
6023/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_pd&ig_expand=5409)
6024#[inline]
6025#[target_feature(enable = "avx512dq,avx512vl")]
6026#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6027#[rustc_legacy_const_generics(3)]
6028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6029pub fn _mm_mask_reduce_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
6030    unsafe {
6031        static_assert_uimm_bits!(IMM8, 8);
6032        transmute(vreducepd_128(a.as_f64x2(), IMM8, src.as_f64x2(), k))
6033    }
6034}
6035
6036/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6037/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6038/// zeroed out if the corresponding mask bit is not set).
6039/// Rounding is done according to the imm8 parameter, which can be one of:
6040///
6041/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6042/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6043/// * [`_MM_FROUND_TO_POS_INF`] : round up
6044/// * [`_MM_FROUND_TO_ZERO`] : truncate
6045/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6046///
6047/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_pd&ig_expand=5410)
6048#[inline]
6049#[target_feature(enable = "avx512dq,avx512vl")]
6050#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6051#[rustc_legacy_const_generics(2)]
6052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6053pub fn _mm_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
6054    static_assert_uimm_bits!(IMM8, 8);
6055    _mm_mask_reduce_pd::<IMM8>(_mm_setzero_pd(), k, a)
6056}
6057
6058/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6059/// the number of bits specified by imm8, and store the results in dst.
6060/// Rounding is done according to the imm8 parameter, which can be one of:
6061///
6062/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6063/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6064/// * [`_MM_FROUND_TO_POS_INF`] : round up
6065/// * [`_MM_FROUND_TO_ZERO`] : truncate
6066/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6067///
6068/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_pd&ig_expand=5414)
6069#[inline]
6070#[target_feature(enable = "avx512dq,avx512vl")]
6071#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6072#[rustc_legacy_const_generics(1)]
6073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6074pub fn _mm256_reduce_pd<const IMM8: i32>(a: __m256d) -> __m256d {
6075    static_assert_uimm_bits!(IMM8, 8);
6076    _mm256_mask_reduce_pd::<IMM8>(_mm256_undefined_pd(), 0xff, a)
6077}
6078
6079/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6080/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6081/// copied from src to dst if the corresponding mask bit is not set).
6082/// Rounding is done according to the imm8 parameter, which can be one of:
6083///
6084/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6085/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6086/// * [`_MM_FROUND_TO_POS_INF`] : round up
6087/// * [`_MM_FROUND_TO_ZERO`] : truncate
6088/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6089///
6090/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_pd&ig_expand=5412)
6091#[inline]
6092#[target_feature(enable = "avx512dq,avx512vl")]
6093#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6094#[rustc_legacy_const_generics(3)]
6095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6096pub fn _mm256_mask_reduce_pd<const IMM8: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
6097    unsafe {
6098        static_assert_uimm_bits!(IMM8, 8);
6099        transmute(vreducepd_256(a.as_f64x4(), IMM8, src.as_f64x4(), k))
6100    }
6101}
6102
6103/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6104/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6105/// zeroed out if the corresponding mask bit is not set).
6106/// Rounding is done according to the imm8 parameter, which can be one of:
6107///
6108/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6109/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6110/// * [`_MM_FROUND_TO_POS_INF`] : round up
6111/// * [`_MM_FROUND_TO_ZERO`] : truncate
6112/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6113///
6114/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_pd&ig_expand=5413)
6115#[inline]
6116#[target_feature(enable = "avx512dq,avx512vl")]
6117#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6118#[rustc_legacy_const_generics(2)]
6119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6120pub fn _mm256_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
6121    static_assert_uimm_bits!(IMM8, 8);
6122    _mm256_mask_reduce_pd::<IMM8>(_mm256_setzero_pd(), k, a)
6123}
6124
6125/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6126/// the number of bits specified by imm8, and store the results in dst.
6127/// Rounding is done according to the imm8 parameter, which can be one of:
6128///
6129/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6130/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6131/// * [`_MM_FROUND_TO_POS_INF`] : round up
6132/// * [`_MM_FROUND_TO_ZERO`] : truncate
6133/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6134///
6135/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_pd&ig_expand=5417)
6136#[inline]
6137#[target_feature(enable = "avx512dq")]
6138#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6139#[rustc_legacy_const_generics(1)]
6140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6141pub fn _mm512_reduce_pd<const IMM8: i32>(a: __m512d) -> __m512d {
6142    static_assert_uimm_bits!(IMM8, 8);
6143    _mm512_mask_reduce_pd::<IMM8>(_mm512_undefined_pd(), 0xff, a)
6144}
6145
6146/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6147/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6148/// copied from src to dst if the corresponding mask bit is not set).
6149/// Rounding is done according to the imm8 parameter, which can be one of:
6150///
6151/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6152/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6153/// * [`_MM_FROUND_TO_POS_INF`] : round up
6154/// * [`_MM_FROUND_TO_ZERO`] : truncate
6155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6156///
6157/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_pd&ig_expand=5415)
6158#[inline]
6159#[target_feature(enable = "avx512dq")]
6160#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6161#[rustc_legacy_const_generics(3)]
6162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6163pub fn _mm512_mask_reduce_pd<const IMM8: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
6164    unsafe {
6165        static_assert_uimm_bits!(IMM8, 8);
6166        transmute(vreducepd_512(
6167            a.as_f64x8(),
6168            IMM8,
6169            src.as_f64x8(),
6170            k,
6171            _MM_FROUND_CUR_DIRECTION,
6172        ))
6173    }
6174}
6175
6176/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6177/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6178/// zeroed out if the corresponding mask bit is not set).
6179/// Rounding is done according to the imm8 parameter, which can be one of:
6180///
6181/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6182/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6183/// * [`_MM_FROUND_TO_POS_INF`] : round up
6184/// * [`_MM_FROUND_TO_ZERO`] : truncate
6185/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6186///
6187/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_pd&ig_expand=5416)
6188#[inline]
6189#[target_feature(enable = "avx512dq")]
6190#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6191#[rustc_legacy_const_generics(2)]
6192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6193pub fn _mm512_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
6194    static_assert_uimm_bits!(IMM8, 8);
6195    _mm512_mask_reduce_pd::<IMM8>(_mm512_setzero_pd(), k, a)
6196}
6197
6198/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6199/// the number of bits specified by imm8, and store the results in dst.
6200/// Rounding is done according to the imm8 parameter, which can be one of:
6201///
6202/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6203/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6204/// * [`_MM_FROUND_TO_POS_INF`] : round up
6205/// * [`_MM_FROUND_TO_ZERO`] : truncate
6206/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6207///
6208/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6209///
6210/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_ps&ig_expand=5444)
6211#[inline]
6212#[target_feature(enable = "avx512dq")]
6213#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
6214#[rustc_legacy_const_generics(1, 2)]
6215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6216pub fn _mm512_reduce_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
6217    static_assert_uimm_bits!(IMM8, 8);
6218    static_assert_sae!(SAE);
6219    _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_undefined_ps(), 0xffff, a)
6220}
6221
6222/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6223/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6224/// copied from src to dst if the corresponding mask bit is not set).
6225/// Rounding is done according to the imm8 parameter, which can be one of:
6226///
6227/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6228/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6229/// * [`_MM_FROUND_TO_POS_INF`] : round up
6230/// * [`_MM_FROUND_TO_ZERO`] : truncate
6231/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6232///
6233/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6234///
6235/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_ps&ig_expand=5442)
6236#[inline]
6237#[target_feature(enable = "avx512dq")]
6238#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
6239#[rustc_legacy_const_generics(3, 4)]
6240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6241pub fn _mm512_mask_reduce_round_ps<const IMM8: i32, const SAE: i32>(
6242    src: __m512,
6243    k: __mmask16,
6244    a: __m512,
6245) -> __m512 {
6246    unsafe {
6247        static_assert_uimm_bits!(IMM8, 8);
6248        static_assert_sae!(SAE);
6249        transmute(vreduceps_512(a.as_f32x16(), IMM8, src.as_f32x16(), k, SAE))
6250    }
6251}
6252
6253/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6254/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6255/// zeroed out if the corresponding mask bit is not set).
6256/// Rounding is done according to the imm8 parameter, which can be one of:
6257///
6258/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6259/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6260/// * [`_MM_FROUND_TO_POS_INF`] : round up
6261/// * [`_MM_FROUND_TO_ZERO`] : truncate
6262/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6263///
6264/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6265///
6266/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_ps&ig_expand=5443)
6267#[inline]
6268#[target_feature(enable = "avx512dq")]
6269#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
6270#[rustc_legacy_const_generics(2, 3)]
6271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6272pub fn _mm512_maskz_reduce_round_ps<const IMM8: i32, const SAE: i32>(
6273    k: __mmask16,
6274    a: __m512,
6275) -> __m512 {
6276    static_assert_uimm_bits!(IMM8, 8);
6277    static_assert_sae!(SAE);
6278    _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a)
6279}
6280
6281/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6282/// the number of bits specified by imm8, and store the results in dst.
6283/// Rounding is done according to the imm8 parameter, which can be one of:
6284///
6285/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6286/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6287/// * [`_MM_FROUND_TO_POS_INF`] : round up
6288/// * [`_MM_FROUND_TO_ZERO`] : truncate
6289/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6290///
6291/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ps&ig_expand=5429)
6292#[inline]
6293#[target_feature(enable = "avx512dq,avx512vl")]
6294#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6295#[rustc_legacy_const_generics(1)]
6296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6297pub fn _mm_reduce_ps<const IMM8: i32>(a: __m128) -> __m128 {
6298    static_assert_uimm_bits!(IMM8, 8);
6299    _mm_mask_reduce_ps::<IMM8>(_mm_undefined_ps(), 0xff, a)
6300}
6301
6302/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6303/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6304/// copied from src to dst if the corresponding mask bit is not set).
6305/// Rounding is done according to the imm8 parameter, which can be one of:
6306///
6307/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6308/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6309/// * [`_MM_FROUND_TO_POS_INF`] : round up
6310/// * [`_MM_FROUND_TO_ZERO`] : truncate
6311/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6312///
6313/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ps&ig_expand=5427)
6314#[inline]
6315#[target_feature(enable = "avx512dq,avx512vl")]
6316#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6317#[rustc_legacy_const_generics(3)]
6318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6319pub fn _mm_mask_reduce_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
6320    unsafe {
6321        static_assert_uimm_bits!(IMM8, 8);
6322        transmute(vreduceps_128(a.as_f32x4(), IMM8, src.as_f32x4(), k))
6323    }
6324}
6325
6326/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6327/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6328/// zeroed out if the corresponding mask bit is not set).
6329/// Rounding is done according to the imm8 parameter, which can be one of:
6330///
6331/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6332/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6333/// * [`_MM_FROUND_TO_POS_INF`] : round up
6334/// * [`_MM_FROUND_TO_ZERO`] : truncate
6335/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6336///
6337/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ps&ig_expand=5428)
6338#[inline]
6339#[target_feature(enable = "avx512dq,avx512vl")]
6340#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6341#[rustc_legacy_const_generics(2)]
6342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6343pub fn _mm_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
6344    static_assert_uimm_bits!(IMM8, 8);
6345    _mm_mask_reduce_ps::<IMM8>(_mm_setzero_ps(), k, a)
6346}
6347
6348/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6349/// the number of bits specified by imm8, and store the results in dst.
6350/// Rounding is done according to the imm8 parameter, which can be one of:
6351///
6352/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6353/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6354/// * [`_MM_FROUND_TO_POS_INF`] : round up
6355/// * [`_MM_FROUND_TO_ZERO`] : truncate
6356/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6357///
6358/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_ps&ig_expand=5432)
6359#[inline]
6360#[target_feature(enable = "avx512dq,avx512vl")]
6361#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6362#[rustc_legacy_const_generics(1)]
6363#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6364pub fn _mm256_reduce_ps<const IMM8: i32>(a: __m256) -> __m256 {
6365    static_assert_uimm_bits!(IMM8, 8);
6366    _mm256_mask_reduce_ps::<IMM8>(_mm256_undefined_ps(), 0xff, a)
6367}
6368
6369/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6370/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6371/// copied from src to dst if the corresponding mask bit is not set).
6372/// Rounding is done according to the imm8 parameter, which can be one of:
6373///
6374/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6375/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6376/// * [`_MM_FROUND_TO_POS_INF`] : round up
6377/// * [`_MM_FROUND_TO_ZERO`] : truncate
6378/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6379///
6380/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_ps&ig_expand=5430)
6381#[inline]
6382#[target_feature(enable = "avx512dq,avx512vl")]
6383#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6384#[rustc_legacy_const_generics(3)]
6385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6386pub fn _mm256_mask_reduce_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
6387    unsafe {
6388        static_assert_uimm_bits!(IMM8, 8);
6389        transmute(vreduceps_256(a.as_f32x8(), IMM8, src.as_f32x8(), k))
6390    }
6391}
6392
6393/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6394/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6395/// zeroed out if the corresponding mask bit is not set).
6396/// Rounding is done according to the imm8 parameter, which can be one of:
6397///
6398/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6399/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6400/// * [`_MM_FROUND_TO_POS_INF`] : round up
6401/// * [`_MM_FROUND_TO_ZERO`] : truncate
6402/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6403///
6404/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_ps&ig_expand=5431)
6405#[inline]
6406#[target_feature(enable = "avx512dq,avx512vl")]
6407#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6408#[rustc_legacy_const_generics(2)]
6409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6410pub fn _mm256_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
6411    static_assert_uimm_bits!(IMM8, 8);
6412    _mm256_mask_reduce_ps::<IMM8>(_mm256_setzero_ps(), k, a)
6413}
6414
6415/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6416/// the number of bits specified by imm8, and store the results in dst.
6417/// Rounding is done according to the imm8 parameter, which can be one of:
6418///
6419/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6420/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6421/// * [`_MM_FROUND_TO_POS_INF`] : round up
6422/// * [`_MM_FROUND_TO_ZERO`] : truncate
6423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6424///
6425/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_ps&ig_expand=5435)
6426#[inline]
6427#[target_feature(enable = "avx512dq")]
6428#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6429#[rustc_legacy_const_generics(1)]
6430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6431pub fn _mm512_reduce_ps<const IMM8: i32>(a: __m512) -> __m512 {
6432    static_assert_uimm_bits!(IMM8, 8);
6433    _mm512_mask_reduce_ps::<IMM8>(_mm512_undefined_ps(), 0xffff, a)
6434}
6435
6436/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6437/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6438/// copied from src to dst if the corresponding mask bit is not set).
6439/// Rounding is done according to the imm8 parameter, which can be one of:
6440///
6441/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6442/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6443/// * [`_MM_FROUND_TO_POS_INF`] : round up
6444/// * [`_MM_FROUND_TO_ZERO`] : truncate
6445/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6446///
6447/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_ps&ig_expand=5433)
6448#[inline]
6449#[target_feature(enable = "avx512dq")]
6450#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6451#[rustc_legacy_const_generics(3)]
6452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6453pub fn _mm512_mask_reduce_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
6454    unsafe {
6455        static_assert_uimm_bits!(IMM8, 8);
6456        transmute(vreduceps_512(
6457            a.as_f32x16(),
6458            IMM8,
6459            src.as_f32x16(),
6460            k,
6461            _MM_FROUND_CUR_DIRECTION,
6462        ))
6463    }
6464}
6465
6466/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6467/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6468/// zeroed out if the corresponding mask bit is not set).
6469/// Rounding is done according to the imm8 parameter, which can be one of:
6470///
6471/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6472/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6473/// * [`_MM_FROUND_TO_POS_INF`] : round up
6474/// * [`_MM_FROUND_TO_ZERO`] : truncate
6475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6476///
6477/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_ps&ig_expand=5434)
6478#[inline]
6479#[target_feature(enable = "avx512dq")]
6480#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6481#[rustc_legacy_const_generics(2)]
6482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6483pub fn _mm512_maskz_reduce_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
6484    static_assert_uimm_bits!(IMM8, 8);
6485    _mm512_mask_reduce_ps::<IMM8>(_mm512_setzero_ps(), k, a)
6486}
6487
6488/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6489/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
6490/// the upper element from a to the upper element of dst.
6491/// Rounding is done according to the imm8 parameter, which can be one of:
6492///
6493/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6494/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6495/// * [`_MM_FROUND_TO_POS_INF`] : round up
6496/// * [`_MM_FROUND_TO_ZERO`] : truncate
6497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6498///
6499/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6500///
6501/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_sd&ig_expand=5447)
6502#[inline]
6503#[target_feature(enable = "avx512dq")]
6504#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6505#[rustc_legacy_const_generics(2, 3)]
6506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6507pub fn _mm_reduce_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
6508    static_assert_uimm_bits!(IMM8, 8);
6509    static_assert_sae!(SAE);
6510    _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_undefined_pd(), 0xff, a, b)
6511}
6512
6513/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6514/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6515/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6516/// to the upper element of dst.
6517/// Rounding is done according to the imm8 parameter, which can be one of:
6518///
6519/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6520/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6521/// * [`_MM_FROUND_TO_POS_INF`] : round up
6522/// * [`_MM_FROUND_TO_ZERO`] : truncate
6523/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6524///
6525/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6526///
6527/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_sd&ig_expand=5445)
6528#[inline]
6529#[target_feature(enable = "avx512dq")]
6530#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6531#[rustc_legacy_const_generics(4, 5)]
6532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6533pub fn _mm_mask_reduce_round_sd<const IMM8: i32, const SAE: i32>(
6534    src: __m128d,
6535    k: __mmask8,
6536    a: __m128d,
6537    b: __m128d,
6538) -> __m128d {
6539    unsafe {
6540        static_assert_uimm_bits!(IMM8, 8);
6541        static_assert_sae!(SAE);
6542        transmute(vreducesd(
6543            a.as_f64x2(),
6544            b.as_f64x2(),
6545            src.as_f64x2(),
6546            k,
6547            IMM8,
6548            SAE,
6549        ))
6550    }
6551}
6552
6553/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6554/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6555/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6556/// to the upper element of dst.
6557/// Rounding is done according to the imm8 parameter, which can be one of:
6558///
6559/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6560/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6561/// * [`_MM_FROUND_TO_POS_INF`] : round up
6562/// * [`_MM_FROUND_TO_ZERO`] : truncate
6563/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6564///
6565/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6566///
6567/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_sd&ig_expand=5446)
6568#[inline]
6569#[target_feature(enable = "avx512dq")]
6570#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6571#[rustc_legacy_const_generics(3, 4)]
6572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6573pub fn _mm_maskz_reduce_round_sd<const IMM8: i32, const SAE: i32>(
6574    k: __mmask8,
6575    a: __m128d,
6576    b: __m128d,
6577) -> __m128d {
6578    static_assert_uimm_bits!(IMM8, 8);
6579    static_assert_sae!(SAE);
6580    _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b)
6581}
6582
6583/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6584/// by the number of bits specified by imm8, store the result in the lower element of dst using, and
6585/// copy the upper element from a.
6586/// to the upper element of dst.
6587/// Rounding is done according to the imm8 parameter, which can be one of:
6588///
6589/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6590/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6591/// * [`_MM_FROUND_TO_POS_INF`] : round up
6592/// * [`_MM_FROUND_TO_ZERO`] : truncate
6593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6594///
6595/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_sd&ig_expand=5456)
6596#[inline]
6597#[target_feature(enable = "avx512dq")]
6598#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6599#[rustc_legacy_const_generics(2)]
6600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6601pub fn _mm_reduce_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
6602    static_assert_uimm_bits!(IMM8, 8);
6603    _mm_mask_reduce_sd::<IMM8>(_mm_undefined_pd(), 0xff, a, b)
6604}
6605
6606/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6607/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6608/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6609/// to the upper element of dst.
6610/// Rounding is done according to the imm8 parameter, which can be one of:
6611///
6612/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6613/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6614/// * [`_MM_FROUND_TO_POS_INF`] : round up
6615/// * [`_MM_FROUND_TO_ZERO`] : truncate
6616/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6617///
6618/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_sd&ig_expand=5454)
6619#[inline]
6620#[target_feature(enable = "avx512dq")]
6621#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6622#[rustc_legacy_const_generics(4)]
6623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6624pub fn _mm_mask_reduce_sd<const IMM8: i32>(
6625    src: __m128d,
6626    k: __mmask8,
6627    a: __m128d,
6628    b: __m128d,
6629) -> __m128d {
6630    unsafe {
6631        static_assert_uimm_bits!(IMM8, 8);
6632        transmute(vreducesd(
6633            a.as_f64x2(),
6634            b.as_f64x2(),
6635            src.as_f64x2(),
6636            k,
6637            IMM8,
6638            _MM_FROUND_CUR_DIRECTION,
6639        ))
6640    }
6641}
6642
6643/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6644/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6645/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6646/// to the upper element of dst.
6647/// Rounding is done according to the imm8 parameter, which can be one of:
6648///
6649/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6650/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6651/// * [`_MM_FROUND_TO_POS_INF`] : round up
6652/// * [`_MM_FROUND_TO_ZERO`] : truncate
6653/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6654///
6655/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_sd&ig_expand=5455)
6656#[inline]
6657#[target_feature(enable = "avx512dq")]
6658#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6659#[rustc_legacy_const_generics(3)]
6660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6661pub fn _mm_maskz_reduce_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6662    static_assert_uimm_bits!(IMM8, 8);
6663    _mm_mask_reduce_sd::<IMM8>(_mm_setzero_pd(), k, a, b)
6664}
6665
6666/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6667/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
6668/// the upper element from a.
6669/// to the upper element of dst.
6670/// Rounding is done according to the imm8 parameter, which can be one of:
6671///
6672/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6673/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6674/// * [`_MM_FROUND_TO_POS_INF`] : round up
6675/// * [`_MM_FROUND_TO_ZERO`] : truncate
6676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6677///
6678/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6679///
6680/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_ss&ig_expand=5453)
6681#[inline]
6682#[target_feature(enable = "avx512dq")]
6683#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6684#[rustc_legacy_const_generics(2, 3)]
6685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6686pub fn _mm_reduce_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
6687    static_assert_uimm_bits!(IMM8, 8);
6688    static_assert_sae!(SAE);
6689    _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_undefined_ps(), 0xff, a, b)
6690}
6691
6692/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6693/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6694/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a.
6695/// to the upper element of dst.
6696/// Rounding is done according to the imm8 parameter, which can be one of:
6697///
6698/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6699/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6700/// * [`_MM_FROUND_TO_POS_INF`] : round up
6701/// * [`_MM_FROUND_TO_ZERO`] : truncate
6702/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6703///
6704/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6705///
6706/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_ss&ig_expand=5451)
6707#[inline]
6708#[target_feature(enable = "avx512dq")]
6709#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6710#[rustc_legacy_const_generics(4, 5)]
6711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6712pub fn _mm_mask_reduce_round_ss<const IMM8: i32, const SAE: i32>(
6713    src: __m128,
6714    k: __mmask8,
6715    a: __m128,
6716    b: __m128,
6717) -> __m128 {
6718    unsafe {
6719        static_assert_uimm_bits!(IMM8, 8);
6720        static_assert_sae!(SAE);
6721        transmute(vreducess(
6722            a.as_f32x4(),
6723            b.as_f32x4(),
6724            src.as_f32x4(),
6725            k,
6726            IMM8,
6727            SAE,
6728        ))
6729    }
6730}
6731
6732/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6733/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6734/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a.
6735/// to the upper element of dst.
6736/// Rounding is done according to the imm8 parameter, which can be one of:
6737///
6738/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6739/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6740/// * [`_MM_FROUND_TO_POS_INF`] : round up
6741/// * [`_MM_FROUND_TO_ZERO`] : truncate
6742/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6743///
6744/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6745///
6746/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_ss&ig_expand=5452)
6747#[inline]
6748#[target_feature(enable = "avx512dq")]
6749#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6750#[rustc_legacy_const_generics(3, 4)]
6751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6752pub fn _mm_maskz_reduce_round_ss<const IMM8: i32, const SAE: i32>(
6753    k: __mmask8,
6754    a: __m128,
6755    b: __m128,
6756) -> __m128 {
6757    static_assert_uimm_bits!(IMM8, 8);
6758    static_assert_sae!(SAE);
6759    _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b)
6760}
6761
6762/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6763/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
6764/// the upper element from a.
6765/// to the upper element of dst.
6766/// Rounding is done according to the imm8 parameter, which can be one of:
6767///
6768/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6769/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6770/// * [`_MM_FROUND_TO_POS_INF`] : round up
6771/// * [`_MM_FROUND_TO_ZERO`] : truncate
6772/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6773///
6774/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ss&ig_expand=5462)
6775#[inline]
6776#[target_feature(enable = "avx512dq")]
6777#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
6778#[rustc_legacy_const_generics(2)]
6779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6780pub fn _mm_reduce_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
6781    static_assert_uimm_bits!(IMM8, 8);
6782    _mm_mask_reduce_ss::<IMM8>(_mm_undefined_ps(), 0xff, a, b)
6783}
6784
/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
/// k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed
/// elements from a to the upper elements of dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ss&ig_expand=5460)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_reduce_ss<const IMM8: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    // SAFETY: `vreducess` is the `llvm.x86.avx512.mask.reduce.ss` intrinsic; the
    // required `avx512dq` feature is guaranteed by `#[target_feature]` above.
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vreducess(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            IMM8,
            // No static rounding override: round per the current MXCSR mode.
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
6821
/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed
/// elements from a to the upper elements of dst.
/// Rounding is done according to the imm8 parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ss&ig_expand=5461)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_reduce_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    // Zeroing semantics come from delegating with an all-zero source vector.
    _mm_mask_reduce_ss::<IMM8>(_mm_setzero_ps(), k, a, b)
}
6844
6845// FP-Class
6846
/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k.
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_pd_mask&ig_expand=3493)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_fpclass_pd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
    static_assert_uimm_bits!(IMM8, 8);
    // Unmasked form: delegate with an all-ones mask so every element is classified.
    _mm_mask_fpclass_pd_mask::<IMM8>(0xff, a)
}
6870
/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
/// corresponding mask bit is not set).
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_pd_mask&ig_expand=3494)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 {
    // SAFETY: `vfpclasspd_128` is the `llvm.x86.avx512.mask.fpclass.pd.128`
    // intrinsic; the required CPU features are guaranteed by `#[target_feature]`.
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vfpclasspd_128(a.as_f64x2(), IMM8, k1))
    }
}
6897
/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k.
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_pd_mask&ig_expand=3495)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_fpclass_pd_mask<const IMM8: i32>(a: __m256d) -> __mmask8 {
    static_assert_uimm_bits!(IMM8, 8);
    // Unmasked form: delegate with an all-ones mask so every element is classified.
    _mm256_mask_fpclass_pd_mask::<IMM8>(0xff, a)
}
6921
/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
/// corresponding mask bit is not set).
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_pd_mask&ig_expand=3496)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d) -> __mmask8 {
    // SAFETY: `vfpclasspd_256` is the `llvm.x86.avx512.mask.fpclass.pd.256`
    // intrinsic; the required CPU features are guaranteed by `#[target_feature]`.
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vfpclasspd_256(a.as_f64x4(), IMM8, k1))
    }
}
6948
/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k.
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_pd_mask&ig_expand=3497)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_fpclass_pd_mask<const IMM8: i32>(a: __m512d) -> __mmask8 {
    static_assert_uimm_bits!(IMM8, 8);
    // Unmasked form: delegate with an all-ones mask so every element is classified.
    _mm512_mask_fpclass_pd_mask::<IMM8>(0xff, a)
}
6972
/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
/// corresponding mask bit is not set).
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_pd_mask&ig_expand=3498)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d) -> __mmask8 {
    // SAFETY: `vfpclasspd_512` is the `llvm.x86.avx512.mask.fpclass.pd.512`
    // intrinsic; the required CPU feature is guaranteed by `#[target_feature]`.
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vfpclasspd_512(a.as_f64x8(), IMM8, k1))
    }
}
6999
/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k.
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ps_mask&ig_expand=3505)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_fpclass_ps_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
    static_assert_uimm_bits!(IMM8, 8);
    // Unmasked form: delegate with an all-ones mask so every element is classified.
    _mm_mask_fpclass_ps_mask::<IMM8>(0xff, a)
}
7023
/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
/// corresponding mask bit is not set).
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ps_mask&ig_expand=3506)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 {
    // SAFETY: `vfpclassps_128` is the `llvm.x86.avx512.mask.fpclass.ps.128`
    // intrinsic; the required CPU features are guaranteed by `#[target_feature]`.
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vfpclassps_128(a.as_f32x4(), IMM8, k1))
    }
}
7050
/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k.
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_ps_mask&ig_expand=3507)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_fpclass_ps_mask<const IMM8: i32>(a: __m256) -> __mmask8 {
    static_assert_uimm_bits!(IMM8, 8);
    // Unmasked form: delegate with an all-ones mask so every element is classified.
    _mm256_mask_fpclass_ps_mask::<IMM8>(0xff, a)
}
7074
/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
/// corresponding mask bit is not set).
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_ps_mask&ig_expand=3508)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256) -> __mmask8 {
    // SAFETY: `vfpclassps_256` is the `llvm.x86.avx512.mask.fpclass.ps.256`
    // intrinsic; the required CPU features are guaranteed by `#[target_feature]`.
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vfpclassps_256(a.as_f32x8(), IMM8, k1))
    }
}
7101
/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k.
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_ps_mask&ig_expand=3509)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_fpclass_ps_mask<const IMM8: i32>(a: __m512) -> __mmask16 {
    static_assert_uimm_bits!(IMM8, 8);
    // Unmasked form: all 16 mask bits set so every element is classified.
    _mm512_mask_fpclass_ps_mask::<IMM8>(0xffff, a)
}
7125
/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
/// corresponding mask bit is not set).
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_ps_mask&ig_expand=3510)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512) -> __mmask16 {
    // SAFETY: `vfpclassps_512` is the `llvm.x86.avx512.mask.fpclass.ps.512`
    // intrinsic; the required CPU feature is guaranteed by `#[target_feature]`.
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(vfpclassps_512(a.as_f32x16(), IMM8, k1))
    }
}
7152
/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
/// by imm8, and store the results in mask vector k.
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_sd_mask&ig_expand=3511)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_fpclass_sd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
    static_assert_uimm_bits!(IMM8, 8);
    // Unmasked form: delegate with an all-ones mask.
    _mm_mask_fpclass_sd_mask::<IMM8>(0xff, a)
}
7176
/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
/// corresponding mask bit is not set).
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_sd_mask&ig_expand=3512)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_fpclass_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 {
    // SAFETY: `vfpclasssd` is the `llvm.x86.avx512.mask.fpclass.sd` intrinsic;
    // the required `avx512dq` feature is guaranteed by `#[target_feature]`.
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        vfpclasssd(a.as_f64x2(), IMM8, k1)
    }
}
7203
/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
/// by imm8, and store the results in mask vector k.
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ss_mask&ig_expand=3515)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_fpclass_ss_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
    static_assert_uimm_bits!(IMM8, 8);
    // Unmasked form: delegate with an all-ones mask.
    _mm_mask_fpclass_ss_mask::<IMM8>(0xff, a)
}
7227
/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
/// corresponding mask bit is not set).
/// imm can be a combination of:
///
///     - 0x01 // QNaN
///     - 0x02 // Positive Zero
///     - 0x04 // Negative Zero
///     - 0x08 // Positive Infinity
///     - 0x10 // Negative Infinity
///     - 0x20 // Denormal
///     - 0x40 // Negative
///     - 0x80 // SNaN
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ss_mask&ig_expand=3516)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_fpclass_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 {
    // SAFETY: `vfpclassss` is the `llvm.x86.avx512.mask.fpclass.ss` intrinsic;
    // the required `avx512dq` feature is guaranteed by `#[target_feature]`.
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        vfpclassss(a.as_f32x4(), IMM8, k1)
    }
}
7254
// Raw LLVM intrinsic declarations backing the wrappers above. The
// `llvm.x86.avx512.mask.*` variants carry the merge source and write mask
// explicitly; the `sitofp.round`/`uitofp.round` variants take a rounding-mode
// immediate instead.
#[allow(improper_ctypes)]
unsafe extern "C" {
    // Integer (signed/unsigned 64-bit) -> floating-point conversions.
    #[link_name = "llvm.x86.avx512.sitofp.round.v2f64.v2i64"]
    fn vcvtqq2pd_128(a: i64x2, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.sitofp.round.v4f64.v4i64"]
    fn vcvtqq2pd_256(a: i64x4, rounding: i32) -> f64x4;
    #[link_name = "llvm.x86.avx512.sitofp.round.v8f64.v8i64"]
    fn vcvtqq2pd_512(a: i64x8, rounding: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.mask.cvtqq2ps.128"]
    fn vcvtqq2ps_128(a: i64x2, src: f32x4, k: __mmask8) -> f32x4;
    #[link_name = "llvm.x86.avx512.sitofp.round.v4f32.v4i64"]
    fn vcvtqq2ps_256(a: i64x4, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.sitofp.round.v8f32.v8i64"]
    fn vcvtqq2ps_512(a: i64x8, rounding: i32) -> f32x8;

    #[link_name = "llvm.x86.avx512.uitofp.round.v2f64.v2i64"]
    fn vcvtuqq2pd_128(a: u64x2, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.uitofp.round.v4f64.v4i64"]
    fn vcvtuqq2pd_256(a: u64x4, rounding: i32) -> f64x4;
    #[link_name = "llvm.x86.avx512.uitofp.round.v8f64.v8i64"]
    fn vcvtuqq2pd_512(a: u64x8, rounding: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.mask.cvtuqq2ps.128"]
    fn vcvtuqq2ps_128(a: u64x2, src: f32x4, k: __mmask8) -> f32x4;
    #[link_name = "llvm.x86.avx512.uitofp.round.v4f32.v4i64"]
    fn vcvtuqq2ps_256(a: u64x4, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.uitofp.round.v8f32.v8i64"]
    fn vcvtuqq2ps_512(a: u64x8, rounding: i32) -> f32x8;

    // Floating-point -> integer conversions (rounding, `cvt*`).
    #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.128"]
    fn vcvtpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.256"]
    fn vcvtpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.512"]
    fn vcvtpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;

    #[link_name = "llvm.x86.avx512.mask.cvtps2qq.128"]
    fn vcvtps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.cvtps2qq.256"]
    fn vcvtps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.cvtps2qq.512"]
    fn vcvtps2qq_512(a: f32x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;

    #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.128"]
    fn vcvtpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.256"]
    fn vcvtpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.512"]
    fn vcvtpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;

    #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.128"]
    fn vcvtps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2;
    #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.256"]
    fn vcvtps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4;
    #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.512"]
    fn vcvtps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;

    // Floating-point -> integer conversions (truncating, `cvtt*`; 512-bit takes SAE).
    #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.128"]
    fn vcvttpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.256"]
    fn vcvttpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.512"]
    fn vcvttpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8;

    #[link_name = "llvm.x86.avx512.mask.cvttps2qq.128"]
    fn vcvttps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.cvttps2qq.256"]
    fn vcvttps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.cvttps2qq.512"]
    fn vcvttps2qq_512(a: f32x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8;

    #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.128"]
    fn vcvttpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.256"]
    fn vcvttpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.512"]
    fn vcvttpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8;

    #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.128"]
    fn vcvttps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2;
    #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.256"]
    fn vcvttps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4;
    #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.512"]
    fn vcvttps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8;

    // VRANGE: range restriction of packed/scalar elements.
    #[link_name = "llvm.x86.avx512.mask.range.pd.128"]
    fn vrangepd_128(a: f64x2, b: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.range.pd.256"]
    fn vrangepd_256(a: f64x4, b: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.range.pd.512"]
    fn vrangepd_512(a: f64x8, b: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.mask.range.ps.128"]
    fn vrangeps_128(a: f32x4, b: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.range.ps.256"]
    fn vrangeps_256(a: f32x8, b: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.range.ps.512"]
    fn vrangeps_512(a: f32x16, b: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32)
    -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.range.sd"]
    fn vrangesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.range.ss"]
    fn vrangess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4;

    // VREDUCE: reduced-argument extraction of packed/scalar elements.
    #[link_name = "llvm.x86.avx512.mask.reduce.pd.128"]
    fn vreducepd_128(a: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.reduce.pd.256"]
    fn vreducepd_256(a: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.reduce.pd.512"]
    fn vreducepd_512(a: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.mask.reduce.ps.128"]
    fn vreduceps_128(a: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.reduce.ps.256"]
    fn vreduceps_256(a: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.reduce.ps.512"]
    fn vreduceps_512(a: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.reduce.sd"]
    fn vreducesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.reduce.ss"]
    fn vreducess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4;

    // VFPCLASS: special-category classification; results come back as a bitmask.
    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.128"]
    fn vfpclasspd_128(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.256"]
    fn vfpclasspd_256(a: f64x4, imm8: i32, k: __mmask8) -> __mmask8;
    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.512"]
    fn vfpclasspd_512(a: f64x8, imm8: i32, k: __mmask8) -> __mmask8;

    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.128"]
    fn vfpclassps_128(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8;
    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.256"]
    fn vfpclassps_256(a: f32x8, imm8: i32, k: __mmask8) -> __mmask8;
    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.512"]
    fn vfpclassps_512(a: f32x16, imm8: i32, k: __mmask16) -> __mmask16;

    #[link_name = "llvm.x86.avx512.mask.fpclass.sd"]
    fn vfpclasssd(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
    #[link_name = "llvm.x86.avx512.mask.fpclass.ss"]
    fn vfpclassss(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8;
}
7399
7400#[cfg(test)]
7401mod tests {
7402    use super::*;
7403    use crate::core_arch::assert_eq_const as assert_eq;
7404
7405    use stdarch_test::simd_test;
7406
7407    use crate::core_arch::x86::*;
7408    use crate::mem::transmute;
7409
    // Operand bit patterns for the bitwise float tests: every nibble of OPRND1
    // is 0b0011 and every nibble of OPRND2 is 0b0101, so the expected results
    // below follow nibble-wise.
    const OPRND1_64: f64 = unsafe { transmute(0x3333333333333333_u64) };
    const OPRND2_64: f64 = unsafe { transmute(0x5555555555555555_u64) };

    // Per nibble: 3 & 5 = 1, !3 & 5 = 4, 3 | 5 = 7, 3 ^ 5 = 6.
    const AND_64: f64 = unsafe { transmute(0x1111111111111111_u64) };
    const ANDN_64: f64 = unsafe { transmute(0x4444444444444444_u64) };
    const OR_64: f64 = unsafe { transmute(0x7777777777777777_u64) };
    const XOR_64: f64 = unsafe { transmute(0x6666666666666666_u64) };

    // Same nibble patterns for the single-precision variants.
    const OPRND1_32: f32 = unsafe { transmute(0x33333333_u32) };
    const OPRND2_32: f32 = unsafe { transmute(0x55555555_u32) };

    const AND_32: f32 = unsafe { transmute(0x11111111_u32) };
    const ANDN_32: f32 = unsafe { transmute(0x44444444_u32) };
    const OR_32: f32 = unsafe { transmute(0x77777777_u32) };
    const XOR_32: f32 = unsafe { transmute(0x66666666_u32) };
7425
7426    #[simd_test(enable = "avx512dq,avx512vl")]
7427    const unsafe fn test_mm_mask_and_pd() {
7428        let a = _mm_set1_pd(OPRND1_64);
7429        let b = _mm_set1_pd(OPRND2_64);
7430        let src = _mm_set_pd(1., 2.);
7431        let r = _mm_mask_and_pd(src, 0b01, a, b);
7432        let e = _mm_set_pd(1., AND_64);
7433        assert_eq_m128d(r, e);
7434    }
7435
    // --- Bitwise AND (masked variants) -------------------------------------
    // Shared pattern: `a` and `b` are splats of the operand constants
    // OPRND1_*/OPRND2_* and every selected lane of the result must equal the
    // precomputed AND_64/AND_32 constant (all defined earlier in this test
    // module). `_mm*_set_*` lists lanes from highest to lowest, so mask bit i
    // corresponds to the i-th argument counted from the END of the expected
    // vector. Lanes whose mask bit is clear keep `src` (mask_* variants) or
    // become zero (maskz_* variants).

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_maskz_and_pd() {
        let a = _mm_set1_pd(OPRND1_64);
        let b = _mm_set1_pd(OPRND2_64);
        let r = _mm_maskz_and_pd(0b01, a, b);
        // Only lane 0 selected: lane 1 is zeroed.
        let e = _mm_set_pd(0.0, AND_64);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_and_pd() {
        let a = _mm256_set1_pd(OPRND1_64);
        let b = _mm256_set1_pd(OPRND2_64);
        let src = _mm256_set_pd(1., 2., 3., 4.);
        // Mask 0b0101 selects lanes 0 and 2; lanes 1 and 3 keep src (3., 1.).
        let r = _mm256_mask_and_pd(src, 0b0101, a, b);
        let e = _mm256_set_pd(1., AND_64, 3., AND_64);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_and_pd() {
        let a = _mm256_set1_pd(OPRND1_64);
        let b = _mm256_set1_pd(OPRND2_64);
        let r = _mm256_maskz_and_pd(0b0101, a, b);
        let e = _mm256_set_pd(0.0, AND_64, 0.0, AND_64);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_and_pd() {
        let a = _mm512_set1_pd(OPRND1_64);
        let b = _mm512_set1_pd(OPRND2_64);
        // Unmasked variant: every lane holds the AND result.
        let r = _mm512_and_pd(a, b);
        let e = _mm512_set1_pd(AND_64);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_and_pd() {
        let a = _mm512_set1_pd(OPRND1_64);
        let b = _mm512_set1_pd(OPRND2_64);
        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_mask_and_pd(src, 0b01010101, a, b);
        let e = _mm512_set_pd(1., AND_64, 3., AND_64, 5., AND_64, 7., AND_64);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_and_pd() {
        let a = _mm512_set1_pd(OPRND1_64);
        let b = _mm512_set1_pd(OPRND2_64);
        let r = _mm512_maskz_and_pd(0b01010101, a, b);
        let e = _mm512_set_pd(0.0, AND_64, 0.0, AND_64, 0.0, AND_64, 0.0, AND_64);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_mask_and_ps() {
        let a = _mm_set1_ps(OPRND1_32);
        let b = _mm_set1_ps(OPRND2_32);
        let src = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_mask_and_ps(src, 0b0101, a, b);
        let e = _mm_set_ps(1., AND_32, 3., AND_32);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_maskz_and_ps() {
        let a = _mm_set1_ps(OPRND1_32);
        let b = _mm_set1_ps(OPRND2_32);
        let r = _mm_maskz_and_ps(0b0101, a, b);
        let e = _mm_set_ps(0.0, AND_32, 0.0, AND_32);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_and_ps() {
        let a = _mm256_set1_ps(OPRND1_32);
        let b = _mm256_set1_ps(OPRND2_32);
        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_mask_and_ps(src, 0b01010101, a, b);
        let e = _mm256_set_ps(1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_and_ps() {
        let a = _mm256_set1_ps(OPRND1_32);
        let b = _mm256_set1_ps(OPRND2_32);
        let r = _mm256_maskz_and_ps(0b01010101, a, b);
        let e = _mm256_set_ps(0.0, AND_32, 0.0, AND_32, 0.0, AND_32, 0.0, AND_32);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_and_ps() {
        let a = _mm512_set1_ps(OPRND1_32);
        let b = _mm512_set1_ps(OPRND2_32);
        let r = _mm512_and_ps(a, b);
        let e = _mm512_set1_ps(AND_32);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_and_ps() {
        let a = _mm512_set1_ps(OPRND1_32);
        let b = _mm512_set1_ps(OPRND2_32);
        let src = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        // Alternating mask: even lanes computed, odd lanes keep src.
        let r = _mm512_mask_and_ps(src, 0b0101010101010101, a, b);
        let e = _mm512_set_ps(
            1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32, 9., AND_32, 11., AND_32, 13., AND_32,
            15., AND_32,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_and_ps() {
        let a = _mm512_set1_ps(OPRND1_32);
        let b = _mm512_set1_ps(OPRND2_32);
        let r = _mm512_maskz_and_ps(0b0101010101010101, a, b);
        let e = _mm512_set_ps(
            0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0.,
            AND_32,
        );
        assert_eq_m512(r, e);
    }
7565
    // --- Bitwise ANDNOT (masked variants) ----------------------------------
    // Same structure as the AND tests above, but selected lanes must equal
    // the precomputed ANDN_64/ANDN_32 constants (the expected value of
    // `(!a) & b` on the operand constants, defined earlier in this module).

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_mask_andnot_pd() {
        let a = _mm_set1_pd(OPRND1_64);
        let b = _mm_set1_pd(OPRND2_64);
        let src = _mm_set_pd(1., 2.);
        let r = _mm_mask_andnot_pd(src, 0b01, a, b);
        // Lane 0 computed; lane 1 keeps src's 1.
        let e = _mm_set_pd(1., ANDN_64);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_maskz_andnot_pd() {
        let a = _mm_set1_pd(OPRND1_64);
        let b = _mm_set1_pd(OPRND2_64);
        let r = _mm_maskz_andnot_pd(0b01, a, b);
        let e = _mm_set_pd(0.0, ANDN_64);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_andnot_pd() {
        let a = _mm256_set1_pd(OPRND1_64);
        let b = _mm256_set1_pd(OPRND2_64);
        let src = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_mask_andnot_pd(src, 0b0101, a, b);
        let e = _mm256_set_pd(1., ANDN_64, 3., ANDN_64);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_andnot_pd() {
        let a = _mm256_set1_pd(OPRND1_64);
        let b = _mm256_set1_pd(OPRND2_64);
        let r = _mm256_maskz_andnot_pd(0b0101, a, b);
        let e = _mm256_set_pd(0.0, ANDN_64, 0.0, ANDN_64);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_andnot_pd() {
        let a = _mm512_set1_pd(OPRND1_64);
        let b = _mm512_set1_pd(OPRND2_64);
        let r = _mm512_andnot_pd(a, b);
        let e = _mm512_set1_pd(ANDN_64);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_andnot_pd() {
        let a = _mm512_set1_pd(OPRND1_64);
        let b = _mm512_set1_pd(OPRND2_64);
        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_mask_andnot_pd(src, 0b01010101, a, b);
        let e = _mm512_set_pd(1., ANDN_64, 3., ANDN_64, 5., ANDN_64, 7., ANDN_64);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_andnot_pd() {
        let a = _mm512_set1_pd(OPRND1_64);
        let b = _mm512_set1_pd(OPRND2_64);
        let r = _mm512_maskz_andnot_pd(0b01010101, a, b);
        let e = _mm512_set_pd(0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_mask_andnot_ps() {
        let a = _mm_set1_ps(OPRND1_32);
        let b = _mm_set1_ps(OPRND2_32);
        let src = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_mask_andnot_ps(src, 0b0101, a, b);
        let e = _mm_set_ps(1., ANDN_32, 3., ANDN_32);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_maskz_andnot_ps() {
        let a = _mm_set1_ps(OPRND1_32);
        let b = _mm_set1_ps(OPRND2_32);
        let r = _mm_maskz_andnot_ps(0b0101, a, b);
        let e = _mm_set_ps(0.0, ANDN_32, 0.0, ANDN_32);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_andnot_ps() {
        let a = _mm256_set1_ps(OPRND1_32);
        let b = _mm256_set1_ps(OPRND2_32);
        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_mask_andnot_ps(src, 0b01010101, a, b);
        let e = _mm256_set_ps(1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_andnot_ps() {
        let a = _mm256_set1_ps(OPRND1_32);
        let b = _mm256_set1_ps(OPRND2_32);
        let r = _mm256_maskz_andnot_ps(0b01010101, a, b);
        let e = _mm256_set_ps(0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_andnot_ps() {
        let a = _mm512_set1_ps(OPRND1_32);
        let b = _mm512_set1_ps(OPRND2_32);
        let r = _mm512_andnot_ps(a, b);
        let e = _mm512_set1_ps(ANDN_32);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_andnot_ps() {
        let a = _mm512_set1_ps(OPRND1_32);
        let b = _mm512_set1_ps(OPRND2_32);
        let src = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_mask_andnot_ps(src, 0b0101010101010101, a, b);
        let e = _mm512_set_ps(
            1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32, 9., ANDN_32, 11., ANDN_32, 13.,
            ANDN_32, 15., ANDN_32,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_andnot_ps() {
        let a = _mm512_set1_ps(OPRND1_32);
        let b = _mm512_set1_ps(OPRND2_32);
        let r = _mm512_maskz_andnot_ps(0b0101010101010101, a, b);
        let e = _mm512_set_ps(
            0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0.,
            ANDN_32, 0., ANDN_32,
        );
        assert_eq_m512(r, e);
    }
7705
    // --- Bitwise OR (masked variants) --------------------------------------
    // Same structure as the AND tests above; selected lanes must equal the
    // precomputed OR_64/OR_32 constants defined earlier in this module.

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_mask_or_pd() {
        let a = _mm_set1_pd(OPRND1_64);
        let b = _mm_set1_pd(OPRND2_64);
        let src = _mm_set_pd(1., 2.);
        let r = _mm_mask_or_pd(src, 0b01, a, b);
        // Lane 0 computed; lane 1 keeps src's 1.
        let e = _mm_set_pd(1., OR_64);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_maskz_or_pd() {
        let a = _mm_set1_pd(OPRND1_64);
        let b = _mm_set1_pd(OPRND2_64);
        let r = _mm_maskz_or_pd(0b01, a, b);
        let e = _mm_set_pd(0.0, OR_64);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_or_pd() {
        let a = _mm256_set1_pd(OPRND1_64);
        let b = _mm256_set1_pd(OPRND2_64);
        let src = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_mask_or_pd(src, 0b0101, a, b);
        let e = _mm256_set_pd(1., OR_64, 3., OR_64);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_or_pd() {
        let a = _mm256_set1_pd(OPRND1_64);
        let b = _mm256_set1_pd(OPRND2_64);
        let r = _mm256_maskz_or_pd(0b0101, a, b);
        let e = _mm256_set_pd(0.0, OR_64, 0.0, OR_64);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_or_pd() {
        let a = _mm512_set1_pd(OPRND1_64);
        let b = _mm512_set1_pd(OPRND2_64);
        let r = _mm512_or_pd(a, b);
        let e = _mm512_set1_pd(OR_64);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_or_pd() {
        let a = _mm512_set1_pd(OPRND1_64);
        let b = _mm512_set1_pd(OPRND2_64);
        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_mask_or_pd(src, 0b01010101, a, b);
        let e = _mm512_set_pd(1., OR_64, 3., OR_64, 5., OR_64, 7., OR_64);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_or_pd() {
        let a = _mm512_set1_pd(OPRND1_64);
        let b = _mm512_set1_pd(OPRND2_64);
        let r = _mm512_maskz_or_pd(0b01010101, a, b);
        let e = _mm512_set_pd(0.0, OR_64, 0.0, OR_64, 0.0, OR_64, 0.0, OR_64);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_mask_or_ps() {
        let a = _mm_set1_ps(OPRND1_32);
        let b = _mm_set1_ps(OPRND2_32);
        let src = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_mask_or_ps(src, 0b0101, a, b);
        let e = _mm_set_ps(1., OR_32, 3., OR_32);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_maskz_or_ps() {
        let a = _mm_set1_ps(OPRND1_32);
        let b = _mm_set1_ps(OPRND2_32);
        let r = _mm_maskz_or_ps(0b0101, a, b);
        let e = _mm_set_ps(0.0, OR_32, 0.0, OR_32);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_or_ps() {
        let a = _mm256_set1_ps(OPRND1_32);
        let b = _mm256_set1_ps(OPRND2_32);
        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_mask_or_ps(src, 0b01010101, a, b);
        let e = _mm256_set_ps(1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_or_ps() {
        let a = _mm256_set1_ps(OPRND1_32);
        let b = _mm256_set1_ps(OPRND2_32);
        let r = _mm256_maskz_or_ps(0b01010101, a, b);
        let e = _mm256_set_ps(0.0, OR_32, 0.0, OR_32, 0.0, OR_32, 0.0, OR_32);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_or_ps() {
        let a = _mm512_set1_ps(OPRND1_32);
        let b = _mm512_set1_ps(OPRND2_32);
        let r = _mm512_or_ps(a, b);
        let e = _mm512_set1_ps(OR_32);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_or_ps() {
        let a = _mm512_set1_ps(OPRND1_32);
        let b = _mm512_set1_ps(OPRND2_32);
        let src = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_mask_or_ps(src, 0b0101010101010101, a, b);
        let e = _mm512_set_ps(
            1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32, 9., OR_32, 11., OR_32, 13., OR_32, 15.,
            OR_32,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_or_ps() {
        let a = _mm512_set1_ps(OPRND1_32);
        let b = _mm512_set1_ps(OPRND2_32);
        let r = _mm512_maskz_or_ps(0b0101010101010101, a, b);
        let e = _mm512_set_ps(
            0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32,
        );
        assert_eq_m512(r, e);
    }
7844
    // --- Bitwise XOR (masked variants) -------------------------------------
    // Same structure as the AND tests above; selected lanes must equal the
    // precomputed XOR_64/XOR_32 constants defined earlier in this module.

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_mask_xor_pd() {
        let a = _mm_set1_pd(OPRND1_64);
        let b = _mm_set1_pd(OPRND2_64);
        let src = _mm_set_pd(1., 2.);
        let r = _mm_mask_xor_pd(src, 0b01, a, b);
        // Lane 0 computed; lane 1 keeps src's 1.
        let e = _mm_set_pd(1., XOR_64);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_maskz_xor_pd() {
        let a = _mm_set1_pd(OPRND1_64);
        let b = _mm_set1_pd(OPRND2_64);
        let r = _mm_maskz_xor_pd(0b01, a, b);
        let e = _mm_set_pd(0.0, XOR_64);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_xor_pd() {
        let a = _mm256_set1_pd(OPRND1_64);
        let b = _mm256_set1_pd(OPRND2_64);
        let src = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_mask_xor_pd(src, 0b0101, a, b);
        let e = _mm256_set_pd(1., XOR_64, 3., XOR_64);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_xor_pd() {
        let a = _mm256_set1_pd(OPRND1_64);
        let b = _mm256_set1_pd(OPRND2_64);
        let r = _mm256_maskz_xor_pd(0b0101, a, b);
        let e = _mm256_set_pd(0.0, XOR_64, 0.0, XOR_64);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_xor_pd() {
        let a = _mm512_set1_pd(OPRND1_64);
        let b = _mm512_set1_pd(OPRND2_64);
        let r = _mm512_xor_pd(a, b);
        let e = _mm512_set1_pd(XOR_64);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_xor_pd() {
        let a = _mm512_set1_pd(OPRND1_64);
        let b = _mm512_set1_pd(OPRND2_64);
        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_mask_xor_pd(src, 0b01010101, a, b);
        let e = _mm512_set_pd(1., XOR_64, 3., XOR_64, 5., XOR_64, 7., XOR_64);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_xor_pd() {
        let a = _mm512_set1_pd(OPRND1_64);
        let b = _mm512_set1_pd(OPRND2_64);
        let r = _mm512_maskz_xor_pd(0b01010101, a, b);
        let e = _mm512_set_pd(0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_mask_xor_ps() {
        let a = _mm_set1_ps(OPRND1_32);
        let b = _mm_set1_ps(OPRND2_32);
        let src = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_mask_xor_ps(src, 0b0101, a, b);
        let e = _mm_set_ps(1., XOR_32, 3., XOR_32);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_maskz_xor_ps() {
        let a = _mm_set1_ps(OPRND1_32);
        let b = _mm_set1_ps(OPRND2_32);
        let r = _mm_maskz_xor_ps(0b0101, a, b);
        let e = _mm_set_ps(0.0, XOR_32, 0.0, XOR_32);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_xor_ps() {
        let a = _mm256_set1_ps(OPRND1_32);
        let b = _mm256_set1_ps(OPRND2_32);
        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_mask_xor_ps(src, 0b01010101, a, b);
        let e = _mm256_set_ps(1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_xor_ps() {
        let a = _mm256_set1_ps(OPRND1_32);
        let b = _mm256_set1_ps(OPRND2_32);
        let r = _mm256_maskz_xor_ps(0b01010101, a, b);
        let e = _mm256_set_ps(0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_xor_ps() {
        let a = _mm512_set1_ps(OPRND1_32);
        let b = _mm512_set1_ps(OPRND2_32);
        let r = _mm512_xor_ps(a, b);
        let e = _mm512_set1_ps(XOR_32);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_xor_ps() {
        let a = _mm512_set1_ps(OPRND1_32);
        let b = _mm512_set1_ps(OPRND2_32);
        let src = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_mask_xor_ps(src, 0b0101010101010101, a, b);
        let e = _mm512_set_ps(
            1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32, 9., XOR_32, 11., XOR_32, 13., XOR_32,
            15., XOR_32,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_xor_ps() {
        let a = _mm512_set1_ps(OPRND1_32);
        let b = _mm512_set1_ps(OPRND2_32);
        let r = _mm512_maskz_xor_ps(0b0101010101010101, a, b);
        let e = _mm512_set_ps(
            0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0.,
            XOR_32,
        );
        assert_eq_m512(r, e);
    }
7984
    // --- broadcast_f32x2 / broadcast_f32x8 ---------------------------------
    // `broadcast_f32x2` replicates the two LOWEST f32 lanes of `a` across the
    // destination. Since `_mm*_set_ps` lists lanes from highest to lowest,
    // those are the LAST two arguments (here 3. and 4., with 4. in lane 0),
    // so the expected pattern is `..., 3., 4.` repeated. `broadcast_f32x8`
    // replicates the whole 256-bit source into both halves of the zmm.
    // Masked variants follow the usual rule: mask bit i selects computed
    // lane i, otherwise the lane keeps `src` (mask) or is zeroed (maskz).

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_broadcast_f32x2() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_broadcast_f32x2(a);
        let e = _mm256_set_ps(3., 4., 3., 4., 3., 4., 3., 4.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_broadcast_f32x2() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm256_set_ps(5., 6., 7., 8., 9., 10., 11., 12.);
        // Mask 0b01101001: lanes 0, 3, 5, 6 take the broadcast (even lane ->
        // 4., odd lane -> 3.); the rest keep the corresponding lane of b.
        let r = _mm256_mask_broadcast_f32x2(b, 0b01101001, a);
        let e = _mm256_set_ps(5., 4., 3., 8., 3., 10., 11., 4.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_broadcast_f32x2() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_maskz_broadcast_f32x2(0b01101001, a);
        let e = _mm256_set_ps(0., 4., 3., 0., 3., 0., 0., 4.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_broadcast_f32x2() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm512_broadcast_f32x2(a);
        let e = _mm512_set_ps(
            3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_broadcast_f32x2() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm512_set_ps(
            5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
        );
        let r = _mm512_mask_broadcast_f32x2(b, 0b0110100100111100, a);
        let e = _mm512_set_ps(
            5., 4., 3., 8., 3., 10., 11., 4., 13., 14., 3., 4., 3., 4., 19., 20.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_broadcast_f32x2() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm512_maskz_broadcast_f32x2(0b0110100100111100, a);
        let e = _mm512_set_ps(
            0., 4., 3., 0., 3., 0., 0., 4., 0., 0., 3., 4., 3., 4., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_broadcast_f32x8() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_broadcast_f32x8(a);
        // The full 256-bit source appears in both the low and high halves.
        let e = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 1., 2., 3., 4., 5., 6., 7., 8.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_broadcast_f32x8() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_ps(
            9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.,
        );
        let r = _mm512_mask_broadcast_f32x8(b, 0b0110100100111100, a);
        let e = _mm512_set_ps(
            9., 2., 3., 12., 5., 14., 15., 8., 17., 18., 3., 4., 5., 6., 23., 24.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_broadcast_f32x8() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_broadcast_f32x8(0b0110100100111100, a);
        let e = _mm512_set_ps(
            0., 2., 3., 0., 5., 0., 0., 8., 0., 0., 3., 4., 5., 6., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
8075
    // --- broadcast_f64x2 ----------------------------------------------------
    // Replicates the 128-bit source (both f64 lanes) into every 128-bit
    // chunk of the destination; masked variants select per 64-bit lane.

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_broadcast_f64x2() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm256_broadcast_f64x2(a);
        let e = _mm256_set_pd(1., 2., 1., 2.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_broadcast_f64x2() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm256_set_pd(3., 4., 5., 6.);
        // Mask 0b0110: lanes 1 and 2 take the broadcast; 0 and 3 keep b.
        let r = _mm256_mask_broadcast_f64x2(b, 0b0110, a);
        let e = _mm256_set_pd(3., 2., 1., 6.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_broadcast_f64x2() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm256_maskz_broadcast_f64x2(0b0110, a);
        let e = _mm256_set_pd(0., 2., 1., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_broadcast_f64x2() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm512_broadcast_f64x2(a);
        let e = _mm512_set_pd(1., 2., 1., 2., 1., 2., 1., 2.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_broadcast_f64x2() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
        let r = _mm512_mask_broadcast_f64x2(b, 0b01101001, a);
        let e = _mm512_set_pd(3., 2., 1., 6., 1., 8., 9., 2.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_broadcast_f64x2() {
        let a = _mm_set_pd(1., 2.);
        let r = _mm512_maskz_broadcast_f64x2(0b01101001, a);
        let e = _mm512_set_pd(0., 2., 1., 0., 1., 0., 0., 2.);
        assert_eq_m512d(r, e);
    }
8125
    // --- broadcast_i32x2 / broadcast_i32x8 ---------------------------------
    // Integer counterparts of the f32 broadcasts above: `broadcast_i32x2`
    // replicates the two lowest i32 lanes of `a` (the last two `set_epi32`
    // arguments, 3 and 4, with 4 in lane 0); `broadcast_i32x8` replicates
    // the whole 256-bit source into both halves of the zmm.

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm_broadcast_i32x2(a);
        let e = _mm_set_epi32(3, 4, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_mask_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(5, 6, 7, 8);
        // Mask 0b0110: lanes 1 and 2 take the broadcast; 0 and 3 keep b.
        let r = _mm_mask_broadcast_i32x2(b, 0b0110, a);
        let e = _mm_set_epi32(5, 4, 3, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_maskz_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm_maskz_broadcast_i32x2(0b0110, a);
        let e = _mm_set_epi32(0, 4, 3, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm256_broadcast_i32x2(a);
        let e = _mm256_set_epi32(3, 4, 3, 4, 3, 4, 3, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12);
        let r = _mm256_mask_broadcast_i32x2(b, 0b01101001, a);
        let e = _mm256_set_epi32(5, 4, 3, 8, 3, 10, 11, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm256_maskz_broadcast_i32x2(0b01101001, a);
        let e = _mm256_set_epi32(0, 4, 3, 0, 3, 0, 0, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm512_broadcast_i32x2(a);
        let e = _mm512_set_epi32(3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm512_set_epi32(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
        let r = _mm512_mask_broadcast_i32x2(b, 0b0110100100111100, a);
        let e = _mm512_set_epi32(5, 4, 3, 8, 3, 10, 11, 4, 13, 14, 3, 4, 3, 4, 19, 20);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_broadcast_i32x2() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm512_maskz_broadcast_i32x2(0b0110100100111100, a);
        let e = _mm512_set_epi32(0, 4, 3, 0, 3, 0, 0, 4, 0, 0, 3, 4, 3, 4, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_broadcast_i32x8() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_broadcast_i32x8(a);
        // The full 256-bit source appears in both the low and high halves.
        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_broadcast_i32x8() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_epi32(
            9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
        );
        let r = _mm512_mask_broadcast_i32x8(b, 0b0110100100111100, a);
        let e = _mm512_set_epi32(9, 2, 3, 12, 5, 14, 15, 8, 17, 18, 3, 4, 5, 6, 23, 24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_broadcast_i32x8() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_broadcast_i32x8(0b0110100100111100, a);
        let e = _mm512_set_epi32(0, 2, 3, 0, 5, 0, 0, 8, 0, 0, 3, 4, 5, 6, 0, 0);
        assert_eq_m512i(r, e);
    }
8227
    // --- broadcast_i64x2 ----------------------------------------------------
    // Integer counterpart of broadcast_f64x2: the whole 128-bit source is
    // replicated into every 128-bit chunk; masked variants select per
    // 64-bit lane.

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_broadcast_i64x2() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm256_broadcast_i64x2(a);
        let e = _mm256_set_epi64x(1, 2, 1, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_broadcast_i64x2() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm256_set_epi64x(3, 4, 5, 6);
        // Mask 0b0110: lanes 1 and 2 take the broadcast; 0 and 3 keep b.
        let r = _mm256_mask_broadcast_i64x2(b, 0b0110, a);
        let e = _mm256_set_epi64x(3, 2, 1, 6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_broadcast_i64x2() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm256_maskz_broadcast_i64x2(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_broadcast_i64x2() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm512_broadcast_i64x2(a);
        let e = _mm512_set_epi64(1, 2, 1, 2, 1, 2, 1, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_broadcast_i64x2() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm512_set_epi64(3, 4, 5, 6, 7, 8, 9, 10);
        let r = _mm512_mask_broadcast_i64x2(b, 0b01101001, a);
        let e = _mm512_set_epi64(3, 2, 1, 6, 1, 8, 9, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_broadcast_i64x2() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm512_maskz_broadcast_i64x2(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 1, 0, 1, 0, 0, 2);
        assert_eq_m512i(r, e);
    }
8277
    // `_mm512_extractf32x8_ps::<1>`: selects the upper 256-bit half of `a`
    // (index 1), i.e. elements 8..=15, which `_mm512_set_ps` listed first.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_extractf32x8_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_extractf32x8_ps::<1>(a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    // Merge-masked extract: elements with a 0 mask bit keep the value from src `b`.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_extractf32x8_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm512_mask_extractf32x8_ps::<1>(b, 0b01101001, a);
        let e = _mm256_set_ps(17., 2., 3., 20., 5., 22., 23., 8.);
        assert_eq_m256(r, e);
    }

    // Zero-masked extract: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_extractf32x8_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_maskz_extractf32x8_ps::<1>(0b01101001, a);
        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m256(r, e);
    }
8308
    // `_mm256_extractf64x2_pd::<1>`: selects the upper 128-bit half of `a`
    // (elements 2..=3, listed first by `_mm256_set_pd`).
    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_extractf64x2_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_extractf64x2_pd::<1>(a);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    // Merge-masked: mask 0b01 keeps only element 0 of the extract; element 1
    // comes from src `b`.
    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_extractf64x2_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm_set_pd(5., 6.);
        let r = _mm256_mask_extractf64x2_pd::<1>(b, 0b01, a);
        let e = _mm_set_pd(5., 2.);
        assert_eq_m128d(r, e);
    }

    // Zero-masked: element 1 (mask bit 0) is zeroed.
    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_extractf64x2_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_maskz_extractf64x2_pd::<1>(0b01, a);
        let e = _mm_set_pd(0., 2.);
        assert_eq_m128d(r, e);
    }
8333
    // `_mm512_extractf64x2_pd::<2>`: selects 128-bit lane 2 of the four in `a`
    // (elements 4..=5, i.e. values 3. and 4. given set_pd's high-first order).
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_extractf64x2_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_extractf64x2_pd::<2>(a);
        let e = _mm_set_pd(3., 4.);
        assert_eq_m128d(r, e);
    }

    // Merge-masked: mask 0b01 keeps element 0; element 1 comes from src `b`.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_extractf64x2_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm_set_pd(9., 10.);
        let r = _mm512_mask_extractf64x2_pd::<2>(b, 0b01, a);
        let e = _mm_set_pd(9., 4.);
        assert_eq_m128d(r, e);
    }

    // Zero-masked: element 1 is zeroed.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_extractf64x2_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_extractf64x2_pd::<2>(0b01, a);
        let e = _mm_set_pd(0., 4.);
        assert_eq_m128d(r, e);
    }
8358
    // `_mm512_extracti32x8_epi32::<1>`: integer counterpart of extractf32x8 —
    // selects the upper 256-bit half (elements 8..=15) of `a`.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_extracti32x8_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_extracti32x8_epi32::<1>(a);
        let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

    // Merge-masked extract: 0 mask bits take the element from src `b`.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_extracti32x8_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_mask_extracti32x8_epi32::<1>(b, 0b01101001, a);
        let e = _mm256_set_epi32(17, 2, 3, 20, 5, 22, 23, 8);
        assert_eq_m256i(r, e);
    }

    // Zero-masked extract: 0 mask bits zero the element.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_extracti32x8_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_maskz_extracti32x8_epi32::<1>(0b01101001, a);
        let e = _mm256_set_epi32(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m256i(r, e);
    }
8383
    // `_mm256_extracti64x2_epi64::<1>`: selects the upper 128-bit half of `a`
    // (elements 2..=3, listed first by `_mm256_set_epi64x`).
    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_extracti64x2_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_extracti64x2_epi64::<1>(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }

    // Merge-masked: mask 0b01 keeps element 0; element 1 comes from src `b`.
    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_extracti64x2_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm256_mask_extracti64x2_epi64::<1>(b, 0b01, a);
        let e = _mm_set_epi64x(5, 2);
        assert_eq_m128i(r, e);
    }

    // Zero-masked: element 1 is zeroed.
    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_extracti64x2_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_maskz_extracti64x2_epi64::<1>(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }
8408
    // `_mm512_extracti64x2_epi64::<2>`: selects 128-bit lane 2 of `a`
    // (elements 4..=5, i.e. values 3 and 4).
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_extracti64x2_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_extracti64x2_epi64::<2>(a);
        let e = _mm_set_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }

    // Merge-masked: mask 0b01 keeps element 0; element 1 comes from src `b`.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_extracti64x2_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi64x(9, 10);
        let r = _mm512_mask_extracti64x2_epi64::<2>(b, 0b01, a);
        let e = _mm_set_epi64x(9, 4);
        assert_eq_m128i(r, e);
    }

    // Zero-masked: element 1 is zeroed.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_extracti64x2_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_extracti64x2_epi64::<2>(0b01, a);
        let e = _mm_set_epi64x(0, 4);
        assert_eq_m128i(r, e);
    }
8433
    // `_mm512_insertf32x8::<1>`: replaces the upper 256-bit half of `a` with `b`;
    // the lower half (elements 0..=7, values 9..=16) is untouched.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_insertf32x8() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm512_insertf32x8::<1>(a, b);
        let e = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        assert_eq_m512(r, e);
    }

    // Merge-masked insert: the insert result is computed first, then elements with
    // a 0 mask bit are replaced by the corresponding element of `src`.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_insertf32x8() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let src = _mm512_set_ps(
            25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40.,
        );
        let r = _mm512_mask_insertf32x8::<1>(src, 0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            25., 18., 19., 28., 21., 30., 31., 24., 33., 34., 11., 12., 13., 14., 39., 40.,
        );
        assert_eq_m512(r, e);
    }

    // Zero-masked insert: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_insertf32x8() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm512_maskz_insertf32x8::<1>(0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            0., 18., 19., 0., 21., 0., 0., 24., 0., 0., 11., 12., 13., 14., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
8475
    // `_mm256_insertf64x2::<1>`: replaces the upper 128-bit half of `a` with `b`;
    // the lower half (3., 4.) is untouched.
    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_insertf64x2() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm_set_pd(5., 6.);
        let r = _mm256_insertf64x2::<1>(a, b);
        let e = _mm256_set_pd(5., 6., 3., 4.);
        assert_eq_m256d(r, e);
    }

    // Merge-masked insert: elements with a 0 mask bit come from `src`.
    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_insertf64x2() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm_set_pd(5., 6.);
        let src = _mm256_set_pd(7., 8., 9., 10.);
        let r = _mm256_mask_insertf64x2::<1>(src, 0b0110, a, b);
        let e = _mm256_set_pd(7., 6., 3., 10.);
        assert_eq_m256d(r, e);
    }

    // Zero-masked insert: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_insertf64x2() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm_set_pd(5., 6.);
        let r = _mm256_maskz_insertf64x2::<1>(0b0110, a, b);
        let e = _mm256_set_pd(0., 6., 3., 0.);
        assert_eq_m256d(r, e);
    }
8503
    // `_mm512_insertf64x2::<2>`: replaces 128-bit lane 2 of `a` (elements 4..=5,
    // originally 3., 4.) with `b`; all other lanes are untouched.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_insertf64x2() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm_set_pd(9., 10.);
        let r = _mm512_insertf64x2::<2>(a, b);
        let e = _mm512_set_pd(1., 2., 9., 10., 5., 6., 7., 8.);
        assert_eq_m512d(r, e);
    }

    // Merge-masked insert: elements with a 0 mask bit come from `src`.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_insertf64x2() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm_set_pd(9., 10.);
        let src = _mm512_set_pd(11., 12., 13., 14., 15., 16., 17., 18.);
        let r = _mm512_mask_insertf64x2::<2>(src, 0b01101001, a, b);
        let e = _mm512_set_pd(11., 2., 9., 14., 5., 16., 17., 8.);
        assert_eq_m512d(r, e);
    }

    // Zero-masked insert: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_insertf64x2() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm_set_pd(9., 10.);
        let r = _mm512_maskz_insertf64x2::<2>(0b01101001, a, b);
        let e = _mm512_set_pd(0., 2., 9., 0., 5., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }
8531
    // `_mm512_inserti32x8::<1>`: integer counterpart of insertf32x8 — replaces the
    // upper 256-bit half of `a` with `b`.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_inserti32x8() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_inserti32x8::<1>(a, b);
        let e = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        assert_eq_m512i(r, e);
    }

    // Merge-masked insert: elements with a 0 mask bit come from `src`.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_inserti32x8() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let src = _mm512_set_epi32(
            25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        );
        let r = _mm512_mask_inserti32x8::<1>(src, 0b0110100100111100, a, b);
        let e = _mm512_set_epi32(
            25, 18, 19, 28, 21, 30, 31, 24, 33, 34, 11, 12, 13, 14, 39, 40,
        );
        assert_eq_m512i(r, e);
    }

    // Zero-masked insert: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_inserti32x8() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_maskz_inserti32x8::<1>(0b0110100100111100, a, b);
        let e = _mm512_set_epi32(0, 18, 19, 0, 21, 0, 0, 24, 0, 0, 11, 12, 13, 14, 0, 0);
        assert_eq_m512i(r, e);
    }
8565
    // `_mm256_inserti64x2::<1>`: replaces the upper 128-bit half of `a` with `b`.
    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_inserti64x2() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm256_inserti64x2::<1>(a, b);
        let e = _mm256_set_epi64x(5, 6, 3, 4);
        assert_eq_m256i(r, e);
    }

    // Merge-masked insert: elements with a 0 mask bit come from `src`.
    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_inserti64x2() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm_set_epi64x(5, 6);
        let src = _mm256_set_epi64x(7, 8, 9, 10);
        let r = _mm256_mask_inserti64x2::<1>(src, 0b0110, a, b);
        let e = _mm256_set_epi64x(7, 6, 3, 10);
        assert_eq_m256i(r, e);
    }

    // Zero-masked insert: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_inserti64x2() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm256_maskz_inserti64x2::<1>(0b0110, a, b);
        let e = _mm256_set_epi64x(0, 6, 3, 0);
        assert_eq_m256i(r, e);
    }
8593
    // `_mm512_inserti64x2::<2>`: replaces 128-bit lane 2 of `a` (originally 3, 4)
    // with `b`.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_inserti64x2() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi64x(9, 10);
        let r = _mm512_inserti64x2::<2>(a, b);
        let e = _mm512_set_epi64(1, 2, 9, 10, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }

    // Merge-masked insert: elements with a 0 mask bit come from `src`.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_inserti64x2() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi64x(9, 10);
        let src = _mm512_set_epi64(11, 12, 13, 14, 15, 16, 17, 18);
        let r = _mm512_mask_inserti64x2::<2>(src, 0b01101001, a, b);
        let e = _mm512_set_epi64(11, 2, 9, 14, 5, 16, 17, 8);
        assert_eq_m512i(r, e);
    }

    // Zero-masked insert: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_inserti64x2() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi64x(9, 10);
        let r = _mm512_maskz_inserti64x2::<2>(0b01101001, a, b);
        let e = _mm512_set_epi64(0, 2, 9, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
8621
    // `_mm512_cvt_roundepi64_pd`: element-wise signed i64 -> f64 conversion with an
    // explicit rounding mode (round-to-nearest-even, floating-point exceptions
    // suppressed). Small integers convert exactly, so results are checked exactly.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundepi64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m512d(r, e);
    }

    // Merge-masked variant: elements with a 0 mask bit come from src `b`.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundepi64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m512d(r, e);
    }

    // Zero-masked variant: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundepi64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }
8650
    // `_mm_cvtepi64_pd`: element-wise signed i64 -> f64 (128-bit, default rounding).
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtepi64_pd() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_cvtepi64_pd(a);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    // Merge-masked: mask 0b01 converts only element 0; element 1 comes from `b`.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtepi64_pd() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_cvtepi64_pd(b, 0b01, a);
        let e = _mm_set_pd(3., 2.);
        assert_eq_m128d(r, e);
    }

    // Zero-masked: element 1 is zeroed.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi64_pd() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_maskz_cvtepi64_pd(0b01, a);
        let e = _mm_set_pd(0., 2.);
        assert_eq_m128d(r, e);
    }
8675
    // `_mm256_cvtepi64_pd`: element-wise signed i64 -> f64 (256-bit).
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtepi64_pd() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_cvtepi64_pd(a);
        let e = _mm256_set_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    // Merge-masked: elements with a 0 mask bit come from `b`.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi64_pd() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm256_set_pd(5., 6., 7., 8.);
        let r = _mm256_mask_cvtepi64_pd(b, 0b0110, a);
        let e = _mm256_set_pd(5., 2., 3., 8.);
        assert_eq_m256d(r, e);
    }

    // Zero-masked: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi64_pd() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_maskz_cvtepi64_pd(0b0110, a);
        let e = _mm256_set_pd(0., 2., 3., 0.);
        assert_eq_m256d(r, e);
    }
8700
    // `_mm512_cvtepi64_pd`: element-wise signed i64 -> f64 (512-bit).
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtepi64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvtepi64_pd(a);
        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m512d(r, e);
    }

    // Merge-masked: elements with a 0 mask bit come from `b`.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtepi64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvtepi64_pd(b, 0b01101001, a);
        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m512d(r, e);
    }

    // Zero-masked: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtepi64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvtepi64_pd(0b01101001, a);
        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }
8725
    // `_mm512_cvt_roundepi64_ps`: signed i64 -> f32 with explicit rounding
    // (nearest-even, exceptions suppressed); 8 i64 elements narrow to a 256-bit
    // f32 result.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundepi64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    // Merge-masked variant: elements with a 0 mask bit come from src `b`.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundepi64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m256(r, e);
    }

    // Zero-masked variant: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundepi64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m256(r, e);
    }
8754
    // `_mm_cvtepi64_ps`: two i64 elements convert to f32 in the low two lanes of
    // the 128-bit result; the upper two f32 lanes are zeroed.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtepi64_ps() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_cvtepi64_ps(a);
        let e = _mm_set_ps(0., 0., 1., 2.);
        assert_eq_m128(r, e);
    }

    // Merge-masked: mask 0b01 converts element 0 only; element 1 is taken from `b`
    // (5.), while the upper lanes are still zeroed.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtepi64_ps() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_ps(3., 4., 5., 6.);
        let r = _mm_mask_cvtepi64_ps(b, 0b01, a);
        let e = _mm_set_ps(0., 0., 5., 2.);
        assert_eq_m128(r, e);
    }

    // Zero-masked: element 1 is zeroed along with the upper lanes.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi64_ps() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_maskz_cvtepi64_ps(0b01, a);
        let e = _mm_set_ps(0., 0., 0., 2.);
        assert_eq_m128(r, e);
    }
8779
    // `_mm256_cvtepi64_ps`: four i64 elements narrow to a full 128-bit f32 result.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtepi64_ps() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_cvtepi64_ps(a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    // Merge-masked: elements with a 0 mask bit come from `b`.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi64_ps() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm_set_ps(5., 6., 7., 8.);
        let r = _mm256_mask_cvtepi64_ps(b, 0b0110, a);
        let e = _mm_set_ps(5., 2., 3., 8.);
        assert_eq_m128(r, e);
    }

    // Zero-masked: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi64_ps() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_maskz_cvtepi64_ps(0b0110, a);
        let e = _mm_set_ps(0., 2., 3., 0.);
        assert_eq_m128(r, e);
    }
8804
    // `_mm512_cvtepi64_ps`: eight i64 elements narrow to a 256-bit f32 result.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtepi64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvtepi64_ps(a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    // Merge-masked: elements with a 0 mask bit come from `b`.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtepi64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvtepi64_ps(b, 0b01101001, a);
        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m256(r, e);
    }

    // Zero-masked: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtepi64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvtepi64_ps(0b01101001, a);
        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m256(r, e);
    }
8829
    // `_mm512_cvt_roundepu64_pd`: unsigned u64 -> f64 with explicit rounding
    // (nearest-even, exceptions suppressed). Small values convert exactly.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundepu64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m512d(r, e);
    }

    // Merge-masked variant: elements with a 0 mask bit come from src `b`.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundepu64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m512d(r, e);
    }

    // Zero-masked variant: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundepu64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }
8858
    // `_mm_cvtepu64_pd`: element-wise unsigned u64 -> f64 (128-bit).
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtepu64_pd() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_cvtepu64_pd(a);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    // Merge-masked: mask 0b01 converts only element 0; element 1 comes from `b`.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtepu64_pd() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_cvtepu64_pd(b, 0b01, a);
        let e = _mm_set_pd(3., 2.);
        assert_eq_m128d(r, e);
    }

    // Zero-masked: element 1 is zeroed.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtepu64_pd() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_maskz_cvtepu64_pd(0b01, a);
        let e = _mm_set_pd(0., 2.);
        assert_eq_m128d(r, e);
    }
8883
    // `_mm256_cvtepu64_pd`: element-wise unsigned u64 -> f64 (256-bit).
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtepu64_pd() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_cvtepu64_pd(a);
        let e = _mm256_set_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    // Merge-masked: elements with a 0 mask bit come from `b`.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtepu64_pd() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm256_set_pd(5., 6., 7., 8.);
        let r = _mm256_mask_cvtepu64_pd(b, 0b0110, a);
        let e = _mm256_set_pd(5., 2., 3., 8.);
        assert_eq_m256d(r, e);
    }

    // Zero-masked: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepu64_pd() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_maskz_cvtepu64_pd(0b0110, a);
        let e = _mm256_set_pd(0., 2., 3., 0.);
        assert_eq_m256d(r, e);
    }
8908
    // `_mm512_cvtepu64_pd`: element-wise unsigned u64 -> f64 (512-bit).
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtepu64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvtepu64_pd(a);
        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m512d(r, e);
    }

    // Merge-masked: elements with a 0 mask bit come from `b`.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtepu64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvtepu64_pd(b, 0b01101001, a);
        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m512d(r, e);
    }

    // Zero-masked: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtepu64_pd() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvtepu64_pd(0b01101001, a);
        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }
8933
    // `_mm512_cvt_roundepu64_ps`: unsigned u64 -> f32 with explicit rounding
    // (nearest-even, exceptions suppressed); 8 elements narrow to 256 bits.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundepu64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    // Merge-masked variant: elements with a 0 mask bit come from src `b`.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundepu64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m256(r, e);
    }

    // Zero-masked variant: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundepu64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m256(r, e);
    }
8962
    // `_mm_cvtepu64_ps`: two u64 elements convert to f32 in the low two lanes;
    // the upper two f32 lanes are zeroed.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtepu64_ps() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_cvtepu64_ps(a);
        let e = _mm_set_ps(0., 0., 1., 2.);
        assert_eq_m128(r, e);
    }

    // Merge-masked: mask 0b01 converts element 0; element 1 comes from `b` (5.),
    // upper lanes remain zero.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtepu64_ps() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_ps(3., 4., 5., 6.);
        let r = _mm_mask_cvtepu64_ps(b, 0b01, a);
        let e = _mm_set_ps(0., 0., 5., 2.);
        assert_eq_m128(r, e);
    }

    // Zero-masked: element 1 is zeroed along with the upper lanes.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtepu64_ps() {
        let a = _mm_set_epi64x(1, 2);
        let r = _mm_maskz_cvtepu64_ps(0b01, a);
        let e = _mm_set_ps(0., 0., 0., 2.);
        assert_eq_m128(r, e);
    }
8987
    // `_mm256_cvtepu64_ps`: four u64 elements narrow to a full 128-bit f32 result.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtepu64_ps() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_cvtepu64_ps(a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    // Merge-masked: elements with a 0 mask bit come from `b`.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtepu64_ps() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm_set_ps(5., 6., 7., 8.);
        let r = _mm256_mask_cvtepu64_ps(b, 0b0110, a);
        let e = _mm_set_ps(5., 2., 3., 8.);
        assert_eq_m128(r, e);
    }

    // Zero-masked: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepu64_ps() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let r = _mm256_maskz_cvtepu64_ps(0b0110, a);
        let e = _mm_set_ps(0., 2., 3., 0.);
        assert_eq_m128(r, e);
    }
9012
    // `_mm512_cvtepu64_ps`: eight u64 elements narrow to a 256-bit f32 result.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtepu64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_cvtepu64_ps(a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    // Merge-masked: elements with a 0 mask bit come from `b`.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtepu64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_cvtepu64_ps(b, 0b01101001, a);
        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
        assert_eq_m256(r, e);
    }

    // Zero-masked: elements with a 0 mask bit become 0.
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtepu64_ps() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_maskz_cvtepu64_ps(0b01101001, a);
        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
        assert_eq_m256(r, e);
    }
9037
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundpd_epi64() {
        // Explicit rounding control: round-to-nearest with exceptions suppressed.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9045
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundpd_epi64() {
        // Writemask 0b01101001: set bits take the rounded conversion, clear bits keep b.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9056
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundpd_epi64() {
        // Zeromask 0b01101001: set bits take the rounded conversion, clear bits yield 0.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9066
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtpd_epi64() {
        // Unmasked conversion: both f64 lanes become i64 lanes.
        let a = _mm_set_pd(1., 2.);
        let r = _mm_cvtpd_epi64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }
9074
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtpd_epi64() {
        // Writemask 0b01: lane 0 is converted, lane 1 keeps b.
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_mask_cvtpd_epi64(b, 0b01, a);
        let e = _mm_set_epi64x(3, 2);
        assert_eq_m128i(r, e);
    }
9083
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtpd_epi64() {
        // Zeromask 0b01: lane 0 is converted, lane 1 is zeroed.
        let a = _mm_set_pd(1., 2.);
        let r = _mm_maskz_cvtpd_epi64(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }
9091
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtpd_epi64() {
        // Unmasked conversion: all four f64 lanes become i64 lanes.
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_cvtpd_epi64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }
9099
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtpd_epi64() {
        // Writemask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 keep b.
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvtpd_epi64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }
9108
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtpd_epi64() {
        // Zeromask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 are zeroed.
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_maskz_cvtpd_epi64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }
9116
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtpd_epi64() {
        // Unmasked conversion: all eight f64 lanes become i64 lanes.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtpd_epi64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9124
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtpd_epi64() {
        // Writemask 0b01101001: set bits take the conversion, clear bits keep b.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtpd_epi64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9133
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtpd_epi64() {
        // Zeromask 0b01101001: set bits take the conversion, clear bits yield 0.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtpd_epi64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9141
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundps_epi64() {
        // Explicit rounding control: round-to-nearest with exceptions suppressed.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9149
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundps_epi64() {
        // Writemask 0b01101001: set bits take the rounded conversion, clear bits keep b.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9160
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundps_epi64() {
        // Zeromask 0b01101001: set bits take the rounded conversion, clear bits yield 0.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9170
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtps_epi64() {
        // Only the two lowest f32 lanes (4. and 3.) feed the two i64 result lanes.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_cvtps_epi64(a);
        let e = _mm_set_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }
9178
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtps_epi64() {
        // Writemask 0b01 over the two low f32 lanes: lane 0 is converted, lane 1 keeps b.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm_mask_cvtps_epi64(b, 0b01, a);
        let e = _mm_set_epi64x(5, 4);
        assert_eq_m128i(r, e);
    }
9187
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtps_epi64() {
        // Zeromask 0b01 over the two low f32 lanes: lane 0 is converted, lane 1 is zeroed.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_maskz_cvtps_epi64(0b01, a);
        let e = _mm_set_epi64x(0, 4);
        assert_eq_m128i(r, e);
    }
9195
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtps_epi64() {
        // Unmasked widening conversion: four f32 lanes become four i64 lanes.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_cvtps_epi64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }
9203
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtps_epi64() {
        // Writemask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 keep b.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvtps_epi64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }
9212
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtps_epi64() {
        // Zeromask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 are zeroed.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_maskz_cvtps_epi64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }
9220
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtps_epi64() {
        // Unmasked widening conversion: eight f32 lanes become eight i64 lanes.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtps_epi64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9228
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtps_epi64() {
        // Writemask 0b01101001: set bits take the conversion, clear bits keep b.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtps_epi64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9237
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtps_epi64() {
        // Zeromask 0b01101001: set bits take the conversion, clear bits yield 0.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtps_epi64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9245
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundpd_epu64() {
        // Unsigned variant with explicit rounding: round-to-nearest, exceptions suppressed.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9253
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundpd_epu64() {
        // Writemask 0b01101001: set bits take the rounded conversion, clear bits keep b.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9264
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundpd_epu64() {
        // Zeromask 0b01101001: set bits take the rounded conversion, clear bits yield 0.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9274
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtpd_epu64() {
        // Unmasked conversion: both f64 lanes become u64 lanes.
        let a = _mm_set_pd(1., 2.);
        let r = _mm_cvtpd_epu64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }
9282
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtpd_epu64() {
        // Writemask 0b01: lane 0 is converted, lane 1 keeps b.
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_mask_cvtpd_epu64(b, 0b01, a);
        let e = _mm_set_epi64x(3, 2);
        assert_eq_m128i(r, e);
    }
9291
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtpd_epu64() {
        // Zeromask 0b01: lane 0 is converted, lane 1 is zeroed.
        let a = _mm_set_pd(1., 2.);
        let r = _mm_maskz_cvtpd_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }
9299
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtpd_epu64() {
        // Unmasked conversion: all four f64 lanes become u64 lanes.
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_cvtpd_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }
9307
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtpd_epu64() {
        // Writemask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 keep b.
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvtpd_epu64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }
9316
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtpd_epu64() {
        // Zeromask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 are zeroed.
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_maskz_cvtpd_epu64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }
9324
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtpd_epu64() {
        // Unmasked conversion: all eight f64 lanes become u64 lanes.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtpd_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9332
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtpd_epu64() {
        // Writemask 0b01101001: set bits take the conversion, clear bits keep b.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtpd_epu64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9341
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtpd_epu64() {
        // Zeromask 0b01101001: set bits take the conversion, clear bits yield 0.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtpd_epu64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9349
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvt_roundps_epu64() {
        // Unsigned variant with explicit rounding: round-to-nearest, exceptions suppressed.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9357
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvt_roundps_epu64() {
        // Writemask 0b01101001: set bits take the rounded conversion, clear bits keep b.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            b, 0b01101001, a,
        );
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9368
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvt_roundps_epu64() {
        // Zeromask 0b01101001: set bits take the rounded conversion, clear bits yield 0.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b01101001, a,
        );
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9378
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvtps_epu64() {
        // Only the two lowest f32 lanes (4. and 3.) feed the two u64 result lanes.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_cvtps_epu64(a);
        let e = _mm_set_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }
9386
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvtps_epu64() {
        // Writemask 0b01 over the two low f32 lanes: lane 0 is converted, lane 1 keeps b.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm_mask_cvtps_epu64(b, 0b01, a);
        let e = _mm_set_epi64x(5, 4);
        assert_eq_m128i(r, e);
    }
9395
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvtps_epu64() {
        // Zeromask 0b01 over the two low f32 lanes: lane 0 is converted, lane 1 is zeroed.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_maskz_cvtps_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 4);
        assert_eq_m128i(r, e);
    }
9403
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvtps_epu64() {
        // Unmasked widening conversion: four f32 lanes become four u64 lanes.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_cvtps_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }
9411
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvtps_epu64() {
        // Writemask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 keep b.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvtps_epu64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }
9420
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvtps_epu64() {
        // Zeromask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 are zeroed.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_maskz_cvtps_epu64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }
9428
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtps_epu64() {
        // Unmasked widening conversion: eight f32 lanes become eight u64 lanes.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtps_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9436
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtps_epu64() {
        // Writemask 0b01101001: set bits take the conversion, clear bits keep b.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtps_epu64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9445
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtps_epu64() {
        // Zeromask 0b01101001: set bits take the conversion, clear bits yield 0.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtps_epu64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9453
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtt_roundpd_epi64() {
        // Truncating conversion; _MM_FROUND_NO_EXC suppresses exceptions (SAE).
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9461
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtt_roundpd_epi64() {
        // Writemask 0b01101001: set bits take the truncating conversion, clear bits keep b.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9470
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtt_roundpd_epi64() {
        // Zeromask 0b01101001: set bits take the truncating conversion, clear bits yield 0.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9478
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvttpd_epi64() {
        // Truncating conversion: both f64 lanes become i64 lanes.
        let a = _mm_set_pd(1., 2.);
        let r = _mm_cvttpd_epi64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }
9486
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvttpd_epi64() {
        // Writemask 0b01: lane 0 is converted, lane 1 keeps b.
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_mask_cvttpd_epi64(b, 0b01, a);
        let e = _mm_set_epi64x(3, 2);
        assert_eq_m128i(r, e);
    }
9495
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvttpd_epi64() {
        // Zeromask 0b01: lane 0 is converted, lane 1 is zeroed.
        let a = _mm_set_pd(1., 2.);
        let r = _mm_maskz_cvttpd_epi64(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }
9503
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvttpd_epi64() {
        // Truncating conversion: all four f64 lanes become i64 lanes.
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_cvttpd_epi64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }
9511
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvttpd_epi64() {
        // Writemask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 keep b.
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvttpd_epi64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }
9520
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvttpd_epi64() {
        // Zeromask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 are zeroed.
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_maskz_cvttpd_epi64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }
9528
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvttpd_epi64() {
        // Truncating conversion: all eight f64 lanes become i64 lanes.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvttpd_epi64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9536
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvttpd_epi64() {
        // Writemask 0b01101001: set bits take the conversion, clear bits keep b.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvttpd_epi64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9545
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvttpd_epi64() {
        // Zeromask 0b01101001: set bits take the conversion, clear bits yield 0.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvttpd_epi64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9553
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtt_roundps_epi64() {
        // Truncating conversion; _MM_FROUND_NO_EXC suppresses exceptions (SAE).
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9561
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtt_roundps_epi64() {
        // Writemask 0b01101001: set bits take the truncating conversion, clear bits keep b.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9570
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtt_roundps_epi64() {
        // Zeromask 0b01101001: set bits take the truncating conversion, clear bits yield 0.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9578
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvttps_epi64() {
        // Only the two lowest f32 lanes (4. and 3.) feed the two i64 result lanes.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_cvttps_epi64(a);
        let e = _mm_set_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }
9586
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvttps_epi64() {
        // Writemask 0b01 over the two low f32 lanes: lane 0 is converted, lane 1 keeps b.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm_mask_cvttps_epi64(b, 0b01, a);
        let e = _mm_set_epi64x(5, 4);
        assert_eq_m128i(r, e);
    }
9595
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvttps_epi64() {
        // Zeromask 0b01 over the two low f32 lanes: lane 0 is converted, lane 1 is zeroed.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_maskz_cvttps_epi64(0b01, a);
        let e = _mm_set_epi64x(0, 4);
        assert_eq_m128i(r, e);
    }
9603
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvttps_epi64() {
        // Truncating widening conversion: four f32 lanes become four i64 lanes.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_cvttps_epi64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }
9611
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvttps_epi64() {
        // Writemask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 keep b.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvttps_epi64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }
9620
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvttps_epi64() {
        // Zeromask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 are zeroed.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_maskz_cvttps_epi64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }
9628
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvttps_epi64() {
        // Truncating widening conversion: eight f32 lanes become eight i64 lanes.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvttps_epi64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9636
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvttps_epi64() {
        // Writemask 0b01101001: set bits take the conversion, clear bits keep b.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvttps_epi64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9645
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvttps_epi64() {
        // Zeromask 0b01101001: set bits take the conversion, clear bits yield 0.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvttps_epi64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9653
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtt_roundpd_epu64() {
        // Truncating unsigned conversion; _MM_FROUND_NO_EXC suppresses exceptions (SAE).
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9661
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtt_roundpd_epu64() {
        // Writemask 0b01101001: set bits take the truncating conversion, clear bits keep b.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9670
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtt_roundpd_epu64() {
        // Zeromask 0b01101001: set bits take the truncating conversion, clear bits yield 0.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9678
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvttpd_epu64() {
        // Truncating conversion: both f64 lanes become u64 lanes.
        let a = _mm_set_pd(1., 2.);
        let r = _mm_cvttpd_epu64(a);
        let e = _mm_set_epi64x(1, 2);
        assert_eq_m128i(r, e);
    }
9686
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvttpd_epu64() {
        // Writemask 0b01: lane 0 is converted, lane 1 keeps b.
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_mask_cvttpd_epu64(b, 0b01, a);
        let e = _mm_set_epi64x(3, 2);
        assert_eq_m128i(r, e);
    }
9695
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvttpd_epu64() {
        // Zeromask 0b01: lane 0 is converted, lane 1 is zeroed.
        let a = _mm_set_pd(1., 2.);
        let r = _mm_maskz_cvttpd_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }
9703
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvttpd_epu64() {
        // Truncating conversion: all four f64 lanes become u64 lanes.
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_cvttpd_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }
9711
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvttpd_epu64() {
        // Writemask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 keep b.
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvttpd_epu64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }
9720
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvttpd_epu64() {
        // Zeromask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 are zeroed.
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let r = _mm256_maskz_cvttpd_epu64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }
9728
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvttpd_epu64() {
        // Truncating conversion: all eight f64 lanes become u64 lanes.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvttpd_epu64(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9736
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvttpd_epu64() {
        // Writemask 0b01101001: set bits take the conversion, clear bits keep b.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvttpd_epu64(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9745
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvttpd_epu64() {
        // Zeromask 0b01101001: set bits take the conversion, clear bits yield 0.
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvttpd_epu64(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9753
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_cvtt_roundps_epu64() {
        // Truncating unsigned conversion; _MM_FROUND_NO_EXC suppresses exceptions (SAE).
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m512i(r, e);
    }
9761
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_cvtt_roundps_epu64() {
        // Writemask 0b01101001: set bits take the truncating conversion, clear bits keep b.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mask_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
        assert_eq_m512i(r, e);
    }
9770
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_cvtt_roundps_epu64() {
        // Zeromask 0b01101001: set bits take the truncating conversion, clear bits yield 0.
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm512_maskz_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a);
        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
        assert_eq_m512i(r, e);
    }
9778
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_cvttps_epu64() {
        // Only the two lowest f32 lanes (4. and 3.) feed the two u64 result lanes.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_cvttps_epu64(a);
        let e = _mm_set_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }
9786
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_cvttps_epu64() {
        // Writemask 0b01 over the two low f32 lanes: lane 0 is converted, lane 1 keeps b.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_epi64x(5, 6);
        let r = _mm_mask_cvttps_epu64(b, 0b01, a);
        let e = _mm_set_epi64x(5, 4);
        assert_eq_m128i(r, e);
    }
9795
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_cvttps_epu64() {
        // Zeromask 0b01 over the two low f32 lanes: lane 0 is converted, lane 1 is zeroed.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_maskz_cvttps_epu64(0b01, a);
        let e = _mm_set_epi64x(0, 4);
        assert_eq_m128i(r, e);
    }
9803
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_cvttps_epu64() {
        // Truncating widening conversion: four f32 lanes become four u64 lanes.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_cvttps_epu64(a);
        let e = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, e);
    }
9811
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_cvttps_epu64() {
        // Writemask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 keep b.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mask_cvttps_epu64(b, 0b0110, a);
        let e = _mm256_set_epi64x(5, 2, 3, 8);
        assert_eq_m256i(r, e);
    }
9820
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_cvttps_epu64() {
        // Zeromask 0b0110: lanes 1 and 2 are converted, lanes 0 and 3 are zeroed.
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm256_maskz_cvttps_epu64(0b0110, a);
        let e = _mm256_set_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }
9828
9829    #[simd_test(enable = "avx512dq")]
9830    unsafe fn test_mm512_cvttps_epu64() {
9831        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9832        let r = _mm512_cvttps_epu64(a);
9833        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9834        assert_eq_m512i(r, e);
9835    }
9836
9837    #[simd_test(enable = "avx512dq")]
9838    unsafe fn test_mm512_mask_cvttps_epu64() {
9839        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9840        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9841        let r = _mm512_mask_cvttps_epu64(b, 0b01101001, a);
9842        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9843        assert_eq_m512i(r, e);
9844    }
9845
9846    #[simd_test(enable = "avx512dq")]
9847    unsafe fn test_mm512_maskz_cvttps_epu64() {
9848        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9849        let r = _mm512_maskz_cvttps_epu64(0b01101001, a);
9850        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9851        assert_eq_m512i(r, e);
9852    }
9853
    // 64-bit lane-wise low multiply (vpmullq) plus mask/maskz variants.
    // These are `const unsafe fn` so they are also exercised in const-eval.

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_mullo_epi64() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_mullo_epi64(a, b);
        // Per-lane products: 1*3 and 2*4.
        let e = _mm_set_epi64x(3, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_mask_mullo_epi64() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_epi64x(3, 4);
        let c = _mm_set_epi64x(5, 6);
        // Mask 0b01: lane 0 multiplied, lane 1 copied from src (5).
        let r = _mm_mask_mullo_epi64(c, 0b01, a, b);
        let e = _mm_set_epi64x(5, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_maskz_mullo_epi64() {
        let a = _mm_set_epi64x(1, 2);
        let b = _mm_set_epi64x(3, 4);
        let r = _mm_maskz_mullo_epi64(0b01, a, b);
        let e = _mm_set_epi64x(0, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mullo_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_mullo_epi64(a, b);
        let e = _mm256_set_epi64x(5, 12, 21, 32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_mask_mullo_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let c = _mm256_set_epi64x(9, 10, 11, 12);
        let r = _mm256_mask_mullo_epi64(c, 0b0110, a, b);
        let e = _mm256_set_epi64x(9, 12, 21, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_maskz_mullo_epi64() {
        let a = _mm256_set_epi64x(1, 2, 3, 4);
        let b = _mm256_set_epi64x(5, 6, 7, 8);
        let r = _mm256_maskz_mullo_epi64(0b0110, a, b);
        let e = _mm256_set_epi64x(0, 12, 21, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mullo_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_mullo_epi64(a, b);
        let e = _mm512_set_epi64(9, 20, 33, 48, 65, 84, 105, 128);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_mask_mullo_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let c = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_mask_mullo_epi64(c, 0b01101001, a, b);
        let e = _mm512_set_epi64(17, 20, 33, 20, 65, 22, 23, 128);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_maskz_mullo_epi64() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_maskz_mullo_epi64(0b01101001, a, b);
        let e = _mm512_set_epi64(0, 20, 33, 0, 65, 0, 0, 128);
        assert_eq_m512i(r, e);
    }
9937
    // Round-trip conversions between an 8-bit mask register and u32;
    // both directions must preserve the bit pattern unchanged.

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_cvtmask8_u32() {
        let a: __mmask8 = 0b01101001;
        let r = _cvtmask8_u32(a);
        let e: u32 = 0b01101001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_cvtu32_mask8() {
        let a: u32 = 0b01101001;
        let r = _cvtu32_mask8(a);
        let e: __mmask8 = 0b01101001;
        assert_eq!(r, e);
    }
9953
    // Mask-register arithmetic and bitwise ops (kaddw/kaddb, kandb, kandnb,
    // knotb, korb, kxnorb, kxorb). Operands are chosen so kadd does not wrap.

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_kadd_mask16() {
        let a: __mmask16 = 27549;
        let b: __mmask16 = 23434;
        let r = _kadd_mask16(a, b);
        // 27549 + 23434 = 50983, within u16 range.
        let e: __mmask16 = 50983;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_kadd_mask8() {
        let a: __mmask8 = 98;
        let b: __mmask8 = 117;
        let r = _kadd_mask8(a, b);
        // 98 + 117 = 215, within u8 range.
        let e: __mmask8 = 215;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_kand_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        let r = _kand_mask8(a, b);
        let e: __mmask8 = 0b00100001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_kandn_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        // AND-NOT: (!a) & b.
        let r = _kandn_mask8(a, b);
        let e: __mmask8 = 0b10010010;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_knot_mask8() {
        let a: __mmask8 = 0b01101001;
        let r = _knot_mask8(a);
        let e: __mmask8 = 0b10010110;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_kor_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        let r = _kor_mask8(a, b);
        let e: __mmask8 = 0b11111011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_kxnor_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        // XNOR: !(a ^ b).
        let r = _kxnor_mask8(a, b);
        let e: __mmask8 = 0b00100101;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_kxor_mask8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110011;
        let r = _kxor_mask8(a, b);
        let e: __mmask8 = 0b11011010;
        assert_eq!(r, e);
    }
10024
    // KORTEST: OR the two masks and report flags. Here b is chosen so that
    // a | b == 0b11111111, i.e. the all-ones flag fires and the zero flag
    // does not.

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_kortest_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110110;
        let mut all_ones: u8 = 0;
        // Return value is the "OR is zero" flag; out-param is "OR is all ones".
        let r = _kortest_mask8_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 1);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_kortestc_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110110;
        let r = _kortestc_mask8_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_kortestz_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10110110;
        let r = _kortestz_mask8_u8(a, b);
        assert_eq!(r, 0);
    }
10050
    // Mask shifts by an immediate count, including the boundary (7) and
    // out-of-range counts (>= 8), which must produce an all-zero mask
    // rather than wrapping the shift amount.

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_kshiftli_mask8() {
        let a: __mmask8 = 0b01101001;
        let r = _kshiftli_mask8::<3>(a);
        let e: __mmask8 = 0b01001000;
        assert_eq!(r, e);

        let r = _kshiftli_mask8::<7>(a);
        let e: __mmask8 = 0b10000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask8::<8>(a);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask8::<9>(a);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_kshiftri_mask8() {
        let a: __mmask8 = 0b10101001;
        let r = _kshiftri_mask8::<3>(a);
        let e: __mmask8 = 0b00010101;
        assert_eq!(r, e);

        let r = _kshiftri_mask8::<7>(a);
        let e: __mmask8 = 0b00000001;
        assert_eq!(r, e);

        let r = _kshiftri_mask8::<8>(a);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask8::<9>(a);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }
10090
    // KTEST: report whether a & b is zero (return value / ZF) and whether
    // (!a) & b is zero (out-param / CF). In every case below, b is the exact
    // bitwise complement of a, so a & b == 0 but (!a) & b == b != 0.

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_ktest_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10010110;
        let mut and_not: u8 = 0;
        let r = _ktest_mask8_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_ktestc_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10010110;
        let r = _ktestc_mask8_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_ktestz_mask8_u8() {
        let a: __mmask8 = 0b01101001;
        let b: __mmask8 = 0b10010110;
        let r = _ktestz_mask8_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_ktest_mask16_u8() {
        let a: __mmask16 = 0b0110100100111100;
        let b: __mmask16 = 0b1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask16_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_ktestc_mask16_u8() {
        let a: __mmask16 = 0b0110100100111100;
        let b: __mmask16 = 0b1001011011000011;
        let r = _ktestc_mask16_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_ktestz_mask16_u8() {
        let a: __mmask16 = 0b0110100100111100;
        let b: __mmask16 = 0b1001011011000011;
        let r = _ktestz_mask16_u8(a, b);
        assert_eq!(r, 1);
    }
10142
    // Load/store an 8-bit mask through a pointer; the bit pattern must survive
    // the round trip unchanged.

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_load_mask8() {
        let a: __mmask8 = 0b01101001;
        let r = _load_mask8(&a);
        let e: __mmask8 = 0b01101001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_store_mask8() {
        let a: __mmask8 = 0b01101001;
        let mut r = 0;
        _store_mask8(&mut r, a);
        let e: __mmask8 = 0b01101001;
        assert_eq!(r, e);
    }
10159
    // vpmovd2m / vpmovq2m: collect the sign (most significant) bit of each
    // lane into a mask register; negative inputs set their mask bit.

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_movepi32_mask() {
        let a = _mm_set_epi32(0, -2, -3, 4);
        let r = _mm_movepi32_mask(a);
        // Lanes -2 and -3 are negative -> bits 2 and 1 set.
        let e = 0b0110;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_movepi32_mask() {
        let a = _mm256_set_epi32(0, -2, -3, 4, -5, 6, 7, -8);
        let r = _mm256_movepi32_mask(a);
        let e = 0b01101001;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_movepi32_mask() {
        let a = _mm512_set_epi32(
            0, -2, -3, 4, -5, 6, 7, -8, 9, 10, -11, -12, -13, -14, 15, 16,
        );
        let r = _mm512_movepi32_mask(a);
        let e = 0b0110100100111100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_movepi64_mask() {
        let a = _mm_set_epi64x(0, -2);
        let r = _mm_movepi64_mask(a);
        let e = 0b01;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_movepi64_mask() {
        let a = _mm256_set_epi64x(0, -2, -3, 4);
        let r = _mm256_movepi64_mask(a);
        let e = 0b0110;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_movepi64_mask() {
        let a = _mm512_set_epi64(0, -2, -3, 4, -5, 6, 7, -8);
        let r = _mm512_movepi64_mask(a);
        let e = 0b01101001;
        assert_eq!(r, e);
    }
10209
    // vpmovm2d / vpmovm2q: the inverse of movepi*_mask — broadcast each mask
    // bit across its lane, yielding all-ones (-1) for set bits and 0 otherwise.

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_movm_epi32() {
        let a = 0b0110;
        let r = _mm_movm_epi32(a);
        let e = _mm_set_epi32(0, -1, -1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_movm_epi32() {
        let a = 0b01101001;
        let r = _mm256_movm_epi32(a);
        let e = _mm256_set_epi32(0, -1, -1, 0, -1, 0, 0, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_movm_epi32() {
        let a = 0b0110100100111100;
        let r = _mm512_movm_epi32(a);
        let e = _mm512_set_epi32(0, -1, -1, 0, -1, 0, 0, -1, 0, 0, -1, -1, -1, -1, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm_movm_epi64() {
        let a = 0b01;
        let r = _mm_movm_epi64(a);
        let e = _mm_set_epi64x(0, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    const unsafe fn test_mm256_movm_epi64() {
        let a = 0b0110;
        let r = _mm256_movm_epi64(a);
        let e = _mm256_set_epi64x(0, -1, -1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    const unsafe fn test_mm512_movm_epi64() {
        let a = 0b01101001;
        let r = _mm512_movm_epi64(a);
        let e = _mm512_set_epi64(0, -1, -1, 0, -1, 0, 0, -1);
        assert_eq_m512i(r, e);
    }
10257
    // vrangepd tests. IMM8 = 0b0101: bits 1:0 = 01 select the per-lane MAX,
    // bits 3:2 = 01 take the sign from the comparison result (per the Intel
    // intrinsics guide encoding); all inputs here are positive, so each lane
    // simply yields max(a, b). The _round_ variants additionally pass
    // _MM_FROUND_NO_EXC to suppress FP exceptions (SAE).

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_range_round_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm512_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_range_round_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b01101001, a, b);
        let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_range_round_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm512_maskz_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(0b01101001, a, b);
        let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_range_pd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(2., 1.);
        let r = _mm_range_pd::<0b0101>(a, b);
        let e = _mm_set_pd(2., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_range_pd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(2., 1.);
        let c = _mm_set_pd(3., 4.);
        let r = _mm_mask_range_pd::<0b0101>(c, 0b01, a, b);
        let e = _mm_set_pd(3., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_range_pd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(2., 1.);
        let r = _mm_maskz_range_pd::<0b0101>(0b01, a, b);
        let e = _mm_set_pd(0., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_range_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_pd(2., 1., 4., 3.);
        let r = _mm256_range_pd::<0b0101>(a, b);
        let e = _mm256_set_pd(2., 2., 4., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_range_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_pd(2., 1., 4., 3.);
        let c = _mm256_set_pd(5., 6., 7., 8.);
        let r = _mm256_mask_range_pd::<0b0101>(c, 0b0110, a, b);
        let e = _mm256_set_pd(5., 2., 4., 8.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_range_pd() {
        let a = _mm256_set_pd(1., 2., 3., 4.);
        let b = _mm256_set_pd(2., 1., 4., 3.);
        let r = _mm256_maskz_range_pd::<0b0101>(0b0110, a, b);
        let e = _mm256_set_pd(0., 2., 4., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_range_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm512_range_pd::<0b0101>(a, b);
        let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_range_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm512_mask_range_pd::<0b0101>(c, 0b01101001, a, b);
        let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_range_pd() {
        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm512_maskz_range_pd::<0b0101>(0b01101001, a, b);
        let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.);
        assert_eq_m512d(r, e);
    }
10369
    // vrangeps tests — single-precision analogue of the vrangepd tests above:
    // IMM8 = 0b0101 yields per-lane max(a, b) for these positive inputs, and
    // the _round_ variants pass _MM_FROUND_NO_EXC (SAE).

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_range_round_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let r = _mm512_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm512_set_ps(
            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_range_round_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let c = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r =
            _mm512_mask_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_range_round_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let r = _mm512_maskz_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_range_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(2., 1., 4., 3.);
        let r = _mm_range_ps::<0b0101>(a, b);
        let e = _mm_set_ps(2., 2., 4., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_range_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(2., 1., 4., 3.);
        let c = _mm_set_ps(5., 6., 7., 8.);
        let r = _mm_mask_range_ps::<0b0101>(c, 0b0110, a, b);
        let e = _mm_set_ps(5., 2., 4., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_range_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(2., 1., 4., 3.);
        let r = _mm_maskz_range_ps::<0b0101>(0b0110, a, b);
        let e = _mm_set_ps(0., 2., 4., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_range_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm256_range_ps::<0b0101>(a, b);
        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_range_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
        let c = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm256_mask_range_ps::<0b0101>(c, 0b01101001, a, b);
        let e = _mm256_set_ps(9., 2., 4., 12., 6., 14., 15., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_range_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
        let r = _mm256_maskz_range_ps::<0b0101>(0b01101001, a, b);
        let e = _mm256_set_ps(0., 2., 4., 0., 6., 0., 0., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_range_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let r = _mm512_range_ps::<0b0101>(a, b);
        let e = _mm512_set_ps(
            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_range_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let c = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_mask_range_ps::<0b0101>(c, 0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_range_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
        );
        let r = _mm512_maskz_range_ps::<0b0101>(0b0110100100111100, a, b);
        let e = _mm512_set_ps(
            0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
10522
    // Scalar vrangesd tests. Only the low f64 lane is computed; the masked
    // variants use mask bit 0 = 0, so the low lane comes from src (mask) or is
    // zeroed (maskz) instead of from the range computation.

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_range_round_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let r = _mm_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm_set_sd(2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_mask_range_round_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let c = _mm_set_sd(3.);
        let r = _mm_mask_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b);
        let e = _mm_set_sd(3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_maskz_range_round_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let r = _mm_maskz_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b);
        let e = _mm_set_sd(0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_mask_range_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let c = _mm_set_sd(3.);
        let r = _mm_mask_range_sd::<0b0101>(c, 0b0, a, b);
        let e = _mm_set_sd(3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_maskz_range_sd() {
        let a = _mm_set_sd(1.);
        let b = _mm_set_sd(2.);
        let r = _mm_maskz_range_sd::<0b0101>(0b0, a, b);
        let e = _mm_set_sd(0.);
        assert_eq_m128d(r, e);
    }
10569
    // Scalar vrangess tests — single-precision analogue of the _sd tests
    // above; the masked variants pass mask 0, selecting src / zero for the
    // low lane.

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_range_round_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let r = _mm_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(a, b);
        let e = _mm_set_ss(2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_mask_range_round_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let c = _mm_set_ss(3.);
        let r = _mm_mask_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b);
        let e = _mm_set_ss(3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_maskz_range_round_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let r = _mm_maskz_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b);
        let e = _mm_set_ss(0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_mask_range_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let c = _mm_set_ss(3.);
        let r = _mm_mask_range_ss::<0b0101>(c, 0b0, a, b);
        let e = _mm_set_ss(3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm_maskz_range_ss() {
        let a = _mm_set_ss(1.);
        let b = _mm_set_ss(2.);
        let r = _mm_maskz_range_ss::<0b0101>(0b0, a, b);
        let e = _mm_set_ss(0.);
        assert_eq_m128(r, e);
    }
10616
    // vreducepd tests. IMM8 = 16 | _MM_FROUND_TO_ZERO puts 1 in imm8[7:4]
    // (fraction bits to keep, M = 1) with truncation, so each lane becomes
    // a - trunc(a * 2) / 2 — e.g. 0.25 -> 0.25, 0.50 -> 0.0, 0.75 -> 0.25.
    // (Encoding per the Intel intrinsics guide for _mm512_reduce_pd.)

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_reduce_round_pd() {
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm512_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_reduce_round_pd() {
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
        let r = _mm512_mask_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            src, 0b01101001, a,
        );
        let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_reduce_round_pd() {
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm512_maskz_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            0b01101001, a,
        );
        let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_reduce_pd() {
        let a = _mm_set_pd(0.25, 0.50);
        let r = _mm_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm_set_pd(0.25, 0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_mask_reduce_pd() {
        let a = _mm_set_pd(0.25, 0.50);
        let src = _mm_set_pd(3., 4.);
        let r = _mm_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01, a);
        let e = _mm_set_pd(3., 0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm_maskz_reduce_pd() {
        let a = _mm_set_pd(0.25, 0.50);
        let r = _mm_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01, a);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_reduce_pd() {
        let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
        let r = _mm256_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm256_set_pd(0.25, 0., 0.25, 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_reduce_pd() {
        let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
        let src = _mm256_set_pd(3., 4., 5., 6.);
        let r = _mm256_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a);
        let e = _mm256_set_pd(3., 0., 0.25, 6.);
        assert_eq_m256d(r, e);
    }
10687
10688    #[simd_test(enable = "avx512dq,avx512vl")]
10689    unsafe fn test_mm256_maskz_reduce_pd() {
10690        let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
10691        let r = _mm256_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a);
10692        let e = _mm256_set_pd(0., 0., 0.25, 0.);
10693        assert_eq_m256d(r, e);
10694    }
10695
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_reduce_pd() {
        // Non-rounding variant: remainder modulo 0.5 with truncation in every lane;
        // exact multiples of 0.5 reduce to 0.
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm512_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
        assert_eq_m512d(r, e);
    }
10703
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_reduce_pd() {
        // Active lanes (mask 0b01101001) receive the reduce result; inactive
        // lanes are copied from `src`.
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
        let r = _mm512_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a);
        let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
        assert_eq_m512d(r, e);
    }
10712
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_reduce_pd() {
        // Active lanes (mask 0b01101001) receive the reduce result; inactive
        // lanes are zeroed out.
        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm512_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a);
        let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
        assert_eq_m512d(r, e);
    }
10720
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_reduce_round_ps() {
        // imm8 = 16 | _MM_FROUND_TO_ZERO: remainder modulo 0.5, truncated, in
        // every one of the 16 f32 lanes.
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let r = _mm512_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
        let e = _mm512_set_ps(
            0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.,
        );
        assert_eq_m512(r, e);
    }
10733
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_reduce_round_ps() {
        // Active lanes (mask 0b0110100100111100) receive the reduce result
        // (remainder modulo 0.5, truncated); inactive lanes are copied from `src`.
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let src = _mm512_set_ps(
            5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
        );
        let r = _mm512_mask_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            src,
            0b0110100100111100,
            a,
        );
        let e = _mm512_set_ps(
            5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20.,
        );
        assert_eq_m512(r, e);
    }
10753
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_reduce_round_ps() {
        // Active lanes (mask 0b0110100100111100) receive the reduce result;
        // inactive lanes are zeroed out.
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let r = _mm512_maskz_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
            0b0110100100111100,
            a,
        );
        let e = _mm512_set_ps(
            0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
10769
10770    #[simd_test(enable = "avx512dq,avx512vl")]
10771    unsafe fn test_mm_reduce_ps() {
10772        let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
10773        let r = _mm_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10774        let e = _mm_set_ps(0.25, 0., 0.25, 0.);
10775        assert_eq_m128(r, e);
10776    }
10777
10778    #[simd_test(enable = "avx512dq,avx512vl")]
10779    unsafe fn test_mm_mask_reduce_ps() {
10780        let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
10781        let src = _mm_set_ps(2., 3., 4., 5.);
10782        let r = _mm_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a);
10783        let e = _mm_set_ps(2., 0., 0.25, 5.);
10784        assert_eq_m128(r, e);
10785    }
10786
10787    #[simd_test(enable = "avx512dq,avx512vl")]
10788    unsafe fn test_mm_maskz_reduce_ps() {
10789        let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
10790        let r = _mm_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a);
10791        let e = _mm_set_ps(0., 0., 0.25, 0.);
10792        assert_eq_m128(r, e);
10793    }
10794
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_reduce_ps() {
        // Remainder modulo 0.5 with truncation in every one of the eight lanes.
        let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm256_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm256_set_ps(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
        assert_eq_m256(r, e);
    }
10802
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_reduce_ps() {
        // Active lanes (mask 0b01101001) receive the reduce result; inactive
        // lanes are copied from `src`.
        let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let src = _mm256_set_ps(3., 4., 5., 6., 7., 8., 9., 10.);
        let r = _mm256_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a);
        let e = _mm256_set_ps(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
        assert_eq_m256(r, e);
    }
10811
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_maskz_reduce_ps() {
        // Active lanes (mask 0b01101001) receive the reduce result; inactive
        // lanes are zeroed out.
        let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
        let r = _mm256_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a);
        let e = _mm256_set_ps(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
        assert_eq_m256(r, e);
    }
10819
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_reduce_ps() {
        // Non-rounding variant: remainder modulo 0.5, truncated, in all 16 lanes.
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let r = _mm512_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
        let e = _mm512_set_ps(
            0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.,
        );
        assert_eq_m512(r, e);
    }
10832
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_reduce_ps() {
        // Active lanes (mask 0b0110100100111100) receive the reduce result;
        // inactive lanes are copied from `src`.
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let src = _mm512_set_ps(
            5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
        );
        let r = _mm512_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110100100111100, a);
        let e = _mm512_set_ps(
            5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20.,
        );
        assert_eq_m512(r, e);
    }
10848
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_maskz_reduce_ps() {
        // Active lanes (mask 0b0110100100111100) receive the reduce result;
        // inactive lanes are zeroed out.
        let a = _mm512_set_ps(
            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
            4.0,
        );
        let r = _mm512_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110100100111100, a);
        let e = _mm512_set_ps(
            0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
10861
10862    #[simd_test(enable = "avx512dq")]
10863    unsafe fn test_mm_reduce_round_sd() {
10864        let a = _mm_set_pd(1., 2.);
10865        let b = _mm_set_sd(0.25);
10866        let r = _mm_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
10867        let e = _mm_set_pd(1., 0.25);
10868        assert_eq_m128d(r, e);
10869    }
10870
10871    #[simd_test(enable = "avx512dq")]
10872    unsafe fn test_mm_mask_reduce_round_sd() {
10873        let a = _mm_set_pd(1., 2.);
10874        let b = _mm_set_sd(0.25);
10875        let c = _mm_set_pd(3., 4.);
10876        let r = _mm_mask_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10877            c, 0b0, a, b,
10878        );
10879        let e = _mm_set_pd(1., 4.);
10880        assert_eq_m128d(r, e);
10881    }
10882
10883    #[simd_test(enable = "avx512dq")]
10884    unsafe fn test_mm_maskz_reduce_round_sd() {
10885        let a = _mm_set_pd(1., 2.);
10886        let b = _mm_set_sd(0.25);
10887        let r =
10888            _mm_maskz_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b);
10889        let e = _mm_set_pd(1., 0.);
10890        assert_eq_m128d(r, e);
10891    }
10892
10893    #[simd_test(enable = "avx512dq")]
10894    unsafe fn test_mm_reduce_sd() {
10895        let a = _mm_set_pd(1., 2.);
10896        let b = _mm_set_sd(0.25);
10897        let r = _mm_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
10898        let e = _mm_set_pd(1., 0.25);
10899        assert_eq_m128d(r, e);
10900    }
10901
10902    #[simd_test(enable = "avx512dq")]
10903    unsafe fn test_mm_mask_reduce_sd() {
10904        let a = _mm_set_pd(1., 2.);
10905        let b = _mm_set_sd(0.25);
10906        let c = _mm_set_pd(3., 4.);
10907        let r = _mm_mask_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b);
10908        let e = _mm_set_pd(1., 4.);
10909        assert_eq_m128d(r, e);
10910    }
10911
10912    #[simd_test(enable = "avx512dq")]
10913    unsafe fn test_mm_maskz_reduce_sd() {
10914        let a = _mm_set_pd(1., 2.);
10915        let b = _mm_set_sd(0.25);
10916        let r = _mm_maskz_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b);
10917        let e = _mm_set_pd(1., 0.);
10918        assert_eq_m128d(r, e);
10919    }
10920
10921    #[simd_test(enable = "avx512dq")]
10922    unsafe fn test_mm_reduce_round_ss() {
10923        let a = _mm_set_ps(1., 2., 3., 4.);
10924        let b = _mm_set_ss(0.25);
10925        let r = _mm_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
10926        let e = _mm_set_ps(1., 2., 3., 0.25);
10927        assert_eq_m128(r, e);
10928    }
10929
10930    #[simd_test(enable = "avx512dq")]
10931    unsafe fn test_mm_mask_reduce_round_ss() {
10932        let a = _mm_set_ps(1., 2., 3., 4.);
10933        let b = _mm_set_ss(0.25);
10934        let c = _mm_set_ps(5., 6., 7., 8.);
10935        let r = _mm_mask_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10936            c, 0b0, a, b,
10937        );
10938        let e = _mm_set_ps(1., 2., 3., 8.);
10939        assert_eq_m128(r, e);
10940    }
10941
10942    #[simd_test(enable = "avx512dq")]
10943    unsafe fn test_mm_maskz_reduce_round_ss() {
10944        let a = _mm_set_ps(1., 2., 3., 4.);
10945        let b = _mm_set_ss(0.25);
10946        let r =
10947            _mm_maskz_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b);
10948        let e = _mm_set_ps(1., 2., 3., 0.);
10949        assert_eq_m128(r, e);
10950    }
10951
10952    #[simd_test(enable = "avx512dq")]
10953    unsafe fn test_mm_reduce_ss() {
10954        let a = _mm_set_ps(1., 2., 3., 4.);
10955        let b = _mm_set_ss(0.25);
10956        let r = _mm_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
10957        let e = _mm_set_ps(1., 2., 3., 0.25);
10958        assert_eq_m128(r, e);
10959    }
10960
10961    #[simd_test(enable = "avx512dq")]
10962    unsafe fn test_mm_mask_reduce_ss() {
10963        let a = _mm_set_ps(1., 2., 3., 4.);
10964        let b = _mm_set_ss(0.25);
10965        let c = _mm_set_ps(5., 6., 7., 8.);
10966        let r = _mm_mask_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b);
10967        let e = _mm_set_ps(1., 2., 3., 8.);
10968        assert_eq_m128(r, e);
10969    }
10970
10971    #[simd_test(enable = "avx512dq")]
10972    unsafe fn test_mm_maskz_reduce_ss() {
10973        let a = _mm_set_ps(1., 2., 3., 4.);
10974        let b = _mm_set_ss(0.25);
10975        let r = _mm_maskz_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b);
10976        let e = _mm_set_ps(1., 2., 3., 0.);
10977        assert_eq_m128(r, e);
10978    }
10979
10980    #[simd_test(enable = "avx512dq,avx512vl")]
10981    unsafe fn test_mm_fpclass_pd_mask() {
10982        let a = _mm_set_pd(1., f64::INFINITY);
10983        let r = _mm_fpclass_pd_mask::<0x18>(a);
10984        let e = 0b01;
10985        assert_eq!(r, e);
10986    }
10987
10988    #[simd_test(enable = "avx512dq,avx512vl")]
10989    unsafe fn test_mm_mask_fpclass_pd_mask() {
10990        let a = _mm_set_pd(1., f64::INFINITY);
10991        let r = _mm_mask_fpclass_pd_mask::<0x18>(0b10, a);
10992        let e = 0b00;
10993        assert_eq!(r, e);
10994    }
10995
10996    #[simd_test(enable = "avx512dq,avx512vl")]
10997    unsafe fn test_mm256_fpclass_pd_mask() {
10998        let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0);
10999        let r = _mm256_fpclass_pd_mask::<0x18>(a);
11000        let e = 0b0110;
11001        assert_eq!(r, e);
11002    }
11003
11004    #[simd_test(enable = "avx512dq,avx512vl")]
11005    unsafe fn test_mm256_mask_fpclass_pd_mask() {
11006        let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0);
11007        let r = _mm256_mask_fpclass_pd_mask::<0x18>(0b1010, a);
11008        let e = 0b0010;
11009        assert_eq!(r, e);
11010    }
11011
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_fpclass_pd_mask() {
        // imm8 0x18 = +inf (0x08) | -inf (0x10): only the two infinity lanes
        // (bits 6 and 5 of the result) are flagged; NaN, zeros, and the
        // subnormal-range value do not match these classes.
        let a = _mm512_set_pd(
            1.,
            f64::INFINITY,
            f64::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f64::NAN,
            1.0e-308,
        );
        let r = _mm512_fpclass_pd_mask::<0x18>(a);
        let e = 0b01100000;
        assert_eq!(r, e);
    }
11028
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_fpclass_pd_mask() {
        // Unmasked result would be 0b01100000 (the two infinity lanes);
        // mask 0b10101010 keeps only the odd lanes, leaving bit 5.
        let a = _mm512_set_pd(
            1.,
            f64::INFINITY,
            f64::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f64::NAN,
            1.0e-308,
        );
        let r = _mm512_mask_fpclass_pd_mask::<0x18>(0b10101010, a);
        let e = 0b00100000;
        assert_eq!(r, e);
    }
11045
11046    #[simd_test(enable = "avx512dq,avx512vl")]
11047    unsafe fn test_mm_fpclass_ps_mask() {
11048        let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0);
11049        let r = _mm_fpclass_ps_mask::<0x18>(a);
11050        let e = 0b0110;
11051        assert_eq!(r, e);
11052    }
11053
11054    #[simd_test(enable = "avx512dq,avx512vl")]
11055    unsafe fn test_mm_mask_fpclass_ps_mask() {
11056        let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0);
11057        let r = _mm_mask_fpclass_ps_mask::<0x18>(0b1010, a);
11058        let e = 0b0010;
11059        assert_eq!(r, e);
11060    }
11061
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_fpclass_ps_mask() {
        // imm8 0x18 = +inf | -inf: only the two infinity lanes (result bits 6
        // and 5) are flagged; NaN, zeros, and the tiny value do not match.
        let a = _mm256_set_ps(
            1.,
            f32::INFINITY,
            f32::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f32::NAN,
            1.0e-38,
        );
        let r = _mm256_fpclass_ps_mask::<0x18>(a);
        let e = 0b01100000;
        assert_eq!(r, e);
    }
11078
    #[simd_test(enable = "avx512dq,avx512vl")]
    unsafe fn test_mm256_mask_fpclass_ps_mask() {
        // Unmasked result would be 0b01100000 (the two infinity lanes);
        // mask 0b10101010 keeps only the odd lanes, leaving bit 5.
        let a = _mm256_set_ps(
            1.,
            f32::INFINITY,
            f32::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f32::NAN,
            1.0e-38,
        );
        let r = _mm256_mask_fpclass_ps_mask::<0x18>(0b10101010, a);
        let e = 0b00100000;
        assert_eq!(r, e);
    }
11095
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_fpclass_ps_mask() {
        // imm8 0x18 = +inf | -inf: the four infinity lanes (result bits 14, 13,
        // 6, and 5) are flagged; NaNs, zeros, and tiny values do not match.
        let a = _mm512_set_ps(
            1.,
            f32::INFINITY,
            f32::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f32::NAN,
            1.0e-38,
            -1.,
            f32::NEG_INFINITY,
            f32::INFINITY,
            -0.0,
            0.0,
            2.0,
            f32::NAN,
            -1.0e-38,
        );
        let r = _mm512_fpclass_ps_mask::<0x18>(a);
        let e = 0b0110000001100000;
        assert_eq!(r, e);
    }
11120
    #[simd_test(enable = "avx512dq")]
    unsafe fn test_mm512_mask_fpclass_ps_mask() {
        // Unmasked result would be 0b0110000001100000 (the four infinity lanes);
        // mask 0b1010101010101010 keeps only the odd lanes, leaving bits 13 and 5.
        let a = _mm512_set_ps(
            1.,
            f32::INFINITY,
            f32::NEG_INFINITY,
            0.0,
            -0.0,
            -2.0,
            f32::NAN,
            1.0e-38,
            -1.,
            f32::NEG_INFINITY,
            f32::INFINITY,
            -0.0,
            0.0,
            2.0,
            f32::NAN,
            -1.0e-38,
        );
        let r = _mm512_mask_fpclass_ps_mask::<0x18>(0b1010101010101010, a);
        let e = 0b0010000000100000;
        assert_eq!(r, e);
    }
11145
11146    #[simd_test(enable = "avx512dq")]
11147    unsafe fn test_mm_fpclass_sd_mask() {
11148        let a = _mm_set_pd(1., f64::INFINITY);
11149        let r = _mm_fpclass_sd_mask::<0x18>(a);
11150        let e = 0b1;
11151        assert_eq!(r, e);
11152    }
11153
11154    #[simd_test(enable = "avx512dq")]
11155    unsafe fn test_mm_mask_fpclass_sd_mask() {
11156        let a = _mm_set_sd(f64::INFINITY);
11157        let r = _mm_mask_fpclass_sd_mask::<0x18>(0b0, a);
11158        let e = 0b0;
11159        assert_eq!(r, e);
11160    }
11161
11162    #[simd_test(enable = "avx512dq")]
11163    unsafe fn test_mm_fpclass_ss_mask() {
11164        let a = _mm_set_ss(f32::INFINITY);
11165        let r = _mm_fpclass_ss_mask::<0x18>(a);
11166        let e = 0b1;
11167        assert_eq!(r, e);
11168    }
11169
11170    #[simd_test(enable = "avx512dq")]
11171    unsafe fn test_mm_mask_fpclass_ss_mask() {
11172        let a = _mm_set_ss(f32::INFINITY);
11173        let r = _mm_mask_fpclass_ss_mask::<0x18>(0b0, a);
11174        let e = 0b0;
11175        assert_eq!(r, e);
11176    }
11177}