// Source file: core/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
1//! ARMv8 ASIMD intrinsics
2
3#![allow(non_camel_case_types)]
4
5#[rustfmt::skip]
6mod generated;
7#[rustfmt::skip]
8#[stable(feature = "neon_intrinsics", since = "1.59.0")]
9pub use self::generated::*;
10
11// FIXME: replace neon with asimd
12
13use crate::{
14    core_arch::{arm_shared::*, simd::*},
15    hint::unreachable_unchecked,
16    intrinsics::{simd::*, *},
17    mem::transmute,
18};
19#[cfg(test)]
20use stdarch_test::assert_instr;
21
// The vector types themselves are declared through stdarch's internal
// `types!` macro (defined elsewhere in the crate); `N x f64` declares a
// vector of N packed `f64` lanes.
types! {
    #![stable(feature = "neon_intrinsics", since = "1.59.0")]

    /// ARM-specific 64-bit wide vector of one packed `f64`.
    pub struct float64x1_t(1 x f64); // FIXME: check this!
    /// ARM-specific 128-bit wide vector of two packed `f64`.
    pub struct float64x2_t(2 x f64);
}
30
// Aggregates of 64-bit `f64` vectors. `#[repr(C)]` keeps the fields in
// declaration order with C-compatible layout.

/// ARM-specific type containing two `float64x1_t` vectors.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x1x2_t(pub float64x1_t, pub float64x1_t);
/// ARM-specific type containing three `float64x1_t` vectors.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x1x3_t(pub float64x1_t, pub float64x1_t, pub float64x1_t);
/// ARM-specific type containing four `float64x1_t` vectors.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x1x4_t(
    pub float64x1_t,
    pub float64x1_t,
    pub float64x1_t,
    pub float64x1_t,
);
51
// Aggregates of 128-bit `f64` vectors, mirroring the `float64x1xN_t`
// family above. `#[repr(C)]` keeps the fields in declaration order.

/// ARM-specific type containing two `float64x2_t` vectors.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x2x2_t(pub float64x2_t, pub float64x2_t);
/// ARM-specific type containing three `float64x2_t` vectors.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x2x3_t(pub float64x2_t, pub float64x2_t, pub float64x2_t);
/// ARM-specific type containing four `float64x2_t` vectors.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x2x4_t(
    pub float64x2_t,
    pub float64x2_t,
    pub float64x2_t,
    pub float64x2_t,
);
72
/// Duplicate vector element to vector or scalar
///
/// Copies lane `N2` of `b` into lane `N1` of the result. Both vectors
/// here have exactly one 64-bit lane, so both indices must be 0 and the
/// result is simply `b` (`_a` is never read) — hence the `nop` codegen.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_lane_s64<const N1: i32, const N2: i32>(_a: int64x1_t, b: int64x1_t) -> int64x1_t {
    static_assert!(N1 == 0);
    static_assert!(N2 == 0);
    b
}

/// Duplicate vector element to vector or scalar
///
/// Unsigned counterpart of [`vcopy_lane_s64`]: single-lane vectors, so
/// both lane indices must be 0 and the result is `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_lane_u64<const N1: i32, const N2: i32>(_a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
    static_assert!(N1 == 0);
    static_assert!(N2 == 0);
    b
}

/// Duplicate vector element to vector or scalar
///
/// Polynomial counterpart of [`vcopy_lane_s64`]: single-lane vectors, so
/// both lane indices must be 0 and the result is `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_lane_p64<const N1: i32, const N2: i32>(_a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
    static_assert!(N1 == 0);
    static_assert!(N2 == 0);
    b
}

/// Duplicate vector element to vector or scalar
///
/// Floating-point counterpart of [`vcopy_lane_s64`]: single-lane
/// vectors, so both lane indices must be 0 and the result is `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_lane_f64<const N1: i32, const N2: i32>(
    _a: float64x1_t,
    b: float64x1_t,
) -> float64x1_t {
    static_assert!(N1 == 0);
    static_assert!(N2 == 0);
    b
}
123
/// Duplicate vector element to vector or scalar
///
/// Copies lane `LANE2` of the 128-bit vector `b` into lane `LANE1` of
/// the single-lane result. `LANE1` must be 0; `LANE2` may be 0 or 1
/// (checked by `static_assert_uimm_bits!(LANE2, 1)`). `_a` is never
/// read — the result is entirely the extracted lane of `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_laneq_s64<const LANE1: i32, const LANE2: i32>(
    _a: int64x1_t,
    b: int64x2_t,
) -> int64x1_t {
    static_assert!(LANE1 == 0);
    static_assert_uimm_bits!(LANE2, 1);
    // Extract the selected i64 lane and rewrap it as a 1-lane vector.
    unsafe { transmute::<i64, _>(simd_extract!(b, LANE2 as u32)) }
}

/// Duplicate vector element to vector or scalar
///
/// Unsigned counterpart of [`vcopy_laneq_s64`].
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_laneq_u64<const LANE1: i32, const LANE2: i32>(
    _a: uint64x1_t,
    b: uint64x2_t,
) -> uint64x1_t {
    static_assert!(LANE1 == 0);
    static_assert_uimm_bits!(LANE2, 1);
    unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
}

/// Duplicate vector element to vector or scalar
///
/// Polynomial counterpart of [`vcopy_laneq_s64`]; the lane is moved
/// through its `u64` bit representation.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_laneq_p64<const LANE1: i32, const LANE2: i32>(
    _a: poly64x1_t,
    b: poly64x2_t,
) -> poly64x1_t {
    static_assert!(LANE1 == 0);
    static_assert_uimm_bits!(LANE2, 1);
    unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
}

/// Duplicate vector element to vector or scalar
///
/// Floating-point counterpart of [`vcopy_laneq_s64`].
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_laneq_f64<const LANE1: i32, const LANE2: i32>(
    _a: float64x1_t,
    b: float64x2_t,
) -> float64x1_t {
    static_assert!(LANE1 == 0);
    static_assert_uimm_bits!(LANE2, 1);
    unsafe { transmute::<f64, _>(simd_extract!(b, LANE2 as u32)) }
}
183
/// Load multiple single-element structures to one, two, three, or four registers
///
/// A one-lane "dup" load is just a plain load, so this delegates to
/// `vld1_f64` and compiles to a single `ldr`.
///
/// # Safety
/// `ptr` must be valid for a read of one `f64`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vld1_dup_f64(ptr: *const f64) -> float64x1_t {
    vld1_f64(ptr)
}

/// Load multiple single-element structures to one, two, three, or four registers
///
/// Loads `*ptr` into lane 0 of a zeroed vector, then broadcasts that
/// lane to both lanes with a `[0, 0]` shuffle (an `ld1r`).
///
/// # Safety
/// `ptr` must be valid for a read of one `f64`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ld1r))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vld1q_dup_f64(ptr: *const f64) -> float64x2_t {
    let x = vld1q_lane_f64::<0>(ptr, transmute(f64x2::splat(0.)));
    simd_shuffle!(x, x, [0, 0])
}

/// Load one single-element structure to one lane of one register.
///
/// Replaces lane `LANE` of `src` with `*ptr`; the only valid lane of a
/// 1-lane vector is 0.
///
/// # Safety
/// `ptr` must be valid for a read of one `f64`.
#[inline]
#[target_feature(enable = "neon")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(ldr, LANE = 0))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vld1_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x1_t) -> float64x1_t {
    static_assert!(LANE == 0);
    simd_insert!(src, LANE as u32, *ptr)
}

/// Load one single-element structure to one lane of one register.
///
/// Replaces lane `LANE` (0 or 1) of `src` with `*ptr`, leaving the
/// other lane untouched.
///
/// # Safety
/// `ptr` must be valid for a read of one `f64`.
#[inline]
#[target_feature(enable = "neon")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(ld1, LANE = 1))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vld1q_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x2_t) -> float64x2_t {
    static_assert_uimm_bits!(LANE, 1);
    simd_insert!(src, LANE as u32, *ptr)
}
224
/// Bitwise Select instructions. This instruction sets each bit in the destination SIMD&FP register
/// to the corresponding bit from the first source SIMD&FP register when the original
/// destination bit was 1, otherwise from the second source SIMD&FP register.
///
/// Computes `(a & b) | (!a & c)` bitwise: each result bit comes from
/// `b` where the mask `a` has a 1 bit, and from `c` elsewhere.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(bsl))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vbsl_f64(a: uint64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
    // All-ones vector used to complement the mask (`a ^ !0 == !a`).
    let not = int64x1_t::splat(-1);
    unsafe {
        transmute(simd_or(
            simd_and(a, transmute(b)),
            simd_and(simd_xor(a, transmute(not)), transmute(c)),
        ))
    }
}
/// Bitwise Select.
///
/// Polynomial variant of [`vbsl_f64`]: `(a & b) | (!a & c)` bitwise.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(bsl))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vbsl_p64(a: poly64x1_t, b: poly64x1_t, c: poly64x1_t) -> poly64x1_t {
    let not = int64x1_t::splat(-1);
    unsafe { simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c)) }
}
/// Bitwise Select. (128-bit)
///
/// 128-bit variant of [`vbsl_f64`]: `(a & b) | (!a & c)` bitwise.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(bsl))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vbslq_f64(a: uint64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
    let not = int64x2_t::splat(-1);
    unsafe {
        transmute(simd_or(
            simd_and(a, transmute(b)),
            simd_and(simd_xor(a, transmute(not)), transmute(c)),
        ))
    }
}
/// Bitwise Select. (128-bit)
///
/// 128-bit polynomial variant of [`vbsl_f64`]: `(a & b) | (!a & c)`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(bsl))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vbslq_p64(a: poly64x2_t, b: poly64x2_t, c: poly64x2_t) -> poly64x2_t {
    let not = int64x2_t::splat(-1);
    unsafe { simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c)) }
}
273
/// Vector add.
///
/// Lane-wise floating-point addition of two 1-lane `f64` vectors.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fadd))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vadd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
    unsafe { simd_add(a, b) }
}

/// Vector add.
///
/// Lane-wise floating-point addition of two 2-lane `f64` vectors.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fadd))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
    unsafe { simd_add(a, b) }
}

/// Vector add.
///
/// Lane-wise signed integer addition of two 1-lane `i64` vectors.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
    unsafe { simd_add(a, b) }
}

/// Vector add.
///
/// Lane-wise unsigned integer addition of two 1-lane `u64` vectors.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
    unsafe { simd_add(a, b) }
}
309
310/// Vector add.
311#[inline]
312#[target_feature(enable = "neon")]
313#[cfg_attr(test, assert_instr(add))]
314#[stable(feature = "neon_intrinsics", since = "1.59.0")]
315pub fn vaddd_s64(a: i64, b: i64) -> i64 {
316    a.wrapping_add(b)
317}
318
319/// Vector add.
320#[inline]
321#[target_feature(enable = "neon")]
322#[cfg_attr(test, assert_instr(add))]
323#[stable(feature = "neon_intrinsics", since = "1.59.0")]
324pub fn vaddd_u64(a: u64, b: u64) -> u64 {
325    a.wrapping_add(b)
326}
327
/// Extract vector from pair of vectors
///
/// With 1-lane vectors the only valid starting position is `N == 0`,
/// which selects `a` in its entirety; `_b` is never read.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vext_p64<const N: i32>(a: poly64x1_t, _b: poly64x1_t) -> poly64x1_t {
    static_assert!(N == 0);
    a
}

/// Extract vector from pair of vectors
///
/// Floating-point counterpart of [`vext_p64`]: `N` must be 0, result
/// is `a`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vext_f64<const N: i32>(a: float64x1_t, _b: float64x1_t) -> float64x1_t {
    static_assert!(N == 0);
    a
}
349
/// Duplicate vector element to vector or scalar
///
/// Builds a 1-lane polynomial vector holding `value` (routed through
/// its `u64` bit representation).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmov))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vdup_n_p64(value: p64) -> poly64x1_t {
    unsafe { transmute(u64x1::new(value)) }
}

/// Duplicate vector element to vector or scalar
///
/// Builds a 1-lane `f64` vector holding `value`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vdup_n_f64(value: f64) -> float64x1_t {
    float64x1_t::splat(value)
}

/// Duplicate vector element to vector or scalar
///
/// Broadcasts `value` into both lanes of a 2-lane polynomial vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(dup))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vdupq_n_p64(value: p64) -> poly64x2_t {
    unsafe { transmute(u64x2::new(value, value)) }
}

/// Duplicate vector element to vector or scalar
///
/// Broadcasts `value` into both lanes of a 2-lane `f64` vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(dup))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vdupq_n_f64(value: f64) -> float64x2_t {
    float64x2_t::splat(value)
}

/// Duplicate vector element to vector or scalar
///
/// Alias of [`vdup_n_p64`] under the `vmov` intrinsic name.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmov))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmov_n_p64(value: p64) -> poly64x1_t {
    vdup_n_p64(value)
}

/// Duplicate vector element to vector or scalar
///
/// Alias of [`vdup_n_f64`] under the `vmov` intrinsic name.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmov_n_f64(value: f64) -> float64x1_t {
    vdup_n_f64(value)
}

/// Duplicate vector element to vector or scalar
///
/// Alias of [`vdupq_n_p64`] under the `vmov` intrinsic name.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(dup))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmovq_n_p64(value: p64) -> poly64x2_t {
    vdupq_n_p64(value)
}

/// Duplicate vector element to vector or scalar
///
/// Alias of [`vdupq_n_f64`] under the `vmov` intrinsic name.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(dup))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmovq_n_f64(value: f64) -> float64x2_t {
    vdupq_n_f64(value)
}
421
/// Duplicate vector element to vector or scalar
///
/// Returns the high (index 1) lane of `a` as a 1-lane vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vget_high_f64(a: float64x2_t) -> float64x1_t {
    unsafe { float64x1_t([simd_extract!(a, 1)]) }
}

/// Duplicate vector element to vector or scalar
///
/// Returns the high (index 1) lane of `a` as a 1-lane polynomial
/// vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ext))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
    unsafe { transmute(u64x1::new(simd_extract!(a, 1))) }
}

/// Duplicate vector element to vector or scalar
///
/// Returns the low (index 0) lane of `a` as a 1-lane vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vget_low_f64(a: float64x2_t) -> float64x1_t {
    unsafe { float64x1_t([simd_extract!(a, 0)]) }
}

/// Duplicate vector element to vector or scalar
///
/// Returns the low (index 0) lane of `a` as a 1-lane polynomial
/// vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
    unsafe { transmute(u64x1::new(simd_extract!(a, 0))) }
}

/// Duplicate vector element to vector or scalar
///
/// Extracts lane `IMM5` of `v` as a scalar; the only valid lane of a
/// 1-lane vector is 0.
#[inline]
#[target_feature(enable = "neon")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(nop, IMM5 = 0)
)]
pub fn vget_lane_f64<const IMM5: i32>(v: float64x1_t) -> f64 {
    static_assert!(IMM5 == 0);
    unsafe { simd_extract!(v, IMM5 as u32) }
}

/// Duplicate vector element to vector or scalar
///
/// Extracts lane `IMM5` (0 or 1) of the 2-lane vector `v` as a scalar.
#[inline]
#[target_feature(enable = "neon")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(nop, IMM5 = 0)
)]
pub fn vgetq_lane_f64<const IMM5: i32>(v: float64x2_t) -> f64 {
    static_assert_uimm_bits!(IMM5, 1);
    unsafe { simd_extract!(v, IMM5 as u32) }
}
485
/// Vector combine
///
/// Concatenates two 1-lane vectors into one 2-lane vector: `low`
/// becomes lane 0 and `high` becomes lane 1 (the `[0, 1]` shuffle
/// indexes the lanes of `low` then `high`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(mov))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcombine_f64(low: float64x1_t, high: float64x1_t) -> float64x2_t {
    unsafe { simd_shuffle!(low, high, [0, 1]) }
}
494
/// Shift left
///
/// Scalar left shift by the constant `N`, which must fit in 6 bits
/// (0..=63), so the shift amount is always in range for an `i64`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vshld_n_s64<const N: i32>(a: i64) -> i64 {
    static_assert_uimm_bits!(N, 6);
    a << N
}

/// Shift left
///
/// Unsigned counterpart of [`vshld_n_s64`]: scalar left shift by a
/// constant `N` in 0..=63.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vshld_n_u64<const N: i32>(a: u64) -> u64 {
    static_assert_uimm_bits!(N, 6);
    a << N
}

/// Signed shift right
///
/// Arithmetic right shift by `N` in 1..=64. A shift by 64 is mapped to
/// 63: for an arithmetic shift both fill every bit with the sign bit,
/// and 63 is a legal Rust shift amount where 64 would overflow.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vshrd_n_s64<const N: i32>(a: i64) -> i64 {
    static_assert!(N >= 1 && N <= 64);
    // Route the amount through a runtime `n` so the compiler never sees
    // a constant out-of-range shift.
    let n: i32 = if N == 64 { 63 } else { N };
    a >> n
}

/// Unsigned shift right
///
/// Logical right shift by `N` in 1..=64. A logical shift by the full
/// width yields 0, so `N == 64` returns 0 directly; other amounts go
/// through a runtime `n` so the compiler never sees a constant
/// out-of-range shift.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vshrd_n_u64<const N: i32>(a: u64) -> u64 {
    static_assert!(N >= 1 && N <= 64);
    let n: i32 = if N == 64 {
        return 0;
    } else {
        N
    };
    a >> n
}

/// Signed shift right and accumulate
///
/// Computes `a + (b >> N)` with a wrapping add; the shift follows
/// [`vshrd_n_s64`] semantics (arithmetic, `N` in 1..=64).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
    static_assert!(N >= 1 && N <= 64);
    a.wrapping_add(vshrd_n_s64::<N>(b))
}

/// Unsigned shift right and accumulate
///
/// Computes `a + (b >> N)` with a wrapping add; the shift follows
/// [`vshrd_n_u64`] semantics (logical, `N` in 1..=64).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
    static_assert!(N >= 1 && N <= 64);
    a.wrapping_add(vshrd_n_u64::<N>(b))
}
566
567#[cfg(test)]
568mod tests {
569    use crate::core_arch::aarch64::test_support::*;
570    use crate::core_arch::arm_shared::test_support::*;
571    use crate::core_arch::{aarch64::neon::*, aarch64::*, simd::*};
572    use stdarch_test::simd_test;
573
574    #[simd_test(enable = "neon")]
575    fn test_vadd_f64() {
576        let a = f64x1::from_array([1.]);
577        let b = f64x1::from_array([8.]);
578        let e = f64x1::from_array([9.]);
579        let r = f64x1::from(vadd_f64(a.into(), b.into()));
580        assert_eq!(r, e);
581    }
582
583    #[simd_test(enable = "neon")]
584    fn test_vaddq_f64() {
585        let a = f64x2::new(1., 2.);
586        let b = f64x2::new(8., 7.);
587        let e = f64x2::new(9., 9.);
588        let r = f64x2::from(vaddq_f64(a.into(), b.into()));
589        assert_eq!(r, e);
590    }
591
592    #[simd_test(enable = "neon")]
593    fn test_vadd_s64() {
594        let a = i64x1::from_array([1]);
595        let b = i64x1::from_array([8]);
596        let e = i64x1::from_array([9]);
597        let r = i64x1::from(vadd_s64(a.into(), b.into()));
598        assert_eq!(r, e);
599    }
600
601    #[simd_test(enable = "neon")]
602    fn test_vadd_u64() {
603        let a = u64x1::from_array([1]);
604        let b = u64x1::from_array([8]);
605        let e = u64x1::from_array([9]);
606        let r = u64x1::from(vadd_u64(a.into(), b.into()));
607        assert_eq!(r, e);
608    }
609
610    #[simd_test(enable = "neon")]
611    fn test_vaddd_s64() {
612        let a = 1_i64;
613        let b = 8_i64;
614        let e = 9_i64;
615        let r: i64 = vaddd_s64(a, b);
616        assert_eq!(r, e);
617    }
618
619    #[simd_test(enable = "neon")]
620    fn test_vaddd_u64() {
621        let a = 1_u64;
622        let b = 8_u64;
623        let e = 9_u64;
624        let r: u64 = vaddd_u64(a, b);
625        assert_eq!(r, e);
626    }
627
628    #[simd_test(enable = "neon")]
629    fn test_vext_p64() {
630        let a = u64x1::new(0);
631        let b = u64x1::new(1);
632        let e = u64x1::new(0);
633        let r = u64x1::from(vext_p64::<0>(a.into(), b.into()));
634        assert_eq!(r, e);
635    }
636
637    #[simd_test(enable = "neon")]
638    fn test_vext_f64() {
639        let a = f64x1::new(0.);
640        let b = f64x1::new(1.);
641        let e = f64x1::new(0.);
642        let r = f64x1::from(vext_f64::<0>(a.into(), b.into()));
643        assert_eq!(r, e);
644    }
645
646    #[simd_test(enable = "neon")]
647    fn test_vshld_n_s64() {
648        let a: i64 = 1;
649        let e: i64 = 4;
650        let r: i64 = vshld_n_s64::<2>(a);
651        assert_eq!(r, e);
652    }
653
654    #[simd_test(enable = "neon")]
655    fn test_vshld_n_u64() {
656        let a: u64 = 1;
657        let e: u64 = 4;
658        let r: u64 = vshld_n_u64::<2>(a);
659        assert_eq!(r, e);
660    }
661
662    #[simd_test(enable = "neon")]
663    fn test_vshrd_n_s64() {
664        let a: i64 = 4;
665        let e: i64 = 1;
666        let r: i64 = vshrd_n_s64::<2>(a);
667        assert_eq!(r, e);
668    }
669
670    #[simd_test(enable = "neon")]
671    fn test_vshrd_n_u64() {
672        let a: u64 = 4;
673        let e: u64 = 1;
674        let r: u64 = vshrd_n_u64::<2>(a);
675        assert_eq!(r, e);
676    }
677
678    #[simd_test(enable = "neon")]
679    fn test_vsrad_n_s64() {
680        let a: i64 = 1;
681        let b: i64 = 4;
682        let e: i64 = 2;
683        let r: i64 = vsrad_n_s64::<2>(a, b);
684        assert_eq!(r, e);
685    }
686
687    #[simd_test(enable = "neon")]
688    fn test_vsrad_n_u64() {
689        let a: u64 = 1;
690        let b: u64 = 4;
691        let e: u64 = 2;
692        let r: u64 = vsrad_n_u64::<2>(a, b);
693        assert_eq!(r, e);
694    }
695
696    #[simd_test(enable = "neon")]
697    fn test_vdup_n_f64() {
698        let a: f64 = 3.3;
699        let e = f64x1::new(3.3);
700        let r = f64x1::from(vdup_n_f64(a));
701        assert_eq!(r, e);
702    }
703
704    #[simd_test(enable = "neon")]
705    fn test_vdup_n_p64() {
706        let a: u64 = 3;
707        let e = u64x1::new(3);
708        let r = u64x1::from(vdup_n_p64(a));
709        assert_eq!(r, e);
710    }
711
712    #[simd_test(enable = "neon")]
713    fn test_vdupq_n_f64() {
714        let a: f64 = 3.3;
715        let e = f64x2::new(3.3, 3.3);
716        let r = f64x2::from(vdupq_n_f64(a));
717        assert_eq!(r, e);
718    }
719
720    #[simd_test(enable = "neon")]
721    fn test_vdupq_n_p64() {
722        let a: u64 = 3;
723        let e = u64x2::new(3, 3);
724        let r = u64x2::from(vdupq_n_p64(a));
725        assert_eq!(r, e);
726    }
727
728    #[simd_test(enable = "neon")]
729    fn test_vmov_n_p64() {
730        let a: u64 = 3;
731        let e = u64x1::new(3);
732        let r = u64x1::from(vmov_n_p64(a));
733        assert_eq!(r, e);
734    }
735
736    #[simd_test(enable = "neon")]
737    fn test_vmov_n_f64() {
738        let a: f64 = 3.3;
739        let e = f64x1::new(3.3);
740        let r = f64x1::from(vmov_n_f64(a));
741        assert_eq!(r, e);
742    }
743
744    #[simd_test(enable = "neon")]
745    fn test_vmovq_n_p64() {
746        let a: u64 = 3;
747        let e = u64x2::new(3, 3);
748        let r = u64x2::from(vmovq_n_p64(a));
749        assert_eq!(r, e);
750    }
751
752    #[simd_test(enable = "neon")]
753    fn test_vmovq_n_f64() {
754        let a: f64 = 3.3;
755        let e = f64x2::new(3.3, 3.3);
756        let r = f64x2::from(vmovq_n_f64(a));
757        assert_eq!(r, e);
758    }
759
760    #[simd_test(enable = "neon")]
761    fn test_vget_high_f64() {
762        let a = f64x2::new(1.0, 2.0);
763        let e = f64x1::new(2.0);
764        let r = f64x1::from(vget_high_f64(a.into()));
765        assert_eq!(r, e);
766    }
767
768    #[simd_test(enable = "neon")]
769    fn test_vget_high_p64() {
770        let a = u64x2::new(1, 2);
771        let e = u64x1::new(2);
772        let r = u64x1::from(vget_high_p64(a.into()));
773        assert_eq!(r, e);
774    }
775
776    #[simd_test(enable = "neon")]
777    fn test_vget_low_f64() {
778        let a = f64x2::new(1.0, 2.0);
779        let e = f64x1::new(1.0);
780        let r = f64x1::from(vget_low_f64(a.into()));
781        assert_eq!(r, e);
782    }
783
784    #[simd_test(enable = "neon")]
785    fn test_vget_low_p64() {
786        let a = u64x2::new(1, 2);
787        let e = u64x1::new(1);
788        let r = u64x1::from(vget_low_p64(a.into()));
789        assert_eq!(r, e);
790    }
791
792    #[simd_test(enable = "neon")]
793    fn test_vget_lane_f64() {
794        let v = f64x1::new(1.0);
795        let r = vget_lane_f64::<0>(v.into());
796        assert_eq!(r, 1.0);
797    }
798
799    #[simd_test(enable = "neon")]
800    fn test_vgetq_lane_f64() {
801        let v = f64x2::new(0.0, 1.0);
802        let r = vgetq_lane_f64::<1>(v.into());
803        assert_eq!(r, 1.0);
804        let r = vgetq_lane_f64::<0>(v.into());
805        assert_eq!(r, 0.0);
806    }
807
808    #[simd_test(enable = "neon")]
809    fn test_vcopy_lane_s64() {
810        let a = i64x1::new(1);
811        let b = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
812        let e = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
813        let r = i64x1::from(vcopy_lane_s64::<0, 0>(a.into(), b.into()));
814        assert_eq!(r, e);
815    }
816
817    #[simd_test(enable = "neon")]
818    fn test_vcopy_lane_u64() {
819        let a = u64x1::new(1);
820        let b = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
821        let e = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
822        let r = u64x1::from(vcopy_lane_u64::<0, 0>(a.into(), b.into()));
823        assert_eq!(r, e);
824    }
825
826    #[simd_test(enable = "neon")]
827    fn test_vcopy_lane_p64() {
828        let a = u64x1::new(1);
829        let b = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
830        let e = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
831        let r = u64x1::from(vcopy_lane_p64::<0, 0>(a.into(), b.into()));
832        assert_eq!(r, e);
833    }
834
835    #[simd_test(enable = "neon")]
836    fn test_vcopy_lane_f64() {
837        let a = f64x1::from_array([1.]);
838        let b = f64x1::from_array([0.]);
839        let e = f64x1::from_array([0.]);
840        let r = f64x1::from(vcopy_lane_f64::<0, 0>(a.into(), b.into()));
841        assert_eq!(r, e);
842    }
843
844    #[simd_test(enable = "neon")]
845    fn test_vcopy_laneq_s64() {
846        let a = i64x1::new(1);
847        let b = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
848        let e = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
849        let r = i64x1::from(vcopy_laneq_s64::<0, 1>(a.into(), b.into()));
850        assert_eq!(r, e);
851    }
852
853    #[simd_test(enable = "neon")]
854    fn test_vcopy_laneq_u64() {
855        let a = u64x1::new(1);
856        let b = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
857        let e = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
858        let r = u64x1::from(vcopy_laneq_u64::<0, 1>(a.into(), b.into()));
859        assert_eq!(r, e);
860    }
861
862    #[simd_test(enable = "neon")]
863    fn test_vcopy_laneq_p64() {
864        let a = u64x1::new(1);
865        let b = u64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
866        let e = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
867        let r = u64x1::from(vcopy_laneq_p64::<0, 1>(a.into(), b.into()));
868        assert_eq!(r, e);
869    }
870
871    #[simd_test(enable = "neon")]
872    fn test_vcopy_laneq_f64() {
873        let a = f64x1::from_array([1.]);
874        let b = f64x2::from_array([0., 0.5]);
875        let e = f64x1::from_array([0.5]);
876        let r = f64x1::from(vcopy_laneq_f64::<0, 1>(a.into(), b.into()));
877        assert_eq!(r, e);
878    }
879
880    #[simd_test(enable = "neon")]
881    fn test_vbsl_f64() {
882        let a = u64x1::new(0x8000000000000000);
883        let b = f64x1::new(-1.23f64);
884        let c = f64x1::new(2.34f64);
885        let e = f64x1::new(-2.34f64);
886        let r = f64x1::from(vbsl_f64(a.into(), b.into(), c.into()));
887        assert_eq!(r, e);
888    }
889
890    #[simd_test(enable = "neon")]
891    fn test_vbsl_p64() {
892        let a = u64x1::new(1);
893        let b = u64x1::new(u64::MAX);
894        let c = u64x1::new(u64::MIN);
895        let e = u64x1::new(1);
896        let r = u64x1::from(vbsl_p64(a.into(), b.into(), c.into()));
897        assert_eq!(r, e);
898    }
899
900    #[simd_test(enable = "neon")]
901    fn test_vbslq_f64() {
902        let a = u64x2::new(1, 0x8000000000000000);
903        let b = f64x2::new(f64::MAX, -1.23f64);
904        let c = f64x2::new(f64::MIN, 2.34f64);
905        let e = f64x2::new(f64::MIN, -2.34f64);
906        let r = f64x2::from(vbslq_f64(a.into(), b.into(), c.into()));
907        assert_eq!(r, e);
908    }
909
910    #[simd_test(enable = "neon")]
911    fn test_vbslq_p64() {
912        let a = u64x2::new(u64::MAX, 1);
913        let b = u64x2::new(u64::MAX, u64::MAX);
914        let c = u64x2::new(u64::MIN, u64::MIN);
915        let e = u64x2::new(u64::MAX, 1);
916        let r = u64x2::from(vbslq_p64(a.into(), b.into(), c.into()));
917        assert_eq!(r, e);
918    }
919
920    #[simd_test(enable = "neon")]
921    fn test_vld1_f64() {
922        let a: [f64; 2] = [0., 1.];
923        let e = f64x1::new(1.);
924        let r = unsafe { f64x1::from(vld1_f64(a[1..].as_ptr())) };
925        assert_eq!(r, e)
926    }
927
928    #[simd_test(enable = "neon")]
929    fn test_vld1q_f64() {
930        let a: [f64; 3] = [0., 1., 2.];
931        let e = f64x2::new(1., 2.);
932        let r = unsafe { f64x2::from(vld1q_f64(a[1..].as_ptr())) };
933        assert_eq!(r, e)
934    }
935
936    #[simd_test(enable = "neon")]
937    fn test_vld1_dup_f64() {
938        let a: [f64; 2] = [1., 42.];
939        let e = f64x1::new(42.);
940        let r = unsafe { f64x1::from(vld1_dup_f64(a[1..].as_ptr())) };
941        assert_eq!(r, e)
942    }
943
944    #[simd_test(enable = "neon")]
945    fn test_vld1q_dup_f64() {
946        let elem: f64 = 42.;
947        let e = f64x2::new(42., 42.);
948        let r = unsafe { f64x2::from(vld1q_dup_f64(&elem)) };
949        assert_eq!(r, e)
950    }
951
952    #[simd_test(enable = "neon")]
953    fn test_vld1_lane_f64() {
954        let a = f64x1::new(0.);
955        let elem: f64 = 42.;
956        let e = f64x1::new(42.);
957        let r = unsafe { f64x1::from(vld1_lane_f64::<0>(&elem, a.into())) };
958        assert_eq!(r, e)
959    }
960
961    #[simd_test(enable = "neon")]
962    fn test_vld1q_lane_f64() {
963        let a = f64x2::new(0., 1.);
964        let elem: f64 = 42.;
965        let e = f64x2::new(0., 42.);
966        let r = unsafe { f64x2::from(vld1q_lane_f64::<1>(&elem, a.into())) };
967        assert_eq!(r, e)
968    }
969
970    #[simd_test(enable = "neon")]
971    fn test_vst1_f64() {
972        let mut vals = [0_f64; 2];
973        let a = f64x1::new(1.);
974
975        unsafe {
976            vst1_f64(vals[1..].as_mut_ptr(), a.into());
977        }
978
979        assert_eq!(vals[0], 0.);
980        assert_eq!(vals[1], 1.);
981    }
982
983    #[simd_test(enable = "neon")]
984    fn test_vst1q_f64() {
985        let mut vals = [0_f64; 3];
986        let a = f64x2::new(1., 2.);
987
988        unsafe {
989            vst1q_f64(vals[1..].as_mut_ptr(), a.into());
990        }
991
992        assert_eq!(vals[0], 0.);
993        assert_eq!(vals[1], 1.);
994        assert_eq!(vals[2], 2.);
995    }
996
997    macro_rules! wide_store_load_roundtrip {
998        ($elem_ty:ty, $len:expr, $vec_ty:ty, $store:expr, $load:expr) => {
999            let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty);
1000            let a: $vec_ty = transmute(vals);
1001            let mut tmp = [0 as $elem_ty; $len];
1002            $store(tmp.as_mut_ptr().cast(), a);
1003            let r: $vec_ty = $load(tmp.as_ptr().cast());
1004            let out: [$elem_ty; $len] = transmute(r);
1005            assert_eq!(out, vals);
1006        };
1007    }
1008
    // Generates one `#[simd_test]` function per `name(args)` entry, each
    // expanding the shared `wide_store_load_roundtrip!` body. Requires the
    // `fp16` target feature; compiled out entirely on arm64ec (see cfg).
    macro_rules! wide_store_load_roundtrip_fp16 {
        ($( $name:ident $args:tt);* $(;)?) => {
            $(
                #[simd_test(enable = "neon,fp16")]
                #[cfg(not(target_arch = "arm64ec"))]
                unsafe fn $name() {
                    wide_store_load_roundtrip! $args;
                }
            )*
        };
    }
1020
    // f16 x2/x3/x4 store/load round-trips; the array length is
    // lanes-per-vector times the number of vectors in the tuple type.
    wide_store_load_roundtrip_fp16! {
        test_vld1_f16_x2(f16, 8, float16x4x2_t, vst1_f16_x2, vld1_f16_x2);
        test_vld1_f16_x3(f16, 12, float16x4x3_t, vst1_f16_x3, vld1_f16_x3);
        test_vld1_f16_x4(f16, 16, float16x4x4_t, vst1_f16_x4, vld1_f16_x4);

        test_vld1q_f16_x2(f16, 16, float16x8x2_t, vst1q_f16_x2, vld1q_f16_x2);
        test_vld1q_f16_x3(f16, 24, float16x8x3_t, vst1q_f16_x3, vld1q_f16_x3);
        test_vld1q_f16_x4(f16, 32, float16x8x4_t, vst1q_f16_x4, vld1q_f16_x4);
    }
1030
    // Same generator as above, but for tests that additionally require the
    // `aes` target feature (the p64/poly64 intrinsics are gated on it).
    macro_rules! wide_store_load_roundtrip_aes {
        ($( $name:ident $args:tt);* $(;)?) => {
            $(
                #[simd_test(enable = "neon,aes")]
                unsafe fn $name() {
                    wide_store_load_roundtrip! $args;
                }
            )*
        };
    }
1041
    // p64 x2/x3/x4 store/load round-trips; the array length is
    // lanes-per-vector times the number of vectors in the tuple type.
    wide_store_load_roundtrip_aes! {
        test_vld1_p64_x2(p64, 2, poly64x1x2_t, vst1_p64_x2, vld1_p64_x2);
        test_vld1_p64_x3(p64, 3, poly64x1x3_t, vst1_p64_x3, vld1_p64_x3);
        test_vld1_p64_x4(p64, 4, poly64x1x4_t, vst1_p64_x4, vld1_p64_x4);

        test_vld1q_p64_x2(p64, 4, poly64x2x2_t, vst1q_p64_x2, vld1q_p64_x2);
        test_vld1q_p64_x3(p64, 6, poly64x2x3_t, vst1q_p64_x3, vld1q_p64_x3);
        test_vld1q_p64_x4(p64, 8, poly64x2x4_t, vst1q_p64_x4, vld1q_p64_x4);
    }
1051
    // Same generator as above, for tests needing only the base `neon` feature.
    macro_rules! wide_store_load_roundtrip_neon {
        ($( $name:ident $args:tt);* $(;)?) => {
            $(
                #[simd_test(enable = "neon")]
                unsafe fn $name() {
                    wide_store_load_roundtrip! $args;
                }
            )*
        };
    }
1062
    // Store/load round-trips for every remaining element type, in x2/x3/x4
    // tuple widths and both 64-bit (`vld1_*`) and 128-bit (`vld1q_*`) forms.
    // The array length is lanes-per-vector times the number of vectors.
    wide_store_load_roundtrip_neon! {
        test_vld1_f32_x2(f32, 4, float32x2x2_t, vst1_f32_x2, vld1_f32_x2);
        test_vld1_f32_x3(f32, 6, float32x2x3_t, vst1_f32_x3, vld1_f32_x3);
        test_vld1_f32_x4(f32, 8, float32x2x4_t, vst1_f32_x4, vld1_f32_x4);

        test_vld1q_f32_x2(f32, 8, float32x4x2_t, vst1q_f32_x2, vld1q_f32_x2);
        test_vld1q_f32_x3(f32, 12, float32x4x3_t, vst1q_f32_x3, vld1q_f32_x3);
        test_vld1q_f32_x4(f32, 16, float32x4x4_t, vst1q_f32_x4, vld1q_f32_x4);

        test_vld1_s8_x2(i8, 16, int8x8x2_t, vst1_s8_x2, vld1_s8_x2);
        test_vld1_s8_x3(i8, 24, int8x8x3_t, vst1_s8_x3, vld1_s8_x3);
        test_vld1_s8_x4(i8, 32, int8x8x4_t, vst1_s8_x4, vld1_s8_x4);

        test_vld1q_s8_x2(i8, 32, int8x16x2_t, vst1q_s8_x2, vld1q_s8_x2);
        test_vld1q_s8_x3(i8, 48, int8x16x3_t, vst1q_s8_x3, vld1q_s8_x3);
        test_vld1q_s8_x4(i8, 64, int8x16x4_t, vst1q_s8_x4, vld1q_s8_x4);

        test_vld1_s16_x2(i16, 8, int16x4x2_t, vst1_s16_x2, vld1_s16_x2);
        test_vld1_s16_x3(i16, 12, int16x4x3_t, vst1_s16_x3, vld1_s16_x3);
        test_vld1_s16_x4(i16, 16, int16x4x4_t, vst1_s16_x4, vld1_s16_x4);

        test_vld1q_s16_x2(i16, 16, int16x8x2_t, vst1q_s16_x2, vld1q_s16_x2);
        test_vld1q_s16_x3(i16, 24, int16x8x3_t, vst1q_s16_x3, vld1q_s16_x3);
        test_vld1q_s16_x4(i16, 32, int16x8x4_t, vst1q_s16_x4, vld1q_s16_x4);

        test_vld1_s32_x2(i32, 4, int32x2x2_t, vst1_s32_x2, vld1_s32_x2);
        test_vld1_s32_x3(i32, 6, int32x2x3_t, vst1_s32_x3, vld1_s32_x3);
        test_vld1_s32_x4(i32, 8, int32x2x4_t, vst1_s32_x4, vld1_s32_x4);

        test_vld1q_s32_x2(i32, 8, int32x4x2_t, vst1q_s32_x2, vld1q_s32_x2);
        test_vld1q_s32_x3(i32, 12, int32x4x3_t, vst1q_s32_x3, vld1q_s32_x3);
        test_vld1q_s32_x4(i32, 16, int32x4x4_t, vst1q_s32_x4, vld1q_s32_x4);

        test_vld1_s64_x2(i64, 2, int64x1x2_t, vst1_s64_x2, vld1_s64_x2);
        test_vld1_s64_x3(i64, 3, int64x1x3_t, vst1_s64_x3, vld1_s64_x3);
        test_vld1_s64_x4(i64, 4, int64x1x4_t, vst1_s64_x4, vld1_s64_x4);

        test_vld1q_s64_x2(i64, 4, int64x2x2_t, vst1q_s64_x2, vld1q_s64_x2);
        test_vld1q_s64_x3(i64, 6, int64x2x3_t, vst1q_s64_x3, vld1q_s64_x3);
        test_vld1q_s64_x4(i64, 8, int64x2x4_t, vst1q_s64_x4, vld1q_s64_x4);

        test_vld1_u8_x2(u8, 16, uint8x8x2_t, vst1_u8_x2, vld1_u8_x2);
        test_vld1_u8_x3(u8, 24, uint8x8x3_t, vst1_u8_x3, vld1_u8_x3);
        test_vld1_u8_x4(u8, 32, uint8x8x4_t, vst1_u8_x4, vld1_u8_x4);

        test_vld1q_u8_x2(u8, 32, uint8x16x2_t, vst1q_u8_x2, vld1q_u8_x2);
        test_vld1q_u8_x3(u8, 48, uint8x16x3_t, vst1q_u8_x3, vld1q_u8_x3);
        test_vld1q_u8_x4(u8, 64, uint8x16x4_t, vst1q_u8_x4, vld1q_u8_x4);

        test_vld1_u16_x2(u16, 8, uint16x4x2_t, vst1_u16_x2, vld1_u16_x2);
        test_vld1_u16_x3(u16, 12, uint16x4x3_t, vst1_u16_x3, vld1_u16_x3);
        test_vld1_u16_x4(u16, 16, uint16x4x4_t, vst1_u16_x4, vld1_u16_x4);

        test_vld1q_u16_x2(u16, 16, uint16x8x2_t, vst1q_u16_x2, vld1q_u16_x2);
        test_vld1q_u16_x3(u16, 24, uint16x8x3_t, vst1q_u16_x3, vld1q_u16_x3);
        test_vld1q_u16_x4(u16, 32, uint16x8x4_t, vst1q_u16_x4, vld1q_u16_x4);

        test_vld1_u32_x2(u32, 4, uint32x2x2_t, vst1_u32_x2, vld1_u32_x2);
        test_vld1_u32_x3(u32, 6, uint32x2x3_t, vst1_u32_x3, vld1_u32_x3);
        test_vld1_u32_x4(u32, 8, uint32x2x4_t, vst1_u32_x4, vld1_u32_x4);

        test_vld1q_u32_x2(u32, 8, uint32x4x2_t, vst1q_u32_x2, vld1q_u32_x2);
        test_vld1q_u32_x3(u32, 12, uint32x4x3_t, vst1q_u32_x3, vld1q_u32_x3);
        test_vld1q_u32_x4(u32, 16, uint32x4x4_t, vst1q_u32_x4, vld1q_u32_x4);

        test_vld1_u64_x2(u64, 2, uint64x1x2_t, vst1_u64_x2, vld1_u64_x2);
        test_vld1_u64_x3(u64, 3, uint64x1x3_t, vst1_u64_x3, vld1_u64_x3);
        test_vld1_u64_x4(u64, 4, uint64x1x4_t, vst1_u64_x4, vld1_u64_x4);

        test_vld1q_u64_x2(u64, 4, uint64x2x2_t, vst1q_u64_x2, vld1q_u64_x2);
        test_vld1q_u64_x3(u64, 6, uint64x2x3_t, vst1q_u64_x3, vld1q_u64_x3);
        test_vld1q_u64_x4(u64, 8, uint64x2x4_t, vst1q_u64_x4, vld1q_u64_x4);

        test_vld1_p8_x2(p8, 16, poly8x8x2_t, vst1_p8_x2, vld1_p8_x2);
        test_vld1_p8_x3(p8, 24, poly8x8x3_t, vst1_p8_x3, vld1_p8_x3);
        test_vld1_p8_x4(p8, 32, poly8x8x4_t, vst1_p8_x4, vld1_p8_x4);

        test_vld1q_p8_x2(p8, 32, poly8x16x2_t, vst1q_p8_x2, vld1q_p8_x2);
        test_vld1q_p8_x3(p8, 48, poly8x16x3_t, vst1q_p8_x3, vld1q_p8_x3);
        test_vld1q_p8_x4(p8, 64, poly8x16x4_t, vst1q_p8_x4, vld1q_p8_x4);

        test_vld1_p16_x2(p16, 8, poly16x4x2_t, vst1_p16_x2, vld1_p16_x2);
        test_vld1_p16_x3(p16, 12, poly16x4x3_t, vst1_p16_x3, vld1_p16_x3);
        test_vld1_p16_x4(p16, 16, poly16x4x4_t, vst1_p16_x4, vld1_p16_x4);

        test_vld1q_p16_x2(p16, 16, poly16x8x2_t, vst1q_p16_x2, vld1q_p16_x2);
        test_vld1q_p16_x3(p16, 24, poly16x8x3_t, vst1q_p16_x3, vld1q_p16_x3);
        test_vld1q_p16_x4(p16, 32, poly16x8x4_t, vst1q_p16_x4, vld1q_p16_x4);
    }
1152}
1153
// Test suites shared with the 32-bit Arm backend, pulled in from
// `arm_shared` via `#[path]` so both architectures run the same tests.
#[cfg(test)]
#[path = "../../arm_shared/neon/table_lookup_tests.rs"]
mod table_lookup_tests;

#[cfg(test)]
#[path = "../../arm_shared/neon/shift_and_insert_tests.rs"]
mod shift_and_insert_tests;

#[cfg(test)]
#[path = "../../arm_shared/neon/load_tests.rs"]
mod load_tests;

#[cfg(test)]
#[path = "../../arm_shared/neon/store_tests.rs"]
mod store_tests;