//! ARMv8 ASIMD intrinsics

#![allow(non_camel_case_types)]

#[rustfmt::skip]
mod generated;
#[rustfmt::skip]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub use self::generated::*;

// FIXME: replace neon with asimd

use crate::{
    core_arch::{arm_shared::*, simd::*},
    hint::unreachable_unchecked,
    intrinsics::{simd::*, *},
    mem::transmute,
};
#[cfg(test)]
use stdarch_test::assert_instr;

types! {
    #![stable(feature = "neon_intrinsics", since = "1.59.0")]

    /// ARM-specific 64-bit wide vector of one packed `f64`.
    pub struct float64x1_t(1 x f64); // FIXME: check this!
    /// ARM-specific 128-bit wide vector of two packed `f64`.
    pub struct float64x2_t(2 x f64);
}

/// ARM-specific type containing two `float64x1_t` vectors.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x1x2_t(pub float64x1_t, pub float64x1_t);
/// ARM-specific type containing three `float64x1_t` vectors.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x1x3_t(pub float64x1_t, pub float64x1_t, pub float64x1_t);
/// ARM-specific type containing four `float64x1_t` vectors.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x1x4_t(
    pub float64x1_t,
    pub float64x1_t,
    pub float64x1_t,
    pub float64x1_t,
);

/// ARM-specific type containing two `float64x2_t` vectors.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x2x2_t(pub float64x2_t, pub float64x2_t);
/// ARM-specific type containing three `float64x2_t` vectors.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x2x3_t(pub float64x2_t, pub float64x2_t, pub float64x2_t);
/// ARM-specific type containing four `float64x2_t` vectors.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub struct float64x2x4_t(
    pub float64x2_t,
    pub float64x2_t,
    pub float64x2_t,
    pub float64x2_t,
);
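
// Construction sketch (illustrative, not part of the original source): the
// `x2`/`x3`/`x4` types above are plain tuple structs, so a multi-register
// value can be built directly from individual vectors; the field order matches
// the register order used by the `vld1*_x2`/`vst1*_x2` family tested below.
#[cfg(test)]
#[allow(dead_code)]
fn float64x2x2_construction_sketch(a: float64x2_t, b: float64x2_t) -> float64x2x2_t {
    float64x2x2_t(a, b)
}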

/// Helper for the 'shift right and insert' functions.
macro_rules! shift_right_and_insert {
    ($ty:ty, $width:literal, $N:expr, $a:expr, $b:expr) => {{
        type V = Simd<$ty, $width>;

        if $N as u32 == <$ty>::BITS {
            $a
        } else {
            let a: V = transmute($a);
            let b: V = transmute($b);

            let mask = <$ty>::MAX >> $N;
            let kept: V = simd_and(a, V::splat(!mask));

            let shift_counts = V::splat($N as $ty);
            let shifted = simd_shr(b, shift_counts);

            transmute(simd_or(kept, shifted))
        }
    }};
}

pub(crate) use shift_right_and_insert;
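
// Scalar sketch of the insert logic above (illustrative only, not used by the
// macro): for a single `u8` lane with N = 3, the top 3 bits of `a` are kept
// and the remaining bits are filled with `b >> 3`.
#[cfg(test)]
#[test]
fn shift_right_and_insert_scalar_sketch() {
    const N: u32 = 3;
    let (a, b) = (0b1010_0000u8, 0b1111_1111u8);
    let mask = u8::MAX >> N; // low (8 - N) bits set
    let kept = a & !mask; // the top N bits of `a` survive
    let inserted = b >> N; // the inserted bits come from `b`
    assert_eq!(kept | inserted, 0b1011_1111);
}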

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_lane_s64<const N1: i32, const N2: i32>(_a: int64x1_t, b: int64x1_t) -> int64x1_t {
    static_assert!(N1 == 0);
    static_assert!(N2 == 0);
    b
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_lane_u64<const N1: i32, const N2: i32>(_a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
    static_assert!(N1 == 0);
    static_assert!(N2 == 0);
    b
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_lane_p64<const N1: i32, const N2: i32>(_a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
    static_assert!(N1 == 0);
    static_assert!(N2 == 0);
    b
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_lane_f64<const N1: i32, const N2: i32>(
    _a: float64x1_t,
    b: float64x1_t,
) -> float64x1_t {
    static_assert!(N1 == 0);
    static_assert!(N2 == 0);
    b
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_laneq_s64<const LANE1: i32, const LANE2: i32>(
    _a: int64x1_t,
    b: int64x2_t,
) -> int64x1_t {
    static_assert!(LANE1 == 0);
    static_assert_uimm_bits!(LANE2, 1);
    unsafe { transmute::<i64, _>(simd_extract!(b, LANE2 as u32)) }
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_laneq_u64<const LANE1: i32, const LANE2: i32>(
    _a: uint64x1_t,
    b: uint64x2_t,
) -> uint64x1_t {
    static_assert!(LANE1 == 0);
    static_assert_uimm_bits!(LANE2, 1);
    unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_laneq_p64<const LANE1: i32, const LANE2: i32>(
    _a: poly64x1_t,
    b: poly64x2_t,
) -> poly64x1_t {
    static_assert!(LANE1 == 0);
    static_assert_uimm_bits!(LANE2, 1);
    unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
#[rustc_legacy_const_generics(1, 3)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcopy_laneq_f64<const LANE1: i32, const LANE2: i32>(
    _a: float64x1_t,
    b: float64x2_t,
) -> float64x1_t {
    static_assert!(LANE1 == 0);
    static_assert_uimm_bits!(LANE2, 1);
    unsafe { transmute::<f64, _>(simd_extract!(b, LANE2 as u32)) }
}

/// Load multiple single-element structures to one, two, three, or four registers
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vld1_dup_f64(ptr: *const f64) -> float64x1_t {
    vld1_f64(ptr)
}

/// Load multiple single-element structures to one, two, three, or four registers
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ld1r))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vld1q_dup_f64(ptr: *const f64) -> float64x2_t {
    let x = vld1q_lane_f64::<0>(ptr, transmute(f64x2::splat(0.)));
    simd_shuffle!(x, x, [0, 0])
}

/// Load one single-element structure to one lane of one register.
#[inline]
#[target_feature(enable = "neon")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(ldr, LANE = 0))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vld1_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x1_t) -> float64x1_t {
    static_assert!(LANE == 0);
    simd_insert!(src, LANE as u32, *ptr)
}

/// Load one single-element structure to one lane of one register.
#[inline]
#[target_feature(enable = "neon")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(ld1, LANE = 1))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub unsafe fn vld1q_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x2_t) -> float64x2_t {
    static_assert_uimm_bits!(LANE, 1);
    simd_insert!(src, LANE as u32, *ptr)
}
/// Bitwise Select. This instruction sets each bit in the destination SIMD&FP register
/// to the corresponding bit from the first source SIMD&FP register when the original
/// destination bit was 1, otherwise from the second source SIMD&FP register.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(bsl))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vbsl_f64(a: uint64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
    let not = int64x1_t::splat(-1);
    unsafe {
        transmute(simd_or(
            simd_and(a, transmute(b)),
            simd_and(simd_xor(a, transmute(not)), transmute(c)),
        ))
    }
}
/// Bitwise Select.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(bsl))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vbsl_p64(a: poly64x1_t, b: poly64x1_t, c: poly64x1_t) -> poly64x1_t {
    let not = int64x1_t::splat(-1);
    unsafe { simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c)) }
}
/// Bitwise Select. (128-bit)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(bsl))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vbslq_f64(a: uint64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
    let not = int64x2_t::splat(-1);
    unsafe {
        transmute(simd_or(
            simd_and(a, transmute(b)),
            simd_and(simd_xor(a, transmute(not)), transmute(c)),
        ))
    }
}
/// Bitwise Select. (128-bit)
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(bsl))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vbslq_p64(a: poly64x2_t, b: poly64x2_t, c: poly64x2_t) -> poly64x2_t {
    let not = int64x2_t::splat(-1);
    unsafe { simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c)) }
}
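
// Scalar sketch of the select formula above (illustrative only): where a mask
// bit is 1 the result bit comes from `b`, otherwise from `c`.
#[cfg(test)]
#[test]
fn bitwise_select_scalar_sketch() {
    let (a, b, c) = (0xF0u8, 0xAAu8, 0x55u8);
    let r = (a & b) | (!a & c);
    assert_eq!(r, 0xA5);
}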

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fadd))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vadd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
    unsafe { simd_add(a, b) }
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fadd))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
    unsafe { simd_add(a, b) }
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
    unsafe { simd_add(a, b) }
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
    unsafe { simd_add(a, b) }
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vaddd_s64(a: i64, b: i64) -> i64 {
    a.wrapping_add(b)
}

/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vaddd_u64(a: u64, b: u64) -> u64 {
    a.wrapping_add(b)
}
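
// Overflow sketch (illustrative): like the underlying ADD instruction, the
// `vaddd_*` forms above wrap on overflow instead of panicking as `+` would in
// debug builds.
#[cfg(test)]
#[test]
fn vaddd_wrapping_sketch() {
    assert_eq!(u64::MAX.wrapping_add(1), u64::MIN);
    assert_eq!(i64::MAX.wrapping_add(1), i64::MIN);
}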

/// Extract vector from pair of vectors
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vext_p64<const N: i32>(a: poly64x1_t, _b: poly64x1_t) -> poly64x1_t {
    static_assert!(N == 0);
    a
}

/// Extract vector from pair of vectors
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vext_f64<const N: i32>(a: float64x1_t, _b: float64x1_t) -> float64x1_t {
    static_assert!(N == 0);
    a
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmov))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vdup_n_p64(value: p64) -> poly64x1_t {
    unsafe { transmute(u64x1::new(value)) }
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vdup_n_f64(value: f64) -> float64x1_t {
    float64x1_t::splat(value)
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(dup))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vdupq_n_p64(value: p64) -> poly64x2_t {
    unsafe { transmute(u64x2::new(value, value)) }
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(dup))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vdupq_n_f64(value: f64) -> float64x2_t {
    float64x2_t::splat(value)
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmov))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmov_n_p64(value: p64) -> poly64x1_t {
    vdup_n_p64(value)
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmov_n_f64(value: f64) -> float64x1_t {
    vdup_n_f64(value)
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(dup))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmovq_n_p64(value: p64) -> poly64x2_t {
    vdupq_n_p64(value)
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(dup))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmovq_n_f64(value: f64) -> float64x2_t {
    vdupq_n_f64(value)
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vget_high_f64(a: float64x2_t) -> float64x1_t {
    unsafe { float64x1_t([simd_extract!(a, 1)]) }
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ext))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
    unsafe { transmute(u64x1::new(simd_extract!(a, 1))) }
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vget_low_f64(a: float64x2_t) -> float64x1_t {
    unsafe { float64x1_t([simd_extract!(a, 0)]) }
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
    unsafe { transmute(u64x1::new(simd_extract!(a, 0))) }
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(nop, IMM5 = 0)
)]
pub fn vget_lane_f64<const IMM5: i32>(v: float64x1_t) -> f64 {
    static_assert!(IMM5 == 0);
    unsafe { simd_extract!(v, IMM5 as u32) }
}

/// Duplicate vector element to vector or scalar
#[inline]
#[target_feature(enable = "neon")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(nop, IMM5 = 0)
)]
pub fn vgetq_lane_f64<const IMM5: i32>(v: float64x2_t) -> f64 {
    static_assert_uimm_bits!(IMM5, 1);
    unsafe { simd_extract!(v, IMM5 as u32) }
}

/// Vector combine
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(mov))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcombine_f64(low: float64x1_t, high: float64x1_t) -> float64x2_t {
    unsafe { simd_shuffle!(low, high, [0, 1]) }
}

/// Shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vshld_n_s64<const N: i32>(a: i64) -> i64 {
    static_assert_uimm_bits!(N, 6);
    a << N
}

/// Shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vshld_n_u64<const N: i32>(a: u64) -> u64 {
    static_assert_uimm_bits!(N, 6);
    a << N
}

/// Signed shift right
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vshrd_n_s64<const N: i32>(a: i64) -> i64 {
    static_assert!(N >= 1 && N <= 64);
    let n: i32 = if N == 64 { 63 } else { N };
    a >> n
}

/// Unsigned shift right
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vshrd_n_u64<const N: i32>(a: u64) -> u64 {
    static_assert!(N >= 1 && N <= 64);
    let n: i32 = if N == 64 {
        return 0;
    } else {
        N
    };
    a >> n
}
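
// Edge-case sketch (illustrative): Rust's `>>` cannot take a shift count equal
// to the type width, so the intrinsics above lower N == 64 to an arithmetic
// shift by 63 for the signed form and to zero for the unsigned form.
#[cfg(test)]
#[test]
fn shift_right_64_edge_case_sketch() {
    assert_eq!(i64::MIN >> 63, -1); // the sign bit fills the whole value
    assert_eq!(u64::MAX.checked_shr(64).unwrap_or(0), 0); // unsigned form yields 0
}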

/// Signed shift right and accumulate
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
    static_assert!(N >= 1 && N <= 64);
    a.wrapping_add(vshrd_n_s64::<N>(b))
}

/// Unsigned shift right and accumulate
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
    static_assert!(N >= 1 && N <= 64);
    a.wrapping_add(vshrd_n_u64::<N>(b))
}

#[cfg(test)]
mod tests {
    use crate::core_arch::aarch64::test_support::*;
    use crate::core_arch::arm_shared::test_support::*;
    use crate::core_arch::{aarch64::neon::*, aarch64::*, simd::*};
    use stdarch_test::simd_test;

    #[simd_test(enable = "neon")]
    fn test_vadd_f64() {
        let a = f64x1::from_array([1.]);
        let b = f64x1::from_array([8.]);
        let e = f64x1::from_array([9.]);
        let r = f64x1::from(vadd_f64(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vaddq_f64() {
        let a = f64x2::new(1., 2.);
        let b = f64x2::new(8., 7.);
        let e = f64x2::new(9., 9.);
        let r = f64x2::from(vaddq_f64(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vadd_s64() {
        let a = i64x1::from_array([1]);
        let b = i64x1::from_array([8]);
        let e = i64x1::from_array([9]);
        let r = i64x1::from(vadd_s64(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vadd_u64() {
        let a = u64x1::from_array([1]);
        let b = u64x1::from_array([8]);
        let e = u64x1::from_array([9]);
        let r = u64x1::from(vadd_u64(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vaddd_s64() {
        let a = 1_i64;
        let b = 8_i64;
        let e = 9_i64;
        let r: i64 = vaddd_s64(a, b);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vaddd_u64() {
        let a = 1_u64;
        let b = 8_u64;
        let e = 9_u64;
        let r: u64 = vaddd_u64(a, b);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vext_p64() {
        let a = u64x1::new(0);
        let b = u64x1::new(1);
        let e = u64x1::new(0);
        let r = u64x1::from(vext_p64::<0>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vext_f64() {
        let a = f64x1::new(0.);
        let b = f64x1::new(1.);
        let e = f64x1::new(0.);
        let r = f64x1::from(vext_f64::<0>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vshld_n_s64() {
        let a: i64 = 1;
        let e: i64 = 4;
        let r: i64 = vshld_n_s64::<2>(a);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vshld_n_u64() {
        let a: u64 = 1;
        let e: u64 = 4;
        let r: u64 = vshld_n_u64::<2>(a);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vshrd_n_s64() {
        let a: i64 = 4;
        let e: i64 = 1;
        let r: i64 = vshrd_n_s64::<2>(a);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vshrd_n_u64() {
        let a: u64 = 4;
        let e: u64 = 1;
        let r: u64 = vshrd_n_u64::<2>(a);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vsrad_n_s64() {
        let a: i64 = 1;
        let b: i64 = 4;
        let e: i64 = 2;
        let r: i64 = vsrad_n_s64::<2>(a, b);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vsrad_n_u64() {
        let a: u64 = 1;
        let b: u64 = 4;
        let e: u64 = 2;
        let r: u64 = vsrad_n_u64::<2>(a, b);
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vdup_n_f64() {
        let a: f64 = 3.3;
        let e = f64x1::new(3.3);
        let r = f64x1::from(vdup_n_f64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vdup_n_p64() {
        let a: u64 = 3;
        let e = u64x1::new(3);
        let r = u64x1::from(vdup_n_p64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vdupq_n_f64() {
        let a: f64 = 3.3;
        let e = f64x2::new(3.3, 3.3);
        let r = f64x2::from(vdupq_n_f64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vdupq_n_p64() {
        let a: u64 = 3;
        let e = u64x2::new(3, 3);
        let r = u64x2::from(vdupq_n_p64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vmov_n_p64() {
        let a: u64 = 3;
        let e = u64x1::new(3);
        let r = u64x1::from(vmov_n_p64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vmov_n_f64() {
        let a: f64 = 3.3;
        let e = f64x1::new(3.3);
        let r = f64x1::from(vmov_n_f64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vmovq_n_p64() {
        let a: u64 = 3;
        let e = u64x2::new(3, 3);
        let r = u64x2::from(vmovq_n_p64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vmovq_n_f64() {
        let a: f64 = 3.3;
        let e = f64x2::new(3.3, 3.3);
        let r = f64x2::from(vmovq_n_f64(a));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vget_high_f64() {
        let a = f64x2::new(1.0, 2.0);
        let e = f64x1::new(2.0);
        let r = f64x1::from(vget_high_f64(a.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vget_high_p64() {
        let a = u64x2::new(1, 2);
        let e = u64x1::new(2);
        let r = u64x1::from(vget_high_p64(a.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vget_low_f64() {
        let a = f64x2::new(1.0, 2.0);
        let e = f64x1::new(1.0);
        let r = f64x1::from(vget_low_f64(a.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vget_low_p64() {
        let a = u64x2::new(1, 2);
        let e = u64x1::new(1);
        let r = u64x1::from(vget_low_p64(a.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vget_lane_f64() {
        let v = f64x1::new(1.0);
        let r = vget_lane_f64::<0>(v.into());
        assert_eq!(r, 1.0);
    }

    #[simd_test(enable = "neon")]
    fn test_vgetq_lane_f64() {
        let v = f64x2::new(0.0, 1.0);
        let r = vgetq_lane_f64::<1>(v.into());
        assert_eq!(r, 1.0);
        let r = vgetq_lane_f64::<0>(v.into());
        assert_eq!(r, 0.0);
    }

    #[simd_test(enable = "neon")]
    fn test_vcopy_lane_s64() {
        let a = i64x1::new(1);
        let b = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
        let e = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
        let r = i64x1::from(vcopy_lane_s64::<0, 0>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vcopy_lane_u64() {
        let a = u64x1::new(1);
        let b = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
        let e = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
        let r = u64x1::from(vcopy_lane_u64::<0, 0>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vcopy_lane_p64() {
        let a = u64x1::new(1);
        let b = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
        let e = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
        let r = u64x1::from(vcopy_lane_p64::<0, 0>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vcopy_lane_f64() {
        let a = f64x1::from_array([1.]);
        let b = f64x1::from_array([0.]);
        let e = f64x1::from_array([0.]);
        let r = f64x1::from(vcopy_lane_f64::<0, 0>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vcopy_laneq_s64() {
        let a = i64x1::new(1);
        let b = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
        let e = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
        let r = i64x1::from(vcopy_laneq_s64::<0, 1>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vcopy_laneq_u64() {
        let a = u64x1::new(1);
        let b = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
        let e = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
        let r = u64x1::from(vcopy_laneq_u64::<0, 1>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vcopy_laneq_p64() {
        let a = u64x1::new(1);
        let b = u64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
        let e = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
        let r = u64x1::from(vcopy_laneq_p64::<0, 1>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vcopy_laneq_f64() {
        let a = f64x1::from_array([1.]);
        let b = f64x2::from_array([0., 0.5]);
        let e = f64x1::from_array([0.5]);
        let r = f64x1::from(vcopy_laneq_f64::<0, 1>(a.into(), b.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vbsl_f64() {
        let a = u64x1::new(0x8000000000000000);
        let b = f64x1::new(-1.23f64);
        let c = f64x1::new(2.34f64);
        let e = f64x1::new(-2.34f64);
        let r = f64x1::from(vbsl_f64(a.into(), b.into(), c.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vbsl_p64() {
        let a = u64x1::new(1);
        let b = u64x1::new(u64::MAX);
        let c = u64x1::new(u64::MIN);
        let e = u64x1::new(1);
        let r = u64x1::from(vbsl_p64(a.into(), b.into(), c.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vbslq_f64() {
        let a = u64x2::new(1, 0x8000000000000000);
        let b = f64x2::new(f64::MAX, -1.23f64);
        let c = f64x2::new(f64::MIN, 2.34f64);
        let e = f64x2::new(f64::MIN, -2.34f64);
        let r = f64x2::from(vbslq_f64(a.into(), b.into(), c.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vbslq_p64() {
        let a = u64x2::new(u64::MAX, 1);
        let b = u64x2::new(u64::MAX, u64::MAX);
        let c = u64x2::new(u64::MIN, u64::MIN);
        let e = u64x2::new(u64::MAX, 1);
        let r = u64x2::from(vbslq_p64(a.into(), b.into(), c.into()));
        assert_eq!(r, e);
    }

    #[simd_test(enable = "neon")]
    fn test_vld1_f64() {
        let a: [f64; 2] = [0., 1.];
        let e = f64x1::new(1.);
        let r = unsafe { f64x1::from(vld1_f64(a[1..].as_ptr())) };
        assert_eq!(r, e)
    }

    #[simd_test(enable = "neon")]
    fn test_vld1q_f64() {
        let a: [f64; 3] = [0., 1., 2.];
        let e = f64x2::new(1., 2.);
        let r = unsafe { f64x2::from(vld1q_f64(a[1..].as_ptr())) };
        assert_eq!(r, e)
    }

    #[simd_test(enable = "neon")]
    fn test_vld1_dup_f64() {
        let a: [f64; 2] = [1., 42.];
        let e = f64x1::new(42.);
        let r = unsafe { f64x1::from(vld1_dup_f64(a[1..].as_ptr())) };
        assert_eq!(r, e)
    }

    #[simd_test(enable = "neon")]
    fn test_vld1q_dup_f64() {
        let elem: f64 = 42.;
        let e = f64x2::new(42., 42.);
        let r = unsafe { f64x2::from(vld1q_dup_f64(&elem)) };
        assert_eq!(r, e)
    }

    #[simd_test(enable = "neon")]
    fn test_vld1_lane_f64() {
        let a = f64x1::new(0.);
        let elem: f64 = 42.;
        let e = f64x1::new(42.);
        let r = unsafe { f64x1::from(vld1_lane_f64::<0>(&elem, a.into())) };
        assert_eq!(r, e)
    }

    #[simd_test(enable = "neon")]
    fn test_vld1q_lane_f64() {
        let a = f64x2::new(0., 1.);
        let elem: f64 = 42.;
        let e = f64x2::new(0., 42.);
        let r = unsafe { f64x2::from(vld1q_lane_f64::<1>(&elem, a.into())) };
        assert_eq!(r, e)
    }

    #[simd_test(enable = "neon")]
    fn test_vst1_f64() {
        let mut vals = [0_f64; 2];
        let a = f64x1::new(1.);

        unsafe {
            vst1_f64(vals[1..].as_mut_ptr(), a.into());
        }

        assert_eq!(vals[0], 0.);
        assert_eq!(vals[1], 1.);
    }

    #[simd_test(enable = "neon")]
    fn test_vst1q_f64() {
        let mut vals = [0_f64; 3];
        let a = f64x2::new(1., 2.);

        unsafe {
            vst1q_f64(vals[1..].as_mut_ptr(), a.into());
        }

        assert_eq!(vals[0], 0.);
        assert_eq!(vals[1], 1.);
        assert_eq!(vals[2], 2.);
    }
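
    // Illustrative sketch (not part of the original suite): splitting a vector
    // with vget_low_f64/vget_high_f64 and rejoining with vcombine_f64 gives
    // back the original value.
    #[simd_test(enable = "neon")]
    fn test_combine_split_roundtrip_sketch() {
        let a = f64x2::new(1., 2.);
        let low = vget_low_f64(a.into());
        let high = vget_high_f64(a.into());
        let r = f64x2::from(vcombine_f64(low, high));
        assert_eq!(r, a);
    }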

    macro_rules! wide_store_load_roundtrip {
        ($elem_ty:ty, $len:expr, $vec_ty:ty, $store:expr, $load:expr) => {
            let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty);
            let a: $vec_ty = transmute(vals);
            let mut tmp = [0 as $elem_ty; $len];
            $store(tmp.as_mut_ptr().cast(), a);
            let r: $vec_ty = $load(tmp.as_ptr().cast());
            let out: [$elem_ty; $len] = transmute(r);
            assert_eq!(out, vals);
        };
    }
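
    // Expansion sketch (illustrative): one invocation of the macro above
    // unrolls to roughly this body, shown for the `f64`/`float64x2x2_t` case
    // exercised below.
    #[simd_test(enable = "neon")]
    unsafe fn test_wide_store_load_roundtrip_expansion_sketch() {
        let vals: [f64; 4] = crate::array::from_fn(|i| i as f64);
        let a: float64x2x2_t = transmute(vals);
        let mut tmp = [0f64; 4];
        vst1q_f64_x2(tmp.as_mut_ptr(), a); // store two q registers back to back
        let r: float64x2x2_t = vld1q_f64_x2(tmp.as_ptr()); // and load them again
        let out: [f64; 4] = transmute(r);
        assert_eq!(out, vals);
    }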

    macro_rules! wide_store_load_roundtrip_fp16 {
        ($( $name:ident $args:tt);* $(;)?) => {
            $(
                #[simd_test(enable = "neon,fp16")]
                #[cfg(not(target_arch = "arm64ec"))]
                unsafe fn $name() {
                    wide_store_load_roundtrip! $args;
                }
            )*
        };
    }

    wide_store_load_roundtrip_fp16! {
        test_vld1_f16_x2(f16, 8, float16x4x2_t, vst1_f16_x2, vld1_f16_x2);
        test_vld1_f16_x3(f16, 12, float16x4x3_t, vst1_f16_x3, vld1_f16_x3);
        test_vld1_f16_x4(f16, 16, float16x4x4_t, vst1_f16_x4, vld1_f16_x4);

        test_vld1q_f16_x2(f16, 16, float16x8x2_t, vst1q_f16_x2, vld1q_f16_x2);
        test_vld1q_f16_x3(f16, 24, float16x8x3_t, vst1q_f16_x3, vld1q_f16_x3);
        test_vld1q_f16_x4(f16, 32, float16x8x4_t, vst1q_f16_x4, vld1q_f16_x4);
    }

    macro_rules! wide_store_load_roundtrip_aes {
        ($( $name:ident $args:tt);* $(;)?) => {
            $(
                #[simd_test(enable = "neon,aes")]
                unsafe fn $name() {
                    wide_store_load_roundtrip! $args;
                }
            )*
        };
    }

    wide_store_load_roundtrip_aes! {
        test_vld1_p64_x2(p64, 2, poly64x1x2_t, vst1_p64_x2, vld1_p64_x2);
        test_vld1_p64_x3(p64, 3, poly64x1x3_t, vst1_p64_x3, vld1_p64_x3);
        test_vld1_p64_x4(p64, 4, poly64x1x4_t, vst1_p64_x4, vld1_p64_x4);

        test_vld1q_p64_x2(p64, 4, poly64x2x2_t, vst1q_p64_x2, vld1q_p64_x2);
        test_vld1q_p64_x3(p64, 6, poly64x2x3_t, vst1q_p64_x3, vld1q_p64_x3);
        test_vld1q_p64_x4(p64, 8, poly64x2x4_t, vst1q_p64_x4, vld1q_p64_x4);
    }

    macro_rules! wide_store_load_roundtrip_neon {
        ($( $name:ident $args:tt);* $(;)?) => {
            $(
                #[simd_test(enable = "neon")]
                unsafe fn $name() {
                    wide_store_load_roundtrip! $args;
                }
            )*
        };
    }

    wide_store_load_roundtrip_neon! {
        test_vld1_f32_x2(f32, 4, float32x2x2_t, vst1_f32_x2, vld1_f32_x2);
        test_vld1_f32_x3(f32, 6, float32x2x3_t, vst1_f32_x3, vld1_f32_x3);
        test_vld1_f32_x4(f32, 8, float32x2x4_t, vst1_f32_x4, vld1_f32_x4);

        test_vld1q_f32_x2(f32, 8, float32x4x2_t, vst1q_f32_x2, vld1q_f32_x2);
        test_vld1q_f32_x3(f32, 12, float32x4x3_t, vst1q_f32_x3, vld1q_f32_x3);
        test_vld1q_f32_x4(f32, 16, float32x4x4_t, vst1q_f32_x4, vld1q_f32_x4);

        test_vld1_f64_x2(f64, 2, float64x1x2_t, vst1_f64_x2, vld1_f64_x2);
        test_vld1_f64_x3(f64, 3, float64x1x3_t, vst1_f64_x3, vld1_f64_x3);
        test_vld1_f64_x4(f64, 4, float64x1x4_t, vst1_f64_x4, vld1_f64_x4);

        test_vld1q_f64_x2(f64, 4, float64x2x2_t, vst1q_f64_x2, vld1q_f64_x2);
        test_vld1q_f64_x3(f64, 6, float64x2x3_t, vst1q_f64_x3, vld1q_f64_x3);
        test_vld1q_f64_x4(f64, 8, float64x2x4_t, vst1q_f64_x4, vld1q_f64_x4);

        test_vld1_s8_x2(i8, 16, int8x8x2_t, vst1_s8_x2, vld1_s8_x2);
        test_vld1_s8_x3(i8, 24, int8x8x3_t, vst1_s8_x3, vld1_s8_x3);
        test_vld1_s8_x4(i8, 32, int8x8x4_t, vst1_s8_x4, vld1_s8_x4);

        test_vld1q_s8_x2(i8, 32, int8x16x2_t, vst1q_s8_x2, vld1q_s8_x2);
        test_vld1q_s8_x3(i8, 48, int8x16x3_t, vst1q_s8_x3, vld1q_s8_x3);
        test_vld1q_s8_x4(i8, 64, int8x16x4_t, vst1q_s8_x4, vld1q_s8_x4);

        test_vld1_s16_x2(i16, 8, int16x4x2_t, vst1_s16_x2, vld1_s16_x2);
        test_vld1_s16_x3(i16, 12, int16x4x3_t, vst1_s16_x3, vld1_s16_x3);
        test_vld1_s16_x4(i16, 16, int16x4x4_t, vst1_s16_x4, vld1_s16_x4);

        test_vld1q_s16_x2(i16, 16, int16x8x2_t, vst1q_s16_x2, vld1q_s16_x2);
        test_vld1q_s16_x3(i16, 24, int16x8x3_t, vst1q_s16_x3, vld1q_s16_x3);
        test_vld1q_s16_x4(i16, 32, int16x8x4_t, vst1q_s16_x4, vld1q_s16_x4);

        test_vld1_s32_x2(i32, 4, int32x2x2_t, vst1_s32_x2, vld1_s32_x2);
        test_vld1_s32_x3(i32, 6, int32x2x3_t, vst1_s32_x3, vld1_s32_x3);
        test_vld1_s32_x4(i32, 8, int32x2x4_t, vst1_s32_x4, vld1_s32_x4);

        test_vld1q_s32_x2(i32, 8, int32x4x2_t, vst1q_s32_x2, vld1q_s32_x2);
        test_vld1q_s32_x3(i32, 12, int32x4x3_t, vst1q_s32_x3, vld1q_s32_x3);
        test_vld1q_s32_x4(i32, 16, int32x4x4_t, vst1q_s32_x4, vld1q_s32_x4);

        test_vld1_s64_x2(i64, 2, int64x1x2_t, vst1_s64_x2, vld1_s64_x2);
        test_vld1_s64_x3(i64, 3, int64x1x3_t, vst1_s64_x3, vld1_s64_x3);
        test_vld1_s64_x4(i64, 4, int64x1x4_t, vst1_s64_x4, vld1_s64_x4);

        test_vld1q_s64_x2(i64, 4, int64x2x2_t, vst1q_s64_x2, vld1q_s64_x2);
        test_vld1q_s64_x3(i64, 6, int64x2x3_t, vst1q_s64_x3, vld1q_s64_x3);
        test_vld1q_s64_x4(i64, 8, int64x2x4_t, vst1q_s64_x4, vld1q_s64_x4);

        test_vld1_u8_x2(u8, 16, uint8x8x2_t, vst1_u8_x2, vld1_u8_x2);
        test_vld1_u8_x3(u8, 24, uint8x8x3_t, vst1_u8_x3, vld1_u8_x3);
        test_vld1_u8_x4(u8, 32, uint8x8x4_t, vst1_u8_x4, vld1_u8_x4);

        test_vld1q_u8_x2(u8, 32, uint8x16x2_t, vst1q_u8_x2, vld1q_u8_x2);
        test_vld1q_u8_x3(u8, 48, uint8x16x3_t, vst1q_u8_x3, vld1q_u8_x3);
        test_vld1q_u8_x4(u8, 64, uint8x16x4_t, vst1q_u8_x4, vld1q_u8_x4);

        test_vld1_u16_x2(u16, 8, uint16x4x2_t, vst1_u16_x2, vld1_u16_x2);
        test_vld1_u16_x3(u16, 12, uint16x4x3_t, vst1_u16_x3, vld1_u16_x3);
        test_vld1_u16_x4(u16, 16, uint16x4x4_t, vst1_u16_x4, vld1_u16_x4);

        test_vld1q_u16_x2(u16, 16, uint16x8x2_t, vst1q_u16_x2, vld1q_u16_x2);
        test_vld1q_u16_x3(u16, 24, uint16x8x3_t, vst1q_u16_x3, vld1q_u16_x3);
        test_vld1q_u16_x4(u16, 32, uint16x8x4_t, vst1q_u16_x4, vld1q_u16_x4);

        test_vld1_u32_x2(u32, 4, uint32x2x2_t, vst1_u32_x2, vld1_u32_x2);
        test_vld1_u32_x3(u32, 6, uint32x2x3_t, vst1_u32_x3, vld1_u32_x3);
        test_vld1_u32_x4(u32, 8, uint32x2x4_t, vst1_u32_x4, vld1_u32_x4);

        test_vld1q_u32_x2(u32, 8, uint32x4x2_t, vst1q_u32_x2, vld1q_u32_x2);
        test_vld1q_u32_x3(u32, 12, uint32x4x3_t, vst1q_u32_x3, vld1q_u32_x3);
        test_vld1q_u32_x4(u32, 16, uint32x4x4_t, vst1q_u32_x4, vld1q_u32_x4);

        test_vld1_u64_x2(u64, 2, uint64x1x2_t, vst1_u64_x2, vld1_u64_x2);
        test_vld1_u64_x3(u64, 3, uint64x1x3_t, vst1_u64_x3, vld1_u64_x3);
        test_vld1_u64_x4(u64, 4, uint64x1x4_t, vst1_u64_x4, vld1_u64_x4);

        test_vld1q_u64_x2(u64, 4, uint64x2x2_t, vst1q_u64_x2, vld1q_u64_x2);
        test_vld1q_u64_x3(u64, 6, uint64x2x3_t, vst1q_u64_x3, vld1q_u64_x3);
        test_vld1q_u64_x4(u64, 8, uint64x2x4_t, vst1q_u64_x4, vld1q_u64_x4);

        test_vld1_p8_x2(p8, 16, poly8x8x2_t, vst1_p8_x2, vld1_p8_x2);
        test_vld1_p8_x3(p8, 24, poly8x8x3_t, vst1_p8_x3, vld1_p8_x3);
        test_vld1_p8_x4(p8, 32, poly8x8x4_t, vst1_p8_x4, vld1_p8_x4);

        test_vld1q_p8_x2(p8, 32, poly8x16x2_t, vst1q_p8_x2, vld1q_p8_x2);
        test_vld1q_p8_x3(p8, 48, poly8x16x3_t, vst1q_p8_x3, vld1q_p8_x3);
        test_vld1q_p8_x4(p8, 64, poly8x16x4_t, vst1q_p8_x4, vld1q_p8_x4);

        test_vld1_p16_x2(p16, 8, poly16x4x2_t, vst1_p16_x2, vld1_p16_x2);
        test_vld1_p16_x3(p16, 12, poly16x4x3_t, vst1_p16_x3, vld1_p16_x3);
        test_vld1_p16_x4(p16, 16, poly16x4x4_t, vst1_p16_x4, vld1_p16_x4);

        test_vld1q_p16_x2(p16, 16, poly16x8x2_t, vst1q_p16_x2, vld1q_p16_x2);
        test_vld1q_p16_x3(p16, 24, poly16x8x3_t, vst1q_p16_x3, vld1q_p16_x3);
        test_vld1q_p16_x4(p16, 32, poly16x8x4_t, vst1q_p16_x4, vld1q_p16_x4);
    }
    wide_store_load_roundtrip_neon! {
        test_vld2_f32_x2(f32, 4, float32x2x2_t, vst2_f32, vld2_f32);
        test_vld3_f32_x3(f32, 6, float32x2x3_t, vst3_f32, vld3_f32);
        test_vld4_f32_x4(f32, 8, float32x2x4_t, vst4_f32, vld4_f32);

        test_vld2q_f32_x2(f32, 8, float32x4x2_t, vst2q_f32, vld2q_f32);
        test_vld3q_f32_x3(f32, 12, float32x4x3_t, vst3q_f32, vld3q_f32);
        test_vld4q_f32_x4(f32, 16, float32x4x4_t, vst4q_f32, vld4q_f32);

        test_vld2_f64_x2(f64, 2, float64x1x2_t, vst2_f64, vld2_f64);
        test_vld3_f64_x3(f64, 3, float64x1x3_t, vst3_f64, vld3_f64);
        test_vld4_f64_x4(f64, 4, float64x1x4_t, vst4_f64, vld4_f64);

        test_vld2q_f64_x2(f64, 4, float64x2x2_t, vst2q_f64, vld2q_f64);
        test_vld3q_f64_x3(f64, 6, float64x2x3_t, vst3q_f64, vld3q_f64);
        test_vld4q_f64_x4(f64, 8, float64x2x4_t, vst4q_f64, vld4q_f64);

        test_vld2_s8_x2(i8, 16, int8x8x2_t, vst2_s8, vld2_s8);
        test_vld3_s8_x3(i8, 24, int8x8x3_t, vst3_s8, vld3_s8);
        test_vld4_s8_x4(i8, 32, int8x8x4_t, vst4_s8, vld4_s8);

        test_vld2q_s8_x2(i8, 32, int8x16x2_t, vst2q_s8, vld2q_s8);
        test_vld3q_s8_x3(i8, 48, int8x16x3_t, vst3q_s8, vld3q_s8);
        test_vld4q_s8_x4(i8, 64, int8x16x4_t, vst4q_s8, vld4q_s8);

        test_vld2_s16_x2(i16, 8, int16x4x2_t, vst2_s16, vld2_s16);
        test_vld3_s16_x3(i16, 12, int16x4x3_t, vst3_s16, vld3_s16);
        test_vld4_s16_x4(i16, 16, int16x4x4_t, vst4_s16, vld4_s16);

        test_vld2q_s16_x2(i16, 16, int16x8x2_t, vst2q_s16, vld2q_s16);
        test_vld3q_s16_x3(i16, 24, int16x8x3_t, vst3q_s16, vld3q_s16);
        test_vld4q_s16_x4(i16, 32, int16x8x4_t, vst4q_s16, vld4q_s16);

        test_vld2_s32_x2(i32, 4, int32x2x2_t, vst2_s32, vld2_s32);
        test_vld3_s32_x3(i32, 6, int32x2x3_t, vst3_s32, vld3_s32);
        test_vld4_s32_x4(i32, 8, int32x2x4_t, vst4_s32, vld4_s32);

        test_vld2q_s32_x2(i32, 8, int32x4x2_t, vst2q_s32, vld2q_s32);
        test_vld3q_s32_x3(i32, 12, int32x4x3_t, vst3q_s32, vld3q_s32);
        test_vld4q_s32_x4(i32, 16, int32x4x4_t, vst4q_s32, vld4q_s32);

        test_vld2_s64_x2(i64, 2, int64x1x2_t, vst2_s64, vld2_s64);
        test_vld3_s64_x3(i64, 3, int64x1x3_t, vst3_s64, vld3_s64);
        test_vld4_s64_x4(i64, 4, int64x1x4_t, vst4_s64, vld4_s64);

        test_vld2q_s64_x2(i64, 4, int64x2x2_t, vst2q_s64, vld2q_s64);
        test_vld3q_s64_x3(i64, 6, int64x2x3_t, vst3q_s64, vld3q_s64);
        test_vld4q_s64_x4(i64, 8, int64x2x4_t, vst4q_s64, vld4q_s64);

        test_vld2_u8_x2(u8, 16, uint8x8x2_t, vst2_u8, vld2_u8);
        test_vld3_u8_x3(u8, 24, uint8x8x3_t, vst3_u8, vld3_u8);
        test_vld4_u8_x4(u8, 32, uint8x8x4_t, vst4_u8, vld4_u8);

        test_vld2q_u8_x2(u8, 32, uint8x16x2_t, vst2q_u8, vld2q_u8);
        test_vld3q_u8_x3(u8, 48, uint8x16x3_t, vst3q_u8, vld3q_u8);
        test_vld4q_u8_x4(u8, 64, uint8x16x4_t, vst4q_u8, vld4q_u8);

        test_vld2_u16_x2(u16, 8, uint16x4x2_t, vst2_u16, vld2_u16);
        test_vld3_u16_x3(u16, 12, uint16x4x3_t, vst3_u16, vld3_u16);
        test_vld4_u16_x4(u16, 16, uint16x4x4_t, vst4_u16, vld4_u16);

        test_vld2q_u16_x2(u16, 16, uint16x8x2_t, vst2q_u16, vld2q_u16);
        test_vld3q_u16_x3(u16, 24, uint16x8x3_t, vst3q_u16, vld3q_u16);
        test_vld4q_u16_x4(u16, 32, uint16x8x4_t, vst4q_u16, vld4q_u16);

        test_vld2_u32_x2(u32, 4, uint32x2x2_t, vst2_u32, vld2_u32);
        test_vld3_u32_x3(u32, 6, uint32x2x3_t, vst3_u32, vld3_u32);
        test_vld4_u32_x4(u32, 8, uint32x2x4_t, vst4_u32, vld4_u32);

        test_vld2q_u32_x2(u32, 8, uint32x4x2_t, vst2q_u32, vld2q_u32);
        test_vld3q_u32_x3(u32, 12, uint32x4x3_t, vst3q_u32, vld3q_u32);
        test_vld4q_u32_x4(u32, 16, uint32x4x4_t, vst4q_u32, vld4q_u32);

        test_vld2_u64_x2(u64, 2, uint64x1x2_t, vst2_u64, vld2_u64);
        test_vld3_u64_x3(u64, 3, uint64x1x3_t, vst3_u64, vld3_u64);
        test_vld4_u64_x4(u64, 4, uint64x1x4_t, vst4_u64, vld4_u64);

        test_vld2q_u64_x2(u64, 4, uint64x2x2_t, vst2q_u64, vld2q_u64);
        test_vld3q_u64_x3(u64, 6, uint64x2x3_t, vst3q_u64, vld3q_u64);
        test_vld4q_u64_x4(u64, 8, uint64x2x4_t, vst4q_u64, vld4q_u64);

        test_vld2_p8_x2(p8, 16, poly8x8x2_t, vst2_p8, vld2_p8);
        test_vld3_p8_x3(p8, 24, poly8x8x3_t, vst3_p8, vld3_p8);
        test_vld4_p8_x4(p8, 32, poly8x8x4_t, vst4_p8, vld4_p8);

        test_vld2q_p8_x2(p8, 32, poly8x16x2_t, vst2q_p8, vld2q_p8);
        test_vld3q_p8_x3(p8, 48, poly8x16x3_t, vst3q_p8, vld3q_p8);
        test_vld4q_p8_x4(p8, 64, poly8x16x4_t, vst4q_p8, vld4q_p8);

        test_vld2_p16_x2(p16, 8, poly16x4x2_t, vst2_p16, vld2_p16);
        test_vld3_p16_x3(p16, 12, poly16x4x3_t, vst3_p16, vld3_p16);
        test_vld4_p16_x4(p16, 16, poly16x4x4_t, vst4_p16, vld4_p16);

        test_vld2q_p16_x2(p16, 16, poly16x8x2_t, vst2q_p16, vld2q_p16);
        test_vld3q_p16_x3(p16, 24, poly16x8x3_t, vst3q_p16, vld3q_p16);
        test_vld4q_p16_x4(p16, 32, poly16x8x4_t, vst4q_p16, vld4q_p16);
    }
}

#[cfg(test)]
#[path = "../../arm_shared/neon/table_lookup_tests.rs"]
mod table_lookup_tests;

#[cfg(test)]
#[path = "../../arm_shared/neon/shift_and_insert_tests.rs"]
mod shift_and_insert_tests;

#[cfg(test)]
#[path = "../../arm_shared/neon/load_tests.rs"]
mod load_tests;

#[cfg(test)]
#[path = "../../arm_shared/neon/store_tests.rs"]
mod store_tests;