Vector Optimized Library of Kernels  3.2.0
Architecture-tuned implementations of math kernels
volk_rvv_intrinsics.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2024 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
10 /*
11  * This file is intended to hold helper macros composed of several RVV intrinsics.
12  * They should be used in VOLK kernels to avoid copy-paste.
13  */
14 
15 #ifndef INCLUDE_VOLK_VOLK_RVV_INTRINSICS_H_
16 #define INCLUDE_VOLK_VOLK_RVV_INTRINSICS_H_
17 #include <riscv_vector.h>
18 
/*
 * RISCV_SHRINK2(op, T, S, v)
 *
 * Combine m1 parts 0 and 1 of `v` into a single value.
 *
 * Expands to
 *     __riscv_<op>(part0, part1, vl)
 * where partN is __riscv_vget_<T><S>m1(v, N) and vl is
 * __riscv_vsetvlmax_e<S>m1().
 *
 *   op  intrinsic name without the "__riscv_" prefix; invoked as (a, b, vl)
 *   T   type tag pasted into the vget intrinsic name
 *   S   element width pasted into the vget and vsetvlmax intrinsic names
 *   v   source vector group (presumably LMUL=2, since only parts 0 and 1
 *       are read -- confirm against callers)
 *
 * `v` appears twice in the expansion, so it must not have side effects.
 */
#define RISCV_SHRINK2(op, T, S, v)              \
    __riscv_##op(__riscv_vget_##T##S##m1(v, 0), \
                 __riscv_vget_##T##S##m1(v, 1), \
                 __riscv_vsetvlmax_e##S##m1())
23 
/*
 * RISCV_SHRINK4(op, T, S, v)
 *
 * Combine m1 parts 0..3 of `v` into a single value with a two-level tree:
 *     op(op(part0, part1), op(part2, part3))
 *
 * Every `op` is __riscv_<op>(a, b, vl) with vl = __riscv_vsetvlmax_e<S>m1(),
 * and partN is __riscv_vget_<T><S>m1(v, N).
 *
 *   op  intrinsic name without the "__riscv_" prefix; invoked as (a, b, vl)
 *   T   type tag pasted into the vget intrinsic name
 *   S   element width pasted into the vget and vsetvlmax intrinsic names
 *   v   source vector group (presumably LMUL=4, since parts 0..3 are read
 *       -- confirm against callers)
 *
 * `v` appears four times in the expansion, so it must not have side effects.
 */
#define RISCV_SHRINK4(op, T, S, v)                           \
    __riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1(v, 0), \
                              __riscv_vget_##T##S##m1(v, 1), \
                              __riscv_vsetvlmax_e##S##m1()), \
                 __riscv_##op(__riscv_vget_##T##S##m1(v, 2), \
                              __riscv_vget_##T##S##m1(v, 3), \
                              __riscv_vsetvlmax_e##S##m1()), \
                 __riscv_vsetvlmax_e##S##m1())
32 
/*
 * RISCV_SHRINK8(op, T, S, v)
 *
 * Combine m1 parts 0..7 of `v` into a single value with a three-level tree:
 *     op(op(op(part0, part1), op(part2, part3)),
 *        op(op(part4, part5), op(part6, part7)))
 *
 * Every `op` is __riscv_<op>(a, b, vl) with vl = __riscv_vsetvlmax_e<S>m1(),
 * and partN is __riscv_vget_<T><S>m1(v, N).
 *
 *   op  intrinsic name without the "__riscv_" prefix; invoked as (a, b, vl)
 *   T   type tag pasted into the vget intrinsic name
 *   S   element width pasted into the vget and vsetvlmax intrinsic names
 *   v   source vector group (presumably LMUL=8, since parts 0..7 are read
 *       -- confirm against callers)
 *
 * `v` appears eight times in the expansion, so it must not have side effects.
 */
#define RISCV_SHRINK8(op, T, S, v)                                        \
    __riscv_##op(__riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1(v, 0), \
                                           __riscv_vget_##T##S##m1(v, 1), \
                                           __riscv_vsetvlmax_e##S##m1()), \
                              __riscv_##op(__riscv_vget_##T##S##m1(v, 2), \
                                           __riscv_vget_##T##S##m1(v, 3), \
                                           __riscv_vsetvlmax_e##S##m1()), \
                              __riscv_vsetvlmax_e##S##m1()),              \
                 __riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1(v, 4), \
                                           __riscv_vget_##T##S##m1(v, 5), \
                                           __riscv_vsetvlmax_e##S##m1()), \
                              __riscv_##op(__riscv_vget_##T##S##m1(v, 6), \
                                           __riscv_vget_##T##S##m1(v, 7), \
                                           __riscv_vsetvlmax_e##S##m1()), \
                              __riscv_vsetvlmax_e##S##m1()),              \
                 __riscv_vsetvlmax_e##S##m1())
49 
/*
 * RISCV_PERM4(f, v, vidx)
 *
 * Apply `f` to each of the four u8m1 parts of `v`, using the same `vidx`
 * for every part, and reassemble the four results with
 * __riscv_vcreate_v_u8m1_u8m4.
 *
 *   f     function or function-like macro invoked as f(part, vidx, vl),
 *         with vl = __riscv_vsetvlmax_e8m1()
 *   v     source vector group; part N is __riscv_vget_u8m1(v, N), N = 0..3
 *   vidx  second argument forwarded unchanged to every call of f
 *
 * `v` and `vidx` each appear four times in the expansion, so they must not
 * have side effects.
 */
#define RISCV_PERM4(f, v, vidx)                                     \
    __riscv_vcreate_v_u8m1_u8m4(                                    \
        f(__riscv_vget_u8m1(v, 0), vidx, __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1(v, 1), vidx, __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1(v, 2), vidx, __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1(v, 3), vidx, __riscv_vsetvlmax_e8m1()))
56 
/*
 * RISCV_LUT4(f, vtbl, v)
 *
 * Apply `f` with the same first operand `vtbl` to each of the four u8m1
 * parts of `v`, and reassemble the four results with
 * __riscv_vcreate_v_u8m1_u8m4.
 *
 *   f     function or function-like macro invoked as f(vtbl, part, vl),
 *         with vl = __riscv_vsetvlmax_e8m1()
 *   vtbl  first argument forwarded unchanged to every call of f
 *   v     source vector group; part N is __riscv_vget_u8m1(v, N), N = 0..3
 *
 * `vtbl` and `v` each appear four times in the expansion, so they must not
 * have side effects.
 */
#define RISCV_LUT4(f, vtbl, v)                                      \
    __riscv_vcreate_v_u8m1_u8m4(                                    \
        f(vtbl, __riscv_vget_u8m1(v, 0), __riscv_vsetvlmax_e8m1()), \
        f(vtbl, __riscv_vget_u8m1(v, 1), __riscv_vsetvlmax_e8m1()), \
        f(vtbl, __riscv_vget_u8m1(v, 2), __riscv_vsetvlmax_e8m1()), \
        f(vtbl, __riscv_vget_u8m1(v, 3), __riscv_vsetvlmax_e8m1()))
63 
/*
 * RISCV_PERM8(f, v, vidx)
 *
 * Apply `f` to each of the eight u8m1 parts of `v`, using the same `vidx`
 * for every part, and reassemble the eight results with
 * __riscv_vcreate_v_u8m1_u8m8.
 *
 *   f     function or function-like macro invoked as f(part, vidx, vl),
 *         with vl = __riscv_vsetvlmax_e8m1()
 *   v     source vector group; part N is __riscv_vget_u8m1(v, N), N = 0..7
 *   vidx  second argument forwarded unchanged to every call of f
 *
 * `v` and `vidx` each appear eight times in the expansion, so they must not
 * have side effects.
 */
#define RISCV_PERM8(f, v, vidx)                                     \
    __riscv_vcreate_v_u8m1_u8m8(                                    \
        f(__riscv_vget_u8m1(v, 0), vidx, __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1(v, 1), vidx, __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1(v, 2), vidx, __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1(v, 3), vidx, __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1(v, 4), vidx, __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1(v, 5), vidx, __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1(v, 6), vidx, __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1(v, 7), vidx, __riscv_vsetvlmax_e8m1()))
74 
/*
 * RISCV_VMFLTZ(T, v, vl)
 *
 * Signed "less than zero" compare on the integer reinterpretation of `v`.
 *
 * Expands to __riscv_vmslt(__riscv_vreinterpret_i<T>(v), 0, vl).
 *
 *   T   suffix pasted after "__riscv_vreinterpret_i" to name the target
 *       integer vector type
 *   v   vector to reinterpret and test
 *   vl  vector length forwarded to __riscv_vmslt
 *
 * NOTE(review): the name suggests `v` is a floating-point vector, in which
 * case this is a sign-bit test and would also select -0.0 and NaNs with the
 * sign bit set -- confirm that callers expect this.
 */
#define RISCV_VMFLTZ(T, v, vl) __riscv_vmslt(__riscv_vreinterpret_i##T(v), 0, vl)
76 
77 #endif /* INCLUDE_VOLK_VOLK_RVV_INTRINSICS_H_ */