30 #ifndef INCLUDED_VOLK_32u_REVERSE_32u_U_H
/* 256 lookup entries: the entry at index i is the byte value i with its eight
 * bits in reversed order (index 0x01 -> 0x80, index 0x02 -> 0x40,
 * index 0x03 -> 0xC0, ... index 0xFF -> 0xFF).
 * NOTE(review): the array declaration and closing brace are not part of this
 * excerpt. */
35 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0,
36 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8,
37 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94,
38 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
39 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2,
40 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA,
41 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86,
42 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
43 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE,
44 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1,
45 0x31, 0xB1, 0x71, 0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99,
46 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
47 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD,
48 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3,
49 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B,
50 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
51 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7,
52 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
53 0x3F, 0xBF, 0x7F, 0xFF
55 #ifdef LV_HAVE_GENERIC
/* Reference bit reversal: one 32-bit word is processed per loop iteration, for
 * num_points iterations.
 * NOTE(review): the function header, the braces and the pointer advance are
 * not part of this excerpt. */
59 const uint32_t* in_ptr = in;
60 uint32_t* out_ptr = out;
61 unsigned int number = 0;
62 for (; number < num_points; ++number) {
/* Each term isolates input bit (31 - k) and deposits it at output bit k,
 * for k = 0..31, so the output word is the input with its bit order mirrored.
 * *in_ptr is uint32_t, so all shifts (including << 31) are unsigned. */
63 *out_ptr = (((*in_ptr >> 31) & 1) << 0) | (((*in_ptr >> 30) & 1) << 1) |
64 (((*in_ptr >> 29) & 1) << 2) | (((*in_ptr >> 28) & 1) << 3) |
65 (((*in_ptr >> 27) & 1) << 4) | (((*in_ptr >> 26) & 1) << 5) |
66 (((*in_ptr >> 25) & 1) << 6) | (((*in_ptr >> 24) & 1) << 7) |
67 (((*in_ptr >> 23) & 1) << 8) | (((*in_ptr >> 22) & 1) << 9) |
68 (((*in_ptr >> 21) & 1) << 10) | (((*in_ptr >> 20) & 1) << 11) |
69 (((*in_ptr >> 19) & 1) << 12) | (((*in_ptr >> 18) & 1) << 13) |
70 (((*in_ptr >> 17) & 1) << 14) | (((*in_ptr >> 16) & 1) << 15) |
71 (((*in_ptr >> 15) & 1) << 16) | (((*in_ptr >> 14) & 1) << 17) |
72 (((*in_ptr >> 13) & 1) << 18) | (((*in_ptr >> 12) & 1) << 19) |
73 (((*in_ptr >> 11) & 1) << 20) | (((*in_ptr >> 10) & 1) << 21) |
74 (((*in_ptr >> 9) & 1) << 22) | (((*in_ptr >> 8) & 1) << 23) |
75 (((*in_ptr >> 7) & 1) << 24) | (((*in_ptr >> 6) & 1) << 25) |
76 (((*in_ptr >> 5) & 1) << 26) | (((*in_ptr >> 4) & 1) << 27) |
77 (((*in_ptr >> 3) & 1) << 28) | (((*in_ptr >> 2) & 1) << 29) |
78 (((*in_ptr >> 1) & 1) << 30) | (((*in_ptr >> 0) & 1) << 31);
85 #ifdef LV_HAVE_GENERIC
/* Byte-wise bit reversal: each 32-bit word is handled as four bytes.
 * NOTE(review): the start of the signature, the braces and the pointer advance
 * are not part of this excerpt. */
88 unsigned int num_points)
90 const uint32_t* in_ptr = in;
91 uint32_t* out_ptr = out;
92 unsigned int number = 0;
93 for (; number < num_points; ++number) {
/* View the current input and output words as arrays of four bytes. */
94 const uint8_t* in8 = (
const uint8_t*)in_ptr;
95 uint8_t* out8 = (uint8_t*)out_ptr;
/* Input byte j is written to output byte (3 - j) with its own eight bits
 * mirrored (bit 7 -> bit 0, ..., bit 0 -> bit 7). Mirroring both the byte
 * positions and the bits inside each byte mirrors the whole 32-bit word. */
97 out8[3] = (((in8[0] >> 7) & 1) << 0) | (((in8[0] >> 6) & 1) << 1) |
98 (((in8[0] >> 5) & 1) << 2) | (((in8[0] >> 4) & 1) << 3) |
99 (((in8[0] >> 3) & 1) << 4) | (((in8[0] >> 2) & 1) << 5) |
100 (((in8[0] >> 1) & 1) << 6) | (((in8[0] >> 0) & 1) << 7);
102 out8[2] = (((in8[1] >> 7) & 1) << 0) | (((in8[1] >> 6) & 1) << 1) |
103 (((in8[1] >> 5) & 1) << 2) | (((in8[1] >> 4) & 1) << 3) |
104 (((in8[1] >> 3) & 1) << 4) | (((in8[1] >> 2) & 1) << 5) |
105 (((in8[1] >> 1) & 1) << 6) | (((in8[1] >> 0) & 1) << 7);
107 out8[1] = (((in8[2] >> 7) & 1) << 0) | (((in8[2] >> 6) & 1) << 1) |
108 (((in8[2] >> 5) & 1) << 2) | (((in8[2] >> 4) & 1) << 3) |
109 (((in8[2] >> 3) & 1) << 4) | (((in8[2] >> 2) & 1) << 5) |
110 (((in8[2] >> 1) & 1) << 6) | (((in8[2] >> 0) & 1) << 7);
112 out8[0] = (((in8[3] >> 7) & 1) << 0) | (((in8[3] >> 6) & 1) << 1) |
113 (((in8[3] >> 5) & 1) << 2) | (((in8[3] >> 4) & 1) << 3) |
114 (((in8[3] >> 3) & 1) << 4) | (((in8[3] >> 2) & 1) << 5) |
115 (((in8[3] >> 1) & 1) << 6) | (((in8[3] >> 0) & 1) << 7);
124 #ifdef LV_HAVE_GENERIC
/* Sets up the input/output cursors and a loop that runs num_points times.
 * NOTE(review): the function header and the entire loop body are not part of
 * this excerpt; presumably each byte is translated through
 * BitReverseTable256 -- confirm against the full file. */
128 const uint32_t* in_ptr = in;
129 uint32_t* out_ptr = out;
130 unsigned int number = 0;
131 for (; number < num_points; ++number) {
144 #ifdef LV_HAVE_GENERIC
/* Byte-wise bit reversal using 64-bit multiply / mask / multiply / shift.
 * NOTE(review): the function header, the declarations of in8/out8, the braces
 * and the pointer advance are not part of this excerpt. */
148 const uint32_t* in_ptr = in;
149 uint32_t* out_ptr = out;
152 unsigned int number = 0;
153 for (; number < num_points; ++number) {
/* View the current input and output words as arrays of four bytes. */
154 in8 = (
const uint8_t*)in_ptr;
155 out8 = (uint8_t*)out_ptr;
/* Input byte j goes to output byte (3 - j). `*` binds tighter than `>>`, so
 * each line computes (((b * M1) & MASK) * M2) >> 32 in 64-bit arithmetic, and
 * the assignment to a uint8_t keeps only the low eight bits of that result.
 * The constants match the "reverse the bits in a byte with 4 operations
 * (64-bit multiply, no division)" recipe from Bit Twiddling Hacks --
 * presumably the origin of this variant; confirm. */
156 out8[3] = ((in8[0] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
157 out8[2] = ((in8[1] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
158 out8[1] = ((in8[2] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
159 out8[0] = ((in8[3] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
166 #ifdef LV_HAVE_GENERIC
/* Byte-wise bit reversal using a 64-bit multiply, a mask and a modulus.
 * NOTE(review): the function header, the declarations of in8/out8, the braces
 * and the pointer advance are not part of this excerpt. */
171 const uint32_t* in_ptr = in;
172 uint32_t* out_ptr = out;
175 unsigned int number = 0;
176 for (; number < num_points; ++number) {
/* View the current input and output words as arrays of four bytes. */
177 in8 = (
const uint8_t*)in_ptr;
178 out8 = (uint8_t*)out_ptr;
/* Input byte j goes to output byte (3 - j). `*` binds tighter than `&`, so
 * each line computes ((b * M) & MASK) % 1023 in 64-bit arithmetic.
 * The constants match the "reverse the bits in a byte with 3 operations
 * (64-bit multiply and modulus division)" recipe from Bit Twiddling Hacks --
 * presumably the origin of this variant; confirm. */
179 out8[3] = (in8[0] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
180 out8[2] = (in8[1] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
181 out8[1] = (in8[2] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
182 out8[0] = (in8[3] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
190 #ifdef LV_HAVE_GENERIC
/* Bit reversal by successive swaps, from the widest groups to the narrowest.
 * NOTE(review): the start of the signature, the braces, the store of tmp to
 * the output and the pointer advance are not part of this excerpt. */
193 unsigned int num_points)
195 const uint32_t* in_ptr = in;
196 uint32_t* out_ptr = out;
197 unsigned int number = 0;
198 for (; number < num_points; ++number) {
199 uint32_t tmp = *in_ptr;
/* Swap the two 16-bit halves. */
203 tmp = (tmp << 16) | (tmp >> 16);
/* Swap the two bytes inside each half; the mask evaluates to 0x00FF00FF
 * because `<<` binds tighter than `|`. */
208 tmp = ((tmp & (0xFF | 0xFF << 16)) << 8) | ((tmp >> 8) & (0xFF | 0xFF << 16));
/* Swap the two nibbles inside each byte; the mask evaluates to 0x0F0F0F0F. */
212 tmp = ((tmp & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24)) << 4) |
213 ((tmp >> 4) & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24));
/* Swap the two bit pairs inside each nibble. */
218 tmp = ((tmp & (0x33333333)) << 2) | ((tmp >> 2) & (0x33333333));
/* Swap the two bits inside each pair. */
223 tmp = ((tmp & (0x55555555)) << 1) | ((tmp >> 1) & (0x55555555));
231 #ifdef LV_HAVE_GENERIC
/* Bit reversal by successive swaps, from the narrowest groups to the widest.
 * NOTE(review): the start of the signature, the braces, the store of tmp to
 * the output and the pointer advance are not part of this excerpt. */
234 unsigned int num_points)
237 const uint32_t* in_ptr = in;
238 uint32_t* out_ptr = out;
239 unsigned int number = 0;
240 for (; number < num_points; ++number) {
241 uint32_t tmp = *in_ptr;
/* Swap adjacent bits, then adjacent bit pairs. */
242 tmp = ((tmp & (0x55555555)) << 1) | ((tmp >> 1) & (0x55555555));
243 tmp = ((tmp & (0x33333333)) << 2) | ((tmp >> 2) & (0x33333333));
/* Swap the nibbles inside each byte; the mask evaluates to 0x0F0F0F0F. */
244 tmp = ((tmp & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24)) << 4) |
245 ((tmp >> 4) & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24));
/* Swap the bytes inside each 16-bit half (mask 0x00FF00FF), then the halves. */
246 tmp = ((tmp & (0xFF | 0xFF << 16)) << 8) | ((tmp >> 8) & (0xFF | 0xFF << 16));
247 tmp = (tmp << 16) | (tmp >> 16);
256 #ifdef LV_HAVE_NEONV8
257 #include <arm_neon.h>
/* NEON variant: processes four 32-bit words per vector iteration, then walks
 * the remaining (num_points % 4) words one at a time.
 * NOTE(review): the return type, the braces, the declaration of z, the pointer
 * advances and the tail-loop body are not part of this excerpt. */
260 volk_32u_reverse_32u_neonv8(uint32_t* out,
const uint32_t* in,
unsigned int num_points)
262 const uint32_t* in_ptr = in;
263 uint32_t* out_ptr = out;
/* Table-lookup indices that reverse the byte order inside each group of four
 * bytes, i.e. inside each 32-bit lane of the 16-byte vector. */
265 const uint8x16_t idx = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
267 const unsigned int quarterPoints = num_points / 4;
268 unsigned int number = 0;
269 for (; number < quarterPoints; ++number) {
/* Load four words. */
271 uint32x4_t x = vld1q_u32(in_ptr);
/* vrbitq_u8 reverses the bits inside every byte; the vqtbl1q_u8 lookup with
 * idx then reverses the byte order per lane. The two steps together mirror
 * each 32-bit word. This line is the initializer of z. */
273 vreinterpretq_u32_u8(vqtbl1q_u8(vrbitq_u8(vreinterpretq_u8_u32(x)), idx));
274 vst1q_u32(out_ptr, z);
/* Resume at the first word not covered by the vector loop. */
278 number = quarterPoints * 4;
279 for (; number < num_points; ++number) {
292 #include <arm_neon.h>
/* Two alternative inline-assembly sequences follow, selected by __aarch64__.
 * Both issue the `rbit` instruction with *in_ptr as input and *out_ptr as
 * output; the AArch64 form uses the %w operand modifier (32-bit register
 * view). According to the file's cross-reference index these lines belong to
 * the DO_RBIT macro.
 * NOTE(review): the #define lines, #else/#endif and the rest of the macro body
 * are not part of this excerpt. */
294 #if defined(__aarch64__)
296 __VOLK_ASM("rbit %w[result], %w[value]" \
297 : [result] "=r"(*out_ptr) \
298 : [value] "r"(*in_ptr) \
304 __VOLK_ASM("rbit %[result], %[value]" \
305 : [result] "=r"(*out_ptr) \
306 : [value] "r"(*in_ptr) \
316 const uint32_t* in_ptr = in;
317 uint32_t* out_ptr = out;
/* The main loop runs num_points / 8 times; the second loop resumes at index
 * eighthPoints * 8 and covers the remaining (num_points % 8) words.
 * NOTE(review): both loop bodies are not part of this excerpt; presumably
 * each invokes DO_RBIT -- confirm against the full file. */
318 const unsigned int eighthPoints = num_points / 8;
319 unsigned int number = 0;
320 for (; number < eighthPoints; ++number) {
331 number = eighthPoints * 8;
332 for (; number < num_points; ++number) {
static void volk_32u_reverse_32u_1972magic(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:169
static void volk_32u_reverse_32u_2001magic(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:146
static void volk_32u_reverse_32u_lut(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:126
#define DO_RBIT
Definition: volk_32u_reverse_32u.h:303
static const unsigned char BitReverseTable256[]
Definition: volk_32u_reverse_32u.h:34
static void volk_32u_reverse_32u_bintree_permute_bottom_up(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:232
static void volk_32u_reverse_32u_generic(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:57
static void volk_32u_reverse_32u_bintree_permute_top_down(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:191
static void volk_32u_reverse_32u_arm(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:313
static void volk_32u_reverse_32u_byte_shuffle(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:86
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:68