41 #ifndef INCLUDED_VOLK_32u_REVERSE_32u_U_H 90 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0,
91 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8,
92 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94,
93 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
94 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2,
95 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA,
96 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86,
97 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
98 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE,
99 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1,
100 0x31, 0xB1, 0x71, 0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99,
101 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
102 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD,
103 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3,
104 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B,
105 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
106 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7,
107 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
108 0x3F, 0xBF, 0x7F, 0xFF
110 #ifdef LV_HAVE_GENERIC 113 unsigned int num_points)
117 unsigned int number = 0;
118 for (; number < num_points; ++number) {
119 out_ptr->
b00 = in_ptr->
b31;
120 out_ptr->
b01 = in_ptr->
b30;
121 out_ptr->
b02 = in_ptr->
b29;
122 out_ptr->
b03 = in_ptr->
b28;
123 out_ptr->
b04 = in_ptr->
b27;
124 out_ptr->
b05 = in_ptr->
b26;
125 out_ptr->
b06 = in_ptr->
b25;
126 out_ptr->
b07 = in_ptr->
b24;
127 out_ptr->
b08 = in_ptr->
b23;
128 out_ptr->
b09 = in_ptr->
b22;
129 out_ptr->
b10 = in_ptr->
b21;
130 out_ptr->
b11 = in_ptr->
b20;
131 out_ptr->
b12 = in_ptr->
b19;
132 out_ptr->
b13 = in_ptr->
b18;
133 out_ptr->
b14 = in_ptr->
b17;
134 out_ptr->
b15 = in_ptr->
b16;
135 out_ptr->
b16 = in_ptr->
b15;
136 out_ptr->
b17 = in_ptr->
b14;
137 out_ptr->
b18 = in_ptr->
b13;
138 out_ptr->
b19 = in_ptr->
b12;
139 out_ptr->
b20 = in_ptr->
b11;
140 out_ptr->
b21 = in_ptr->
b10;
141 out_ptr->
b22 = in_ptr->
b09;
142 out_ptr->
b23 = in_ptr->
b08;
143 out_ptr->
b24 = in_ptr->
b07;
144 out_ptr->
b25 = in_ptr->
b06;
145 out_ptr->
b26 = in_ptr->
b05;
146 out_ptr->
b27 = in_ptr->
b04;
147 out_ptr->
b28 = in_ptr->
b03;
148 out_ptr->
b29 = in_ptr->
b02;
149 out_ptr->
b30 = in_ptr->
b01;
150 out_ptr->
b31 = in_ptr->
b00;
157 #ifdef LV_HAVE_GENERIC 160 unsigned int num_points)
162 const uint32_t* in_ptr = in;
163 uint32_t* out_ptr = out;
164 unsigned int number = 0;
165 for (; number < num_points; ++number) {
212 #ifdef LV_HAVE_GENERIC 216 const uint32_t* in_ptr = in;
217 uint32_t* out_ptr = out;
218 unsigned int number = 0;
219 for (; number < num_points; ++number) {
232 #ifdef LV_HAVE_GENERIC 236 const uint32_t* in_ptr = in;
237 uint32_t* out_ptr = out;
240 unsigned int number = 0;
241 for (; number < num_points; ++number) {
242 in8 = (
const uint8_t*)in_ptr;
243 out8 = (uint8_t*)out_ptr;
244 out8[3] = ((in8[0] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
245 out8[2] = ((in8[1] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
246 out8[1] = ((in8[2] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
247 out8[0] = ((in8[3] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
254 #ifdef LV_HAVE_GENERIC 259 const uint32_t* in_ptr = in;
260 uint32_t* out_ptr = out;
263 unsigned int number = 0;
264 for (; number < num_points; ++number) {
265 in8 = (
const uint8_t*)in_ptr;
266 out8 = (uint8_t*)out_ptr;
267 out8[3] = (in8[0] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
268 out8[2] = (in8[1] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
269 out8[1] = (in8[2] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
270 out8[0] = (in8[3] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
278 #ifdef LV_HAVE_GENERIC 281 unsigned int num_points)
283 const uint32_t* in_ptr = in;
284 uint32_t* out_ptr = out;
285 unsigned int number = 0;
286 for (; number < num_points; ++number) {
287 uint32_t tmp = *in_ptr;
291 tmp = (tmp << 16) | (tmp >> 16);
296 tmp = ((tmp & (0xFF | 0xFF << 16)) << 8) | ((tmp >> 8) & (0xFF | 0xFF << 16));
300 tmp = ((tmp & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24)) << 4) |
301 ((tmp >> 4) & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24));
306 tmp = ((tmp & (0x33333333)) << 2) | ((tmp >> 2) & (0x33333333));
311 tmp = ((tmp & (0x55555555)) << 1) | ((tmp >> 1) & (0x55555555));
319 #ifdef LV_HAVE_GENERIC 322 unsigned int num_points)
325 const uint32_t* in_ptr = in;
326 uint32_t* out_ptr = out;
327 unsigned int number = 0;
328 for (; number < num_points; ++number) {
329 uint32_t tmp = *in_ptr;
330 tmp = ((tmp & (0x55555555)) << 1) | ((tmp >> 1) & (0x55555555));
331 tmp = ((tmp & (0x33333333)) << 2) | ((tmp >> 2) & (0x33333333));
332 tmp = ((tmp & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24)) << 4) |
333 ((tmp >> 4) & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24));
334 tmp = ((tmp & (0xFF | 0xFF << 16)) << 8) | ((tmp >> 8) & (0xFF | 0xFF << 16));
335 tmp = (tmp << 16) | (tmp >> 16);
344 #ifdef LV_HAVE_NEONV8 345 #include <arm_neon.h> 348 volk_32u_reverse_32u_neonv8(uint32_t* out,
const uint32_t* in,
unsigned int num_points)
350 const uint32_t* in_ptr = in;
351 uint32_t* out_ptr = out;
353 const uint8x16_t idx = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
355 const unsigned int quarterPoints = num_points / 4;
356 unsigned int number = 0;
357 for (; number < quarterPoints; ++number) {
359 uint32x4_t x = vld1q_u32(in_ptr);
361 vreinterpretq_u32_u8(vqtbl1q_u8(vrbitq_u8(vreinterpretq_u8_u32(x)), idx));
362 vst1q_u32(out_ptr, z);
366 number = quarterPoints * 4;
367 for (; number < num_points; ++number) {
379 #include <arm_neon.h> 382 __VOLK_ASM("rbit %[result], %[value]" \ 383 : [result] "=r"(*out_ptr) \ 384 : [value] "r"(*in_ptr) \ 393 const uint32_t* in_ptr = in;
394 uint32_t* out_ptr = out;
395 const unsigned int eighthPoints = num_points / 8;
396 unsigned int number = 0;
397 for (; number < eighthPoints; ++number) {
408 number = eighthPoints * 8;
409 for (; number < num_points; ++number) {
int b18
Definition: volk_32u_reverse_32u.h:61
int b12
Definition: volk_32u_reverse_32u.h:55
int b31
Definition: volk_32u_reverse_32u.h:74
int b17
Definition: volk_32u_reverse_32u.h:60
int b01
Definition: volk_32u_reverse_32u.h:44
int b06
Definition: volk_32u_reverse_32u.h:49
int b15
Definition: volk_32u_reverse_32u.h:58
int b05
Definition: volk_32u_reverse_32u.h:48
uint8_t b02
Definition: volk_32u_reverse_32u.h:79
int b08
Definition: volk_32u_reverse_32u.h:51
int b16
Definition: volk_32u_reverse_32u.h:59
uint8_t b03
Definition: volk_32u_reverse_32u.h:80
Definition: volk_32u_reverse_32u.h:76
int b25
Definition: volk_32u_reverse_32u.h:68
static void volk_32u_reverse_32u_byte_shuffle(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:158
static void volk_32u_reverse_32u_arm(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:390
int b07
Definition: volk_32u_reverse_32u.h:50
static void volk_32u_reverse_32u_dword_shuffle(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:111
static const unsigned char BitReverseTable256[]
Definition: volk_32u_reverse_32u.h:89
Definition: volk_32u_reverse_32u.h:42
uint8_t b07
Definition: volk_32u_reverse_32u.h:84
int b19
Definition: volk_32u_reverse_32u.h:62
#define DO_RBIT
Definition: volk_32u_reverse_32u.h:381
int b24
Definition: volk_32u_reverse_32u.h:67
int b10
Definition: volk_32u_reverse_32u.h:53
int b14
Definition: volk_32u_reverse_32u.h:57
int b29
Definition: volk_32u_reverse_32u.h:72
uint8_t b04
Definition: volk_32u_reverse_32u.h:81
int b27
Definition: volk_32u_reverse_32u.h:70
uint8_t b06
Definition: volk_32u_reverse_32u.h:83
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:62
int b23
Definition: volk_32u_reverse_32u.h:66
int b00
Definition: volk_32u_reverse_32u.h:43
int b22
Definition: volk_32u_reverse_32u.h:65
uint8_t b01
Definition: volk_32u_reverse_32u.h:78
int b04
Definition: volk_32u_reverse_32u.h:47
static void volk_32u_reverse_32u_bintree_permute_bottom_up(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:320
int b28
Definition: volk_32u_reverse_32u.h:71
int b30
Definition: volk_32u_reverse_32u.h:73
int b26
Definition: volk_32u_reverse_32u.h:69
int b20
Definition: volk_32u_reverse_32u.h:63
int b11
Definition: volk_32u_reverse_32u.h:54
int b21
Definition: volk_32u_reverse_32u.h:64
int b09
Definition: volk_32u_reverse_32u.h:52
int b02
Definition: volk_32u_reverse_32u.h:45
uint8_t b00
Definition: volk_32u_reverse_32u.h:77
static void volk_32u_reverse_32u_bintree_permute_top_down(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:279
static void volk_32u_reverse_32u_lut(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:214
uint8_t b05
Definition: volk_32u_reverse_32u.h:82
static void volk_32u_reverse_32u_1972magic(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:257
int b13
Definition: volk_32u_reverse_32u.h:56
static void volk_32u_reverse_32u_2001magic(uint32_t *out, const uint32_t *in, unsigned int num_points)
Definition: volk_32u_reverse_32u.h:234
int b03
Definition: volk_32u_reverse_32u.h:46