74 #ifndef VOLK_KERNELS_VOLK_VOLK_32F_8U_POLARBUTTERFLY_32F_H_ 75 #define VOLK_KERNELS_VOLK_VOLK_32F_8U_POLARBUTTERFLY_32F_H_ 79 static inline float llr_odd(
const float la,
const float lb)
81 const float ala = fabsf(la);
82 const float alb = fabsf(lb);
83 return copysignf(1.0f, la) * copysignf(1.0f, lb) * (ala > alb ? alb : ala);
87 float* llrs,
int min_stage,
const int depth,
const int frame_size,
const int row)
89 int loop_stage = depth - 1;
92 int stage_size = 0x01 << loop_stage;
95 while (min_stage <= loop_stage) {
96 dst_llr_ptr = llrs + loop_stage * frame_size + row;
97 src_llr_ptr = dst_llr_ptr + frame_size;
98 for (el = 0; el < stage_size; el++) {
99 *dst_llr_ptr++ =
llr_odd(*src_llr_ptr, *(src_llr_ptr + 1));
108 static inline float llr_even(
const float la,
const float lb,
const unsigned char f)
119 even_u_values(
unsigned char* u_even,
const unsigned char* u,
const int u_num)
123 for (
i = 1;
i < u_num;
i += 2) {
133 for (
i = 1;
i < u_num;
i += 2) {
134 *u_xor++ = *u ^ *(u + 1);
141 int max_stage_depth = 0;
142 int half_stage_size = 0x01;
143 int stage_size = half_stage_size << 1;
144 while (max_stage_depth < (frame_exp - 1)) {
145 if (!(row % stage_size < half_stage_size)) {
148 half_stage_size <<= 1;
152 return max_stage_depth;
155 #ifdef LV_HAVE_GENERIC 164 const int frame_size = 0x01 << frame_exp;
165 const int next_stage = stage + 1;
167 const int half_stage_size = 0x01 << stage;
168 const int stage_size = half_stage_size << 1;
170 const bool is_upper_stage_half = row % stage_size < half_stage_size;
173 float* next_llrs = llrs + frame_size;
174 float* call_row_llr = llrs + row;
176 const int section = row - (row % stage_size);
177 const int jump_size = ((row % half_stage_size) << 1) % stage_size;
179 const int next_upper_row = section + jump_size;
180 const int next_lower_row = next_upper_row + 1;
182 const float* upper_right_llr_ptr = next_llrs + next_upper_row;
183 const float* lower_right_llr_ptr = next_llrs + next_lower_row;
185 if (!is_upper_stage_half) {
186 const int u_pos = u_num >> stage;
187 const unsigned char f = u[u_pos - 1];
188 *call_row_llr =
llr_even(*upper_right_llr_ptr, *lower_right_llr_ptr, f);
192 if (frame_exp > next_stage) {
193 unsigned char* u_half = u + frame_size;
196 next_llrs, u_half, frame_exp, next_stage, u_num, next_upper_row);
200 next_llrs, u_half, frame_exp, next_stage, u_num, next_lower_row);
203 *call_row_llr =
llr_odd(*upper_right_llr_ptr, *lower_right_llr_ptr);
210 #include <immintrin.h> 220 const int frame_size = 0x01 << frame_exp;
222 const float* next_llrs = llrs + frame_size + row;
223 *(llrs + row) =
llr_even(*(next_llrs - 1), *next_llrs, u[u_num - 1]);
228 if (max_stage_depth < 3) {
233 int loop_stage = max_stage_depth;
234 int stage_size = 0x01 << loop_stage;
239 __m256 src0, src1, dst;
244 unsigned char* u_target = u + frame_size;
245 unsigned char* u_temp = u + 2 * frame_size;
246 memcpy(u_temp, u + u_num - stage_size,
sizeof(
unsigned char) * stage_size);
248 if (stage_size > 15) {
255 src_llr_ptr = llrs + (max_stage_depth + 1) * frame_size + row - stage_size;
256 dst_llr_ptr = llrs + max_stage_depth * frame_size + row;
261 for (p = 0; p < stage_size; p += 8) {
263 fbits = _mm_loadu_si128((__m128i*)u_target);
266 src0 = _mm256_loadu_ps(src_llr_ptr);
267 src1 = _mm256_loadu_ps(src_llr_ptr + 8);
272 _mm256_storeu_ps(dst_llr_ptr, dst);
280 const int min_stage = stage > 2 ? stage : 2;
285 while (min_stage < loop_stage) {
286 dst_llr_ptr = llrs + loop_stage * frame_size + row;
287 src_llr_ptr = dst_llr_ptr + frame_size;
288 for (el = 0; el < stage_size; el += 8) {
289 src0 = _mm256_loadu_ps(src_llr_ptr);
291 src1 = _mm256_loadu_ps(src_llr_ptr);
296 _mm256_storeu_ps(dst_llr_ptr, dst);
311 #include <immintrin.h> 314 static inline void volk_32f_8u_polarbutterfly_32f_u_avx2(
float* llrs,
321 const int frame_size = 0x01 << frame_exp;
323 const float* next_llrs = llrs + frame_size + row;
324 *(llrs + row) =
llr_even(*(next_llrs - 1), *next_llrs, u[u_num - 1]);
329 if (max_stage_depth < 3) {
334 int loop_stage = max_stage_depth;
335 int stage_size = 0x01 << loop_stage;
340 __m256 src0, src1, dst;
345 unsigned char* u_target = u + frame_size;
346 unsigned char* u_temp = u + 2 * frame_size;
347 memcpy(u_temp, u + u_num - stage_size,
sizeof(
unsigned char) * stage_size);
349 if (stage_size > 15) {
356 src_llr_ptr = llrs + (max_stage_depth + 1) * frame_size + row - stage_size;
357 dst_llr_ptr = llrs + max_stage_depth * frame_size + row;
362 for (p = 0; p < stage_size; p += 8) {
364 fbits = _mm_loadu_si128((__m128i*)u_target);
367 src0 = _mm256_loadu_ps(src_llr_ptr);
368 src1 = _mm256_loadu_ps(src_llr_ptr + 8);
373 _mm256_storeu_ps(dst_llr_ptr, dst);
381 const int min_stage = stage > 2 ? stage : 2;
386 while (min_stage < loop_stage) {
387 dst_llr_ptr = llrs + loop_stage * frame_size + row;
388 src_llr_ptr = dst_llr_ptr + frame_size;
389 for (el = 0; el < stage_size; el += 8) {
390 src0 = _mm256_loadu_ps(src_llr_ptr);
392 src1 = _mm256_loadu_ps(src_llr_ptr);
397 _mm256_storeu_ps(dst_llr_ptr, dst);
static void volk_8u_x2_encodeframepolar_8u_generic(unsigned char *frame, unsigned char *temp, unsigned int frame_size)
Definition: volk_8u_x2_encodeframepolar_8u.h:65
static void volk_32f_8u_polarbutterfly_32f_generic(float *llrs, unsigned char *u, const int frame_exp, const int stage, const int u_num, const int row)
Definition: volk_32f_8u_polarbutterfly_32f.h:157
static void llr_odd_stages(float *llrs, int min_stage, const int depth, const int frame_size, const int row)
Definition: volk_32f_8u_polarbutterfly_32f.h:86
static void volk_32f_8u_polarbutterfly_32f_u_avx(float *llrs, unsigned char *u, const int frame_exp, const int stage, const int u_num, const int row)
Definition: volk_32f_8u_polarbutterfly_32f.h:213
static void odd_xor_even_values(unsigned char *u_xor, const unsigned char *u, const int u_num)
Definition: volk_32f_8u_polarbutterfly_32f.h:130
static float llr_even(const float la, const float lb, const unsigned char f)
Definition: volk_32f_8u_polarbutterfly_32f.h:108
static __m256 _mm256_polar_minsum_llrs(__m256 src0, __m256 src1)
Definition: volk_avx_intrinsics.h:167
static void even_u_values(unsigned char *u_even, const unsigned char *u, const int u_num)
Definition: volk_32f_8u_polarbutterfly_32f.h:119
for i
Definition: volk_config_fixed.tmpl.h:25
static int calculate_max_stage_depth_for_row(const int frame_exp, const int row)
Definition: volk_32f_8u_polarbutterfly_32f.h:139
static float llr_odd(const float la, const float lb)
Definition: volk_32f_8u_polarbutterfly_32f.h:79
static __m256 _mm256_polar_fsign_add_llrs_avx2(__m256 src0, __m256 src1, __m128i fbits)
Definition: volk_avx2_intrinsics.h:81
static void volk_8u_x2_encodeframepolar_8u_u_ssse3(unsigned char *frame, unsigned char *temp, unsigned int frame_size)
Definition: volk_8u_x2_encodeframepolar_8u.h:89
static __m256 _mm256_polar_fsign_add_llrs(__m256 src0, __m256 src1, __m128i fbits)
Definition: volk_avx_intrinsics.h:184