55 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H 56 #define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H 64 #include <immintrin.h> 66 static inline void volk_16ic_s32f_magnitude_32f_a_avx2(
float* magnitudeVector,
69 unsigned int num_points)
71 unsigned int number = 0;
72 const unsigned int eighthPoints = num_points / 8;
74 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
75 float* magnitudeVectorPtr = magnitudeVector;
77 __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
79 __m256 cplxValue1, cplxValue2, result;
81 __m128i short1, short2;
82 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
84 for (; number < eighthPoints; number++) {
86 int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
87 complexVectorPtr += 16;
88 short1 = _mm256_extracti128_si256(int1, 0);
89 short2 = _mm256_extracti128_si256(int1, 1);
91 int1 = _mm256_cvtepi16_epi32(short1);
92 int2 = _mm256_cvtepi16_epi32(short2);
93 cplxValue1 = _mm256_cvtepi32_ps(int1);
94 cplxValue2 = _mm256_cvtepi32_ps(int2);
96 cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
97 cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
99 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
100 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
102 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
103 result = _mm256_permutevar8x32_ps(result, idx);
105 result = _mm256_sqrt_ps(result);
107 _mm256_store_ps(magnitudeVectorPtr, result);
109 magnitudeVectorPtr += 8;
112 number = eighthPoints * 8;
113 magnitudeVectorPtr = &magnitudeVector[number];
114 complexVectorPtr = (
const int16_t*)&complexVector[number];
115 for (; number < num_points; number++) {
116 float val1Real = (float)(*complexVectorPtr++) / scalar;
117 float val1Imag = (float)(*complexVectorPtr++) / scalar;
118 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
125 #include <pmmintrin.h> 130 unsigned int num_points)
132 unsigned int number = 0;
133 const unsigned int quarterPoints = num_points / 4;
135 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
136 float* magnitudeVectorPtr = magnitudeVector;
138 __m128 invScalar = _mm_set_ps1(1.0 / scalar);
140 __m128 cplxValue1, cplxValue2, result;
144 for (; number < quarterPoints; number++) {
146 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
147 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
148 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
149 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
151 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
152 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
153 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
154 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
156 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
157 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
159 complexVectorPtr += 8;
161 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
162 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
164 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
165 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
167 result = _mm_hadd_ps(cplxValue1, cplxValue2);
169 result = _mm_sqrt_ps(result);
171 _mm_store_ps(magnitudeVectorPtr, result);
173 magnitudeVectorPtr += 4;
176 number = quarterPoints * 4;
177 magnitudeVectorPtr = &magnitudeVector[number];
178 complexVectorPtr = (
const int16_t*)&complexVector[number];
179 for (; number < num_points; number++) {
180 float val1Real = (float)(*complexVectorPtr++) / scalar;
181 float val1Imag = (float)(*complexVectorPtr++) / scalar;
182 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
188 #include <xmmintrin.h> 193 unsigned int num_points)
195 unsigned int number = 0;
196 const unsigned int quarterPoints = num_points / 4;
198 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
199 float* magnitudeVectorPtr = magnitudeVector;
201 const float iScalar = 1.0 / scalar;
202 __m128 invScalar = _mm_set_ps1(iScalar);
204 __m128 cplxValue1, cplxValue2, result, re, im;
208 for (; number < quarterPoints; number++) {
209 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
210 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
211 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
212 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
214 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
215 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
216 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
217 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
219 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
220 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
222 re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
223 im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
225 complexVectorPtr += 8;
227 cplxValue1 = _mm_mul_ps(re, invScalar);
228 cplxValue2 = _mm_mul_ps(im, invScalar);
230 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
231 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
233 result = _mm_add_ps(cplxValue1, cplxValue2);
235 result = _mm_sqrt_ps(result);
237 _mm_store_ps(magnitudeVectorPtr, result);
239 magnitudeVectorPtr += 4;
242 number = quarterPoints * 4;
243 magnitudeVectorPtr = &magnitudeVector[number];
244 complexVectorPtr = (
const int16_t*)&complexVector[number];
245 for (; number < num_points; number++) {
246 float val1Real = (float)(*complexVectorPtr++) * iScalar;
247 float val1Imag = (float)(*complexVectorPtr++) * iScalar;
248 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
#ifdef LV_HAVE_GENERIC

/*!
 * \brief Portable kernel: magnitude of scaled 16-bit complex samples.
 *
 * For every input sample (re, im) this writes
 * sqrtf((re / scalar)^2 + (im / scalar)^2) to the output buffer. The division
 * is performed as a multiplication by the precomputed reciprocal of scalar.
 *
 * \param magnitudeVector Output buffer receiving num_points floats.
 * \param complexVector   Input buffer, read as num_points interleaved
 *        int16_t (real, imaginary) pairs.
 * \param scalar          Divisor applied to both components before squaring.
 * \param num_points      Number of complex samples to process.
 */
static inline void volk_16ic_s32f_magnitude_32f_generic(float* magnitudeVector,
                                                        const lv_16sc_t* complexVector,
                                                        const float scalar,
                                                        unsigned int num_points)
{
    const int16_t* complexVectorPtr = (const int16_t*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;
    unsigned int number;
    const float invScalar = 1.0f / scalar;

    for (number = 0; number < num_points; number++) {
        const float real = ((float)(*complexVectorPtr++)) * invScalar;
        const float imag = ((float)(*complexVectorPtr++)) * invScalar;
        *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
    }
}

#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC_DISABLED

/* ORC-generated implementation; defined outside this header. */
extern void volk_16ic_s32f_magnitude_32f_a_orc_impl(float* magnitudeVector,
                                                    const lv_16sc_t* complexVector,
                                                    const float scalar,
                                                    unsigned int num_points);

/*!
 * \brief ORC kernel entry point: forwards all arguments unchanged to
 *        volk_16ic_s32f_magnitude_32f_a_orc_impl.
 *
 * \param magnitudeVector Output buffer, passed through to the implementation.
 * \param complexVector   Input buffer, passed through to the implementation.
 * \param scalar          Scale divisor, passed through to the implementation.
 * \param num_points      Number of complex samples, passed through.
 *
 * NOTE(review): the wrapper is named "_u_" but calls an "_a_" implementation;
 * whether the implementation tolerates unaligned buffers cannot be told from
 * this excerpt -- confirm.
 */
static inline void volk_16ic_s32f_magnitude_32f_u_orc(float* magnitudeVector,
                                                      const lv_16sc_t* complexVector,
                                                      const float scalar,
                                                      unsigned int num_points)
{
    volk_16ic_s32f_magnitude_32f_a_orc_impl(
        magnitudeVector, complexVector, scalar, num_points);
}

#endif /* LV_HAVE_ORC_DISABLED */
294 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_u_H 295 #define INCLUDED_volk_16ic_s32f_magnitude_32f_u_H 297 #include <inttypes.h> 303 #include <immintrin.h> 305 static inline void volk_16ic_s32f_magnitude_32f_u_avx2(
float* magnitudeVector,
308 unsigned int num_points)
310 unsigned int number = 0;
311 const unsigned int eighthPoints = num_points / 8;
313 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
314 float* magnitudeVectorPtr = magnitudeVector;
316 __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
318 __m256 cplxValue1, cplxValue2, result;
320 __m128i short1, short2;
321 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
323 for (; number < eighthPoints; number++) {
325 int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
326 complexVectorPtr += 16;
327 short1 = _mm256_extracti128_si256(int1, 0);
328 short2 = _mm256_extracti128_si256(int1, 1);
330 int1 = _mm256_cvtepi16_epi32(short1);
331 int2 = _mm256_cvtepi16_epi32(short2);
332 cplxValue1 = _mm256_cvtepi32_ps(int1);
333 cplxValue2 = _mm256_cvtepi32_ps(int2);
335 cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
336 cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
338 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
339 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
341 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
342 result = _mm256_permutevar8x32_ps(result, idx);
344 result = _mm256_sqrt_ps(result);
346 _mm256_storeu_ps(magnitudeVectorPtr, result);
348 magnitudeVectorPtr += 8;
351 number = eighthPoints * 8;
352 magnitudeVectorPtr = &magnitudeVector[number];
353 complexVectorPtr = (
const int16_t*)&complexVector[number];
354 for (; number < num_points; number++) {
355 float val1Real = (float)(*complexVectorPtr++) / scalar;
356 float val1Imag = (float)(*complexVectorPtr++) / scalar;
357 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
short complex lv_16sc_t
Definition: volk_complex.h:67
static void volk_16ic_s32f_magnitude_32f_a_sse(float *magnitudeVector, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_magnitude_32f.h:190
static void volk_16ic_s32f_magnitude_32f_a_sse3(float *magnitudeVector, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_magnitude_32f.h:127
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
static void volk_16ic_s32f_magnitude_32f_generic(float *magnitudeVector, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_magnitude_32f.h:257