/*
 * Fragment of the portable kernel guarded by LV_HAVE_GENERIC.
 *
 * This text comes from a listing with the original source line numbers fused
 * into it; several original lines are not shown (the function name and first
 * parameters, the statements that produce `real` and `imag`, and the closing
 * braces), so the block is not compilable as it stands.
 *
 * What the visible code does: for each of num_points iterations it writes
 * (int16_t)rintf(scalar * sqrtf(real + imag)) to the output and advances the
 * output pointer by one element.
 *
 * NOTE(review): `real` and `imag` are presumably the squared real and
 * imaginary parts read through complexVectorPtr -- confirm against the full
 * source.
 * NOTE(review): the cast below is to (float*) although the variable is
 * declared const float*; the SIMD variants in this file cast to
 * (const float*) instead.
 * NOTE(review): no clamping is visible before the (int16_t) cast; converting
 * a float outside the int16_t range is undefined behaviour in C.
 */
73 #ifdef LV_HAVE_GENERIC 79 unsigned int num_points)
81 const float* complexVectorPtr = (
float*)complexVector;
82 int16_t* magnitudeVectorPtr = magnitudeVector;
83 unsigned int number = 0;
84 for (number = 0; number < num_points; number++) {
/* Scale the magnitude, round to nearest with rintf, then narrow to 16 bits. */
89 *magnitudeVectorPtr++ = (int16_t)
rintf(scalar * sqrtf(real + imag));
/*
 * Fragment of volk_32fc_s32f_magnitude_16i_a_avx2 (aligned AVX2 kernel),
 * preceded by the include guard for the aligned section and <immintrin.h>.
 *
 * The listing omits some original lines: the middle parameters, the
 * declarations of resultInt / resultShort, the arguments of the permute
 * call, the closing braces and the name of the function called for the tail.
 *
 * Visible per-iteration work (8 complex points per iteration):
 *   - two aligned 256-bit loads of 8 floats each from complexVectorPtr,
 *   - element-wise squaring, then a horizontal add of adjacent pairs,
 *   - square root, multiplication by the broadcast scalar,
 *   - conversion to 32-bit integers, signed-saturating pack to 16 bits,
 *   - a cross-lane permute, extraction of the low 128 bits and an aligned
 *     128-bit store of 8 int16_t values.
 *
 * The aligned load/store intrinsics used here require 32-byte aligned input
 * and 16-byte aligned output.
 */
94 #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H 95 #define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H 103 #include <immintrin.h> 105 static inline void volk_32fc_s32f_magnitude_16i_a_avx2(int16_t* magnitudeVector,
108 unsigned int num_points)
110 unsigned int number = 0;
111 const unsigned int eighthPoints = num_points / 8;
113 const float* complexVectorPtr = (
const float*)complexVector;
114 int16_t* magnitudeVectorPtr = magnitudeVector;
116 __m256 vScalar = _mm256_set1_ps(scalar);
/*
 * 32-bit lane indices; from lowest element upward they are 0, 4, 1, 5.
 * NOTE(review): presumably passed to the permute below to undo the per-lane
 * interleaving left by hadd/packs -- the call's arguments are not shown.
 */
117 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
118 __m256 cplxValue1, cplxValue2, result;
122 for (; number < eighthPoints; number++) {
123 cplxValue1 = _mm256_load_ps(complexVectorPtr);
124 complexVectorPtr += 8;
126 cplxValue2 = _mm256_load_ps(complexVectorPtr);
127 complexVectorPtr += 8;
/* Square every float, then add adjacent pairs within each 128-bit lane. */
129 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
130 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
132 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
134 result = _mm256_sqrt_ps(result);
136 result = _mm256_mul_ps(result, vScalar);
/* Float -> int32, then int32 -> int16 with signed saturation. */
138 resultInt = _mm256_cvtps_epi32(result);
139 resultInt = _mm256_packs_epi32(resultInt, resultInt);
140 resultInt = _mm256_permutevar8x32_epi32(
142 resultShort = _mm256_extracti128_si256(resultInt, 0);
143 _mm_store_si128((__m128i*)magnitudeVectorPtr, resultShort);
144 magnitudeVectorPtr += 8;
/*
 * Tail: the remaining num_points - number points are passed, with both
 * pointers advanced by `number`, to a function whose name is on a line the
 * listing omits (presumably the generic kernel -- confirm).
 */
147 number = eighthPoints * 8;
149 magnitudeVector + number, complexVector + number, scalar, num_points - number);
/*
 * Fragment of the aligned SSE3 kernel. The function name is not visible in
 * this fragment; the cross-reference at the end of the listing places
 * volk_32fc_s32f_magnitude_16i_a_sse3 at original line 156, which falls
 * inside this range.
 *
 * The listing omits some original lines, including the declaration of
 * floatBuffer, the closing braces and the name of the function called for
 * the tail.
 *
 * Visible per-iteration work (4 complex points per iteration):
 *   - two aligned 128-bit loads of 4 floats each,
 *   - element-wise squaring, then a horizontal add of adjacent pairs,
 *   - square root, multiplication by the broadcast scalar,
 *   - an aligned store to floatBuffer, after which each of the four floats
 *     is rounded with rintf and cast to int16_t.
 *
 * NOTE(review): floatBuffer is presumably a 16-byte aligned array of four
 * floats, as the aligned store requires -- confirm in the full source.
 * NOTE(review): the (int16_t) casts are not preceded by any visible clamp.
 */
154 #include <pmmintrin.h> 159 unsigned int num_points)
161 unsigned int number = 0;
162 const unsigned int quarterPoints = num_points / 4;
164 const float* complexVectorPtr = (
const float*)complexVector;
165 int16_t* magnitudeVectorPtr = magnitudeVector;
167 __m128 vScalar = _mm_set_ps1(scalar);
169 __m128 cplxValue1, cplxValue2, result;
173 for (; number < quarterPoints; number++) {
174 cplxValue1 = _mm_load_ps(complexVectorPtr);
175 complexVectorPtr += 4;
177 cplxValue2 = _mm_load_ps(complexVectorPtr);
178 complexVectorPtr += 4;
/* Square every float, then add adjacent pairs. */
180 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
181 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
183 result = _mm_hadd_ps(cplxValue1, cplxValue2);
185 result = _mm_sqrt_ps(result);
187 result = _mm_mul_ps(result, vScalar);
/* Spill the four scaled values and narrow them one at a time in scalar code. */
189 _mm_store_ps(floatBuffer, result);
190 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[0]);
191 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[1]);
192 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[2]);
193 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[3]);
/*
 * Tail: the remaining num_points - number points go to a function whose name
 * is on a line the listing omits (presumably the generic kernel -- confirm).
 */
196 number = quarterPoints * 4;
198 magnitudeVector + number, complexVector + number, scalar, num_points - number);
/*
 * Fragment of the aligned SSE kernel. The function name is not visible in
 * this fragment; the cross-reference at the end of the listing places
 * volk_32fc_s32f_magnitude_16i_a_sse at original line 206, which falls
 * inside this range.
 *
 * The listing omits some original lines, including the declarations of
 * floatBuffer, iValue2 and qValue2, the left-hand sides of the two squaring
 * statements, the closing braces and the name of the function called for
 * the tail.
 *
 * Visible per-iteration work (4 complex points per iteration):
 *   - two aligned 128-bit loads of 4 floats each,
 *   - two shuffles that gather the even-indexed and the odd-indexed floats,
 *   - squaring of each, a sum, a square root and a multiply by the scalar,
 *   - an aligned store to floatBuffer, after which each of the four floats
 *     is rounded with rintf and cast to int16_t.
 *
 * NOTE(review): floatBuffer is presumably a 16-byte aligned array of four
 * floats, as the aligned store requires -- confirm in the full source.
 * NOTE(review): the (int16_t) casts are not preceded by any visible clamp.
 */
204 #include <xmmintrin.h> 209 unsigned int num_points)
211 unsigned int number = 0;
212 const unsigned int quarterPoints = num_points / 4;
214 const float* complexVectorPtr = (
const float*)complexVector;
215 int16_t* magnitudeVectorPtr = magnitudeVector;
217 __m128 vScalar = _mm_set_ps1(scalar);
219 __m128 cplxValue1, cplxValue2, result;
220 __m128 iValue, qValue;
224 for (; number < quarterPoints; number++) {
225 cplxValue1 = _mm_load_ps(complexVectorPtr);
226 complexVectorPtr += 4;
228 cplxValue2 = _mm_load_ps(complexVectorPtr);
229 complexVectorPtr += 4;
/* Elements 0 and 2 of each input: the first float of every complex pair. */
232 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
/* Elements 1 and 3 of each input: the second float of every complex pair. */
234 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
/*
 * NOTE(review): as shown, the two products are discarded; the assignments to
 * iValue2 / qValue2 (used on the following line) are presumably on lines the
 * listing omits -- confirm in the full source.
 */
237 _mm_mul_ps(iValue, iValue);
239 _mm_mul_ps(qValue, qValue);
241 result = _mm_add_ps(iValue2, qValue2);
243 result = _mm_sqrt_ps(result);
245 result = _mm_mul_ps(result, vScalar);
/* Spill the four scaled values and narrow them one at a time in scalar code. */
247 _mm_store_ps(floatBuffer, result);
248 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[0]);
249 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[1]);
250 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[2]);
251 *magnitudeVectorPtr++ = (int16_t)
rintf(floatBuffer[3]);
/*
 * Tail: the remaining num_points - number points go to a function whose name
 * is on a line the listing omits (presumably the generic kernel -- confirm).
 */
254 number = quarterPoints * 4;
256 magnitudeVector + number, complexVector + number, scalar, num_points - number);
/*
 * Fragment of volk_32fc_s32f_magnitude_16i_u_avx2 (unaligned AVX2 kernel),
 * preceded by the include guard for the unaligned section, <inttypes.h> and
 * <immintrin.h>.
 *
 * The listing omits some original lines: the middle parameters, the
 * declarations of resultInt / resultShort, the arguments of the permute
 * call, the closing braces and the name of the function called for the tail.
 *
 * The visible arithmetic matches the aligned AVX2 fragment earlier in this
 * listing; the difference is that loads and the final store use the
 * unaligned intrinsics (_mm256_loadu_ps, _mm_storeu_si128), so no alignment
 * is required of either buffer by these calls.
 *
 * Visible per-iteration work (8 complex points per iteration): two 256-bit
 * loads, element-wise squaring, horizontal add of adjacent pairs, square
 * root, multiply by the broadcast scalar, conversion to int32,
 * signed-saturating pack to int16, cross-lane permute, extraction of the low
 * 128 bits and a 128-bit store of 8 int16_t values.
 */
263 #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_u_H 264 #define INCLUDED_volk_32fc_s32f_magnitude_16i_u_H 266 #include <inttypes.h> 272 #include <immintrin.h> 274 static inline void volk_32fc_s32f_magnitude_16i_u_avx2(int16_t* magnitudeVector,
277 unsigned int num_points)
279 unsigned int number = 0;
280 const unsigned int eighthPoints = num_points / 8;
282 const float* complexVectorPtr = (
const float*)complexVector;
283 int16_t* magnitudeVectorPtr = magnitudeVector;
285 __m256 vScalar = _mm256_set1_ps(scalar);
/*
 * 32-bit lane indices; from lowest element upward they are 0, 4, 1, 5.
 * NOTE(review): presumably passed to the permute below to undo the per-lane
 * interleaving left by hadd/packs -- the call's arguments are not shown.
 */
286 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
287 __m256 cplxValue1, cplxValue2, result;
291 for (; number < eighthPoints; number++) {
292 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
293 complexVectorPtr += 8;
295 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
296 complexVectorPtr += 8;
/* Square every float, then add adjacent pairs within each 128-bit lane. */
298 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
299 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
301 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
303 result = _mm256_sqrt_ps(result);
305 result = _mm256_mul_ps(result, vScalar);
/* Float -> int32, then int32 -> int16 with signed saturation. */
307 resultInt = _mm256_cvtps_epi32(result);
308 resultInt = _mm256_packs_epi32(resultInt, resultInt);
309 resultInt = _mm256_permutevar8x32_epi32(
311 resultShort = _mm256_extracti128_si256(resultInt, 0);
312 _mm_storeu_si128((__m128i*)magnitudeVectorPtr, resultShort);
313 magnitudeVectorPtr += 8;
/*
 * Tail: the remaining num_points - number points are passed, with both
 * pointers advanced by `number`, to a function whose name is on a line the
 * listing omits (presumably the generic kernel -- confirm).
 */
316 number = eighthPoints * 8;
318 magnitudeVector + number, complexVector + number, scalar, num_points - number);
#define __VOLK_VOLATILE
Definition: volk_common.h:64
static float rintf(float x)
Definition: config.h:37
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
static void volk_32fc_s32f_magnitude_16i_a_sse3(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:156
float complex lv_32fc_t
Definition: volk_complex.h:70
static void volk_32fc_s32f_magnitude_16i_generic(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:76
static void volk_32fc_s32f_magnitude_16i_a_sse(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:206