74 #ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H 75 #define INCLUDED_volk_32fc_s32f_atan2_32f_a_H 82 #include <smmintrin.h> 84 #ifdef LV_HAVE_LIB_SIMDMATH 88 static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(
float* outputVector,
90 const float normalizeFactor,
91 unsigned int num_points)
93 const float* complexVectorPtr = (
float*)complexVector;
94 float* outPtr = outputVector;
96 unsigned int number = 0;
97 const float invNormalizeFactor = 1.0 / normalizeFactor;
99 #ifdef LV_HAVE_LIB_SIMDMATH 100 const unsigned int quarterPoints = num_points / 4;
101 __m128 testVector = _mm_set_ps1(2 * M_PI);
102 __m128 correctVector = _mm_set_ps1(M_PI);
103 __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
105 __m128 complex1, complex2, iValue, qValue;
108 for (; number < quarterPoints; number++) {
110 complex1 = _mm_load_ps(complexVectorPtr);
111 complexVectorPtr += 4;
112 complex2 = _mm_load_ps(complexVectorPtr);
113 complexVectorPtr += 4;
115 iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2, 0, 2, 0));
116 qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3, 1, 3, 1));
118 phase = atan2f4(qValue, iValue);
121 keepMask = _mm_cmpneq_ps(phase, testVector);
122 phase = _mm_blendv_ps(correctVector, phase, keepMask);
124 phase = _mm_mul_ps(phase, vNormalizeFactor);
125 _mm_store_ps((
float*)outPtr, phase);
128 number = quarterPoints * 4;
131 for (; number < num_points; number++) {
132 const float real = *complexVectorPtr++;
133 const float imag = *complexVectorPtr++;
134 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
/* NOTE(review): guard name follows the LV_HAVE_* pattern used by the generic
 * kernel in this file; confirm it matches the build system's SSE macro. */
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

#ifdef LV_HAVE_LIB_SIMDMATH
#include <simdmath.h>
#endif /* LV_HAVE_LIB_SIMDMATH */

/*!
 * \brief Computes atan2(imag, real) of each complex sample and scales the
 *        result by 1 / normalizeFactor (SSE, aligned buffers).
 *
 * The vectorised path is only compiled when LV_HAVE_LIB_SIMDMATH is defined
 * (it relies on atan2f4); otherwise every point is handled by the scalar loop.
 *
 * \param outputVector    Receives num_points scaled phase values. Written with
 *                        _mm_store_ps, so it must be 16-byte aligned.
 * \param complexVector   num_points complex samples, read as interleaved
 *                        (real, imag) floats with _mm_load_ps, so it must be
 *                        16-byte aligned.
 * \param normalizeFactor Divisor applied to every phase value.
 * \param num_points      Number of complex samples to process.
 */
static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector,
                                                  const lv_32fc_t* complexVector,
                                                  const float normalizeFactor,
                                                  unsigned int num_points)
{
    /* View the complex input as a flat float stream: re0, im0, re1, im1, ... */
    const float* complexVectorPtr = (const float*)complexVector;
    float* outPtr = outputVector;

    unsigned int number = 0;
    const float invNormalizeFactor = 1.0 / normalizeFactor;

#ifdef LV_HAVE_LIB_SIMDMATH
    const unsigned int quarterPoints = num_points / 4;
    __m128 testVector = _mm_set_ps1(2 * M_PI);
    __m128 correctVector = _mm_set_ps1(M_PI);
    __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
    __m128 phase;
    __m128 complex1, complex2, iValue, qValue;
    __m128 mask;
    __m128 keepMask;

    for (; number < quarterPoints; number++) {
        /* Load four complex samples (eight floats). */
        complex1 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;
        complex2 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        /* De-interleave: even lanes are the real parts, odd lanes the imaginary. */
        iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2, 0, 2, 0));
        qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3, 1, 3, 1));

        phase = atan2f4(qValue, iValue);

        /* Lane select built from and/andnot/or: lanes equal to 2*pi become pi,
         * all other lanes keep their value.
         * NOTE(review): presumably works around atan2f4 returning 2*pi where
         * pi is expected -- confirm against the simdmath documentation. */
        keepMask = _mm_cmpneq_ps(phase, testVector);
        phase = _mm_and_ps(phase, keepMask);
        mask = _mm_andnot_ps(keepMask, correctVector);
        phase = _mm_or_ps(phase, mask);

        phase = _mm_mul_ps(phase, vNormalizeFactor);
        _mm_store_ps(outPtr, phase);
        outPtr += 4;
    }

    /* Resume the scalar loop at the first point the vector loop did not cover. */
    number = quarterPoints * 4;
#endif /* LV_HAVE_LIB_SIMDMATH */

    for (; number < num_points; number++) {
        const float real = *complexVectorPtr++;
        const float imag = *complexVectorPtr++;
        *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
    }
}
#endif /* LV_HAVE_SSE */
201 #ifdef LV_HAVE_GENERIC 205 const float normalizeFactor,
206 unsigned int num_points)
208 float* outPtr = outputVector;
209 const float* inPtr = (
float*)inputVector;
210 const float invNormalizeFactor = 1.0 / normalizeFactor;
212 for (number = 0; number < num_points; number++) {
213 const float real = *inPtr++;
214 const float imag = *inPtr++;
215 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
/*
 * Cross-reference index (from the generated source listing):
 *
 *   static void volk_32fc_s32f_atan2_32f_a_sse(float *outputVector, const lv_32fc_t *complexVector, const float normalizeFactor, unsigned int num_points)
 *     Definition: volk_32fc_s32f_atan2_32f.h:147
 *
 *   static void volk_32fc_s32f_atan2_32f_generic(float *outputVector, const lv_32fc_t *inputVector, const float normalizeFactor, unsigned int num_points)
 *     Definition: volk_32fc_s32f_atan2_32f.h:203
 *
 *   float complex lv_32fc_t
 *     Definition: volk_complex.h:70
 */