65 #ifndef INCLUDED_volk_32f_invsqrt_32f_a_H 66 #define INCLUDED_volk_32f_invsqrt_32f_a_H 76 const float threehalfs = 1.5F;
84 u.i = 0x5f3759df - (u.i >> 1);
85 u.f = u.f * (threehalfs - (x2 * u.f * u.f));
93 #include <immintrin.h> 98 unsigned int number = 0;
99 const unsigned int eighthPoints = num_points / 8;
101 float* cPtr = cVector;
102 const float* aPtr = aVector;
104 for (; number < eighthPoints; number++) {
105 aVal = _mm256_load_ps(aPtr);
106 cVal = _mm256_rsqrt_ps(aVal);
107 _mm256_store_ps(cPtr, cVal);
112 number = eighthPoints * 8;
113 for (; number < num_points; number++)
120 #include <xmmintrin.h> 125 unsigned int number = 0;
126 const unsigned int quarterPoints = num_points / 4;
128 float* cPtr = cVector;
129 const float* aPtr = aVector;
132 for (; number < quarterPoints; number++) {
134 aVal = _mm_load_ps(aPtr);
136 cVal = _mm_rsqrt_ps(aVal);
138 _mm_store_ps(cPtr, cVal);
144 number = quarterPoints * 4;
145 for (; number < num_points; number++) {
153 #include <arm_neon.h> 159 const unsigned int quarter_points = num_points / 4;
161 float* cPtr = cVector;
162 const float* aPtr = aVector;
163 float32x4_t a_val, c_val;
164 for (number = 0; number < quarter_points; ++number) {
165 a_val = vld1q_f32(aPtr);
166 c_val = vrsqrteq_f32(a_val);
167 vst1q_f32(cPtr, c_val);
172 for (number = quarter_points * 4; number < num_points; number++)
178 #ifdef LV_HAVE_GENERIC 181 const float* aVector,
182 unsigned int num_points)
184 float* cPtr = cVector;
185 const float* aPtr = aVector;
186 unsigned int number = 0;
187 for (number = 0; number < num_points; number++) {
194 #include <immintrin.h> 199 unsigned int number = 0;
200 const unsigned int eighthPoints = num_points / 8;
202 float* cPtr = cVector;
203 const float* aPtr = aVector;
205 for (; number < eighthPoints; number++) {
206 aVal = _mm256_loadu_ps(aPtr);
207 cVal = _mm256_rsqrt_ps(aVal);
208 _mm256_storeu_ps(cPtr, cVal);
213 number = eighthPoints * 8;
214 for (; number < num_points; number++)
static void volk_32f_invsqrt_32f_a_avx(float *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_invsqrt_32f.h:96
static void volk_32f_invsqrt_32f_neon(float *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_invsqrt_32f.h:156
static void volk_32f_invsqrt_32f_a_sse(float *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_invsqrt_32f.h:123
for i
Definition: volk_config_fixed.tmpl.h:25
static float Q_rsqrt(float number)
Definition: volk_32f_invsqrt_32f.h:73
static void volk_32f_invsqrt_32f_u_avx(float *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_invsqrt_32f.h:197
static void volk_32f_invsqrt_32f_generic(float *cVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_invsqrt_32f.h:180