#ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H
#define INCLUDED_volk_32f_x2_multiply_32f_u_H

/*
 * Element-wise float multiply, unaligned variants: c[i] = a[i] * b[i].
 *
 * Each kernel is compiled only when its LV_HAVE_<arch> macro is defined.
 * The SIMD kernels use the unaligned load/store intrinsics (loadu/storeu),
 * process as many full vectors as fit in num_points, and finish the
 * remaining elements with a scalar loop.
 */

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*!
 * \brief Multiplies two float vectors element by element (SSE, unaligned).
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    First input buffer, num_points floats.
 * \param bVector    Second input buffer, num_points floats.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector,
                                                  const float* aVector,
                                                  const float* bVector,
                                                  unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    unsigned int number;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    for (number = 0; number < quarterPoints; number++) {
        const __m128 aVal = _mm_loadu_ps(aPtr);
        const __m128 bVal = _mm_loadu_ps(bPtr);

        _mm_storeu_ps(cPtr, _mm_mul_ps(aVal, bVal));

        /* Advance to the next group of 4 floats; without this every
         * iteration would reprocess the first group. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the num_points % 4 elements left after the vector loop. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * (*bPtr++);
    }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*!
 * \brief Multiplies two float vectors element by element (AVX-512F, unaligned).
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    First input buffer, num_points floats.
 * \param bVector    Second input buffer, num_points floats.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_32f_x2_multiply_32f_u_avx512f(float* cVector,
                                                      const float* aVector,
                                                      const float* bVector,
                                                      unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;
    unsigned int number;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    for (number = 0; number < sixteenthPoints; number++) {
        const __m512 aVal = _mm512_loadu_ps(aPtr);
        const __m512 bVal = _mm512_loadu_ps(bPtr);

        _mm512_storeu_ps(cPtr, _mm512_mul_ps(aVal, bVal));

        /* Advance to the next group of 16 floats. */
        aPtr += 16;
        bPtr += 16;
        cPtr += 16;
    }

    /* Scalar tail: the num_points % 16 elements left after the vector loop. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * (*bPtr++);
    }
}
#endif /* LV_HAVE_AVX512F */

#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 * \brief Multiplies two float vectors element by element (AVX, unaligned).
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    First input buffer, num_points floats.
 * \param bVector    Second input buffer, num_points floats.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector,
                                                  const float* aVector,
                                                  const float* bVector,
                                                  unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    unsigned int number;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    for (number = 0; number < eighthPoints; number++) {
        const __m256 aVal = _mm256_loadu_ps(aPtr);
        const __m256 bVal = _mm256_loadu_ps(bPtr);

        _mm256_storeu_ps(cPtr, _mm256_mul_ps(aVal, bVal));

        /* Advance to the next group of 8 floats. */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: the num_points % 8 elements left after the vector loop. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * (*bPtr++);
    }
}
#endif /* LV_HAVE_AVX */

#ifdef LV_HAVE_GENERIC

/*!
 * \brief Multiplies two float vectors element by element (portable C).
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    First input buffer, num_points floats.
 * \param bVector    Second input buffer, num_points floats.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_32f_x2_multiply_32f_generic(float* cVector,
                                                    const float* aVector,
                                                    const float* bVector,
                                                    unsigned int num_points)
{
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * (*bPtr++);
    }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_32f_x2_multiply_32f_u_H */
#ifndef INCLUDED_volk_32f_x2_multiply_32f_a_H
#define INCLUDED_volk_32f_x2_multiply_32f_a_H

#include <inttypes.h>

/*
 * Element-wise float multiply, aligned variants: c[i] = a[i] * b[i].
 *
 * Each kernel is compiled only when its LV_HAVE_<arch> macro is defined.
 * The SSE/AVX/AVX-512F kernels here use the aligned load/store intrinsics,
 * so all three buffers must be aligned to the vector width (16, 32 and 64
 * bytes respectively). Elements that do not fill a whole vector are handled
 * by a scalar loop.
 */

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*!
 * \brief Multiplies two float vectors element by element (SSE, aligned).
 *
 * \param cVector    Output buffer, 16-byte aligned; receives num_points products.
 * \param aVector    First input buffer, 16-byte aligned, num_points floats.
 * \param bVector    Second input buffer, 16-byte aligned, num_points floats.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_32f_x2_multiply_32f_a_sse(float* cVector,
                                                  const float* aVector,
                                                  const float* bVector,
                                                  unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    unsigned int number;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    for (number = 0; number < quarterPoints; number++) {
        const __m128 aVal = _mm_load_ps(aPtr);
        const __m128 bVal = _mm_load_ps(bPtr);

        _mm_store_ps(cPtr, _mm_mul_ps(aVal, bVal));

        /* Advance to the next group of 4 floats (keeps 16-byte alignment). */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the num_points % 4 elements left after the vector loop. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * (*bPtr++);
    }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*!
 * \brief Multiplies two float vectors element by element (AVX-512F, aligned).
 *
 * \param cVector    Output buffer, 64-byte aligned; receives num_points products.
 * \param aVector    First input buffer, 64-byte aligned, num_points floats.
 * \param bVector    Second input buffer, 64-byte aligned, num_points floats.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_32f_x2_multiply_32f_a_avx512f(float* cVector,
                                                      const float* aVector,
                                                      const float* bVector,
                                                      unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;
    unsigned int number;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    for (number = 0; number < sixteenthPoints; number++) {
        const __m512 aVal = _mm512_load_ps(aPtr);
        const __m512 bVal = _mm512_load_ps(bPtr);

        _mm512_store_ps(cPtr, _mm512_mul_ps(aVal, bVal));

        /* Advance to the next group of 16 floats (keeps 64-byte alignment). */
        aPtr += 16;
        bPtr += 16;
        cPtr += 16;
    }

    /* Scalar tail: the num_points % 16 elements left after the vector loop. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * (*bPtr++);
    }
}
#endif /* LV_HAVE_AVX512F */

#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 * \brief Multiplies two float vectors element by element (AVX, aligned).
 *
 * \param cVector    Output buffer, 32-byte aligned; receives num_points products.
 * \param aVector    First input buffer, 32-byte aligned, num_points floats.
 * \param bVector    Second input buffer, 32-byte aligned, num_points floats.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_32f_x2_multiply_32f_a_avx(float* cVector,
                                                  const float* aVector,
                                                  const float* bVector,
                                                  unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    unsigned int number;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    for (number = 0; number < eighthPoints; number++) {
        const __m256 aVal = _mm256_load_ps(aPtr);
        const __m256 bVal = _mm256_load_ps(bPtr);

        _mm256_store_ps(cPtr, _mm256_mul_ps(aVal, bVal));

        /* Advance to the next group of 8 floats (keeps 32-byte alignment). */
        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail: the num_points % 8 elements left after the vector loop. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * (*bPtr++);
    }
}
#endif /* LV_HAVE_AVX */

#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/*!
 * \brief Multiplies two float vectors element by element (ARM NEON).
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    First input buffer, num_points floats.
 * \param bVector    Second input buffer, num_points floats.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_32f_x2_multiply_32f_neon(float* cVector,
                                                 const float* aVector,
                                                 const float* bVector,
                                                 unsigned int num_points)
{
    const unsigned int quarter_points = num_points / 4;
    unsigned int number;

    for (number = 0; number < quarter_points; ++number) {
        const float32x4_t avec = vld1q_f32(aVector);
        const float32x4_t bvec = vld1q_f32(bVector);

        vst1q_f32(cVector, vmulq_f32(avec, bvec));

        /* Advance to the next group of 4 floats. */
        aVector += 4;
        bVector += 4;
        cVector += 4;
    }

    /* Scalar tail: the num_points % 4 elements left after the vector loop. */
    for (number = quarter_points * 4; number < num_points; ++number) {
        *cVector++ = *aVector++ * *bVector++;
    }
}
#endif /* LV_HAVE_NEON */

#ifdef LV_HAVE_GENERIC

/*!
 * \brief Multiplies two float vectors element by element (portable C).
 *
 * \param cVector    Output buffer; receives num_points products.
 * \param aVector    First input buffer, num_points floats.
 * \param bVector    Second input buffer, num_points floats.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector,
                                                      const float* aVector,
                                                      const float* bVector,
                                                      unsigned int num_points)
{
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * (*bPtr++);
    }
}
#endif /* LV_HAVE_GENERIC */

#ifdef LV_HAVE_ORC

/* Implemented outside this header; only the declaration is visible here. */
extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector,
                                                const float* aVector,
                                                const float* bVector,
                                                unsigned int num_points);

/*!
 * \brief Forwards the call unchanged to volk_32f_x2_multiply_32f_a_orc_impl.
 *
 * NOTE(review): this wrapper carries the "_u_" name but calls the "_a_"
 * implementation; whether that implementation tolerates unaligned buffers
 * cannot be confirmed from this header.
 *
 * \param cVector    Output buffer passed through to the implementation.
 * \param aVector    First input buffer passed through to the implementation.
 * \param bVector    Second input buffer passed through to the implementation.
 * \param num_points Element count passed through to the implementation.
 */
static inline void volk_32f_x2_multiply_32f_u_orc(float* cVector,
                                                  const float* aVector,
                                                  const float* bVector,
                                                  unsigned int num_points)
{
    volk_32f_x2_multiply_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
#endif /* LV_HAVE_ORC */

#endif /* INCLUDED_volk_32f_x2_multiply_32f_a_H */
static void volk_32f_x2_multiply_32f_u_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_multiply_32f.h:154
static void volk_32f_x2_multiply_32f_a_sse(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_multiply_32f.h:220
static void volk_32f_x2_multiply_32f_neon(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_multiply_32f.h:333
static void volk_32f_x2_multiply_32f_generic(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_multiply_32f.h:191
static void volk_32f_x2_multiply_32f_u_sse(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_multiply_32f.h:80
static void volk_32f_x2_multiply_32f_a_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_multiply_32f.h:295
static void volk_32f_x2_multiply_32f_a_generic(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_multiply_32f.h:359