Vector Optimized Library of Kernels  2.4
Architecture-tuned implementations of math kernels
volk_32fc_magnitude_squared_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
71 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
72 #define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
73 
74 #include <inttypes.h>
75 #include <math.h>
76 #include <stdio.h>
77 
78 #ifdef LV_HAVE_AVX
79 #include <immintrin.h>
81 
82 static inline void volk_32fc_magnitude_squared_32f_u_avx(float* magnitudeVector,
83  const lv_32fc_t* complexVector,
84  unsigned int num_points)
85 {
86  unsigned int number = 0;
87  const unsigned int eighthPoints = num_points / 8;
88 
89  const float* complexVectorPtr = (float*)complexVector;
90  float* magnitudeVectorPtr = magnitudeVector;
91 
92  __m256 cplxValue1, cplxValue2, result;
93 
94  for (; number < eighthPoints; number++) {
95  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
96  cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
97  result = _mm256_magnitudesquared_ps(cplxValue1, cplxValue2);
98  _mm256_storeu_ps(magnitudeVectorPtr, result);
99 
100  complexVectorPtr += 16;
101  magnitudeVectorPtr += 8;
102  }
103 
104  number = eighthPoints * 8;
105  for (; number < num_points; number++) {
106  float val1Real = *complexVectorPtr++;
107  float val1Imag = *complexVectorPtr++;
108  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
109  }
110 }
111 #endif /* LV_HAVE_AVX */
112 
113 
114 #ifdef LV_HAVE_SSE3
115 #include <pmmintrin.h>
117 
118 static inline void volk_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector,
119  const lv_32fc_t* complexVector,
120  unsigned int num_points)
121 {
122  unsigned int number = 0;
123  const unsigned int quarterPoints = num_points / 4;
124 
125  const float* complexVectorPtr = (float*)complexVector;
126  float* magnitudeVectorPtr = magnitudeVector;
127 
128  __m128 cplxValue1, cplxValue2, result;
129  for (; number < quarterPoints; number++) {
130  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
131  complexVectorPtr += 4;
132 
133  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
134  complexVectorPtr += 4;
135 
136  result = _mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2);
137  _mm_storeu_ps(magnitudeVectorPtr, result);
138  magnitudeVectorPtr += 4;
139  }
140 
141  number = quarterPoints * 4;
142  for (; number < num_points; number++) {
143  float val1Real = *complexVectorPtr++;
144  float val1Imag = *complexVectorPtr++;
145  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
146  }
147 }
148 #endif /* LV_HAVE_SSE3 */
149 
150 
151 #ifdef LV_HAVE_SSE
153 #include <xmmintrin.h>
154 
155 static inline void volk_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector,
156  const lv_32fc_t* complexVector,
157  unsigned int num_points)
158 {
159  unsigned int number = 0;
160  const unsigned int quarterPoints = num_points / 4;
161 
162  const float* complexVectorPtr = (float*)complexVector;
163  float* magnitudeVectorPtr = magnitudeVector;
164 
165  __m128 cplxValue1, cplxValue2, result;
166 
167  for (; number < quarterPoints; number++) {
168  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
169  complexVectorPtr += 4;
170 
171  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
172  complexVectorPtr += 4;
173 
174  result = _mm_magnitudesquared_ps(cplxValue1, cplxValue2);
175  _mm_storeu_ps(magnitudeVectorPtr, result);
176  magnitudeVectorPtr += 4;
177  }
178 
179  number = quarterPoints * 4;
180  for (; number < num_points; number++) {
181  float val1Real = *complexVectorPtr++;
182  float val1Imag = *complexVectorPtr++;
183  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
184  }
185 }
186 #endif /* LV_HAVE_SSE */
187 
188 
189 #ifdef LV_HAVE_GENERIC
190 
/*!
 * \brief Squared magnitude of complex samples, portable scalar path.
 *
 * For each of the num_points input samples, reads a (real, imag) float pair
 * and writes real*real + imag*imag to the output. Nothing is read or written
 * when num_points is 0.
 *
 * \param magnitudeVector output buffer; num_points floats are written
 * \param complexVector   input buffer, read as num_points consecutive
 *                        (real, imag) float pairs
 * \param num_points      number of complex samples to process
 */
static inline void volk_32fc_magnitude_squared_32f_generic(float* magnitudeVector,
                                                           const lv_32fc_t* complexVector,
                                                           unsigned int num_points)
{
    /* View the complex input as floats without dropping its const qualifier. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    for (unsigned int number = 0; number < num_points; number++) {
        const float real = *complexVectorPtr++;
        const float imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = (real * real) + (imag * imag);
    }
}
204 #endif /* LV_HAVE_GENERIC */
205 
206 
207 #endif /* INCLUDED_volk_32fc_magnitude_squared_32f_u_H */
208 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
209 #define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
210 
211 #include <inttypes.h>
212 #include <math.h>
213 #include <stdio.h>
214 
215 #ifdef LV_HAVE_AVX
216 #include <immintrin.h>
218 
219 static inline void volk_32fc_magnitude_squared_32f_a_avx(float* magnitudeVector,
220  const lv_32fc_t* complexVector,
221  unsigned int num_points)
222 {
223  unsigned int number = 0;
224  const unsigned int eighthPoints = num_points / 8;
225 
226  const float* complexVectorPtr = (float*)complexVector;
227  float* magnitudeVectorPtr = magnitudeVector;
228 
229  __m256 cplxValue1, cplxValue2, result;
230  for (; number < eighthPoints; number++) {
231  cplxValue1 = _mm256_load_ps(complexVectorPtr);
232  complexVectorPtr += 8;
233 
234  cplxValue2 = _mm256_load_ps(complexVectorPtr);
235  complexVectorPtr += 8;
236 
237  result = _mm256_magnitudesquared_ps(cplxValue1, cplxValue2);
238  _mm256_store_ps(magnitudeVectorPtr, result);
239  magnitudeVectorPtr += 8;
240  }
241 
242  number = eighthPoints * 8;
243  for (; number < num_points; number++) {
244  float val1Real = *complexVectorPtr++;
245  float val1Imag = *complexVectorPtr++;
246  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
247  }
248 }
249 #endif /* LV_HAVE_AVX */
250 
251 
252 #ifdef LV_HAVE_SSE3
253 #include <pmmintrin.h>
255 
256 static inline void volk_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector,
257  const lv_32fc_t* complexVector,
258  unsigned int num_points)
259 {
260  unsigned int number = 0;
261  const unsigned int quarterPoints = num_points / 4;
262 
263  const float* complexVectorPtr = (float*)complexVector;
264  float* magnitudeVectorPtr = magnitudeVector;
265 
266  __m128 cplxValue1, cplxValue2, result;
267  for (; number < quarterPoints; number++) {
268  cplxValue1 = _mm_load_ps(complexVectorPtr);
269  complexVectorPtr += 4;
270 
271  cplxValue2 = _mm_load_ps(complexVectorPtr);
272  complexVectorPtr += 4;
273 
274  result = _mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2);
275  _mm_store_ps(magnitudeVectorPtr, result);
276  magnitudeVectorPtr += 4;
277  }
278 
279  number = quarterPoints * 4;
280  for (; number < num_points; number++) {
281  float val1Real = *complexVectorPtr++;
282  float val1Imag = *complexVectorPtr++;
283  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
284  }
285 }
286 #endif /* LV_HAVE_SSE3 */
287 
288 
289 #ifdef LV_HAVE_SSE
291 #include <xmmintrin.h>
292 
293 static inline void volk_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector,
294  const lv_32fc_t* complexVector,
295  unsigned int num_points)
296 {
297  unsigned int number = 0;
298  const unsigned int quarterPoints = num_points / 4;
299 
300  const float* complexVectorPtr = (float*)complexVector;
301  float* magnitudeVectorPtr = magnitudeVector;
302 
303  __m128 cplxValue1, cplxValue2, result;
304  for (; number < quarterPoints; number++) {
305  cplxValue1 = _mm_load_ps(complexVectorPtr);
306  complexVectorPtr += 4;
307 
308  cplxValue2 = _mm_load_ps(complexVectorPtr);
309  complexVectorPtr += 4;
310 
311  result = _mm_magnitudesquared_ps(cplxValue1, cplxValue2);
312  _mm_store_ps(magnitudeVectorPtr, result);
313  magnitudeVectorPtr += 4;
314  }
315 
316  number = quarterPoints * 4;
317  for (; number < num_points; number++) {
318  float val1Real = *complexVectorPtr++;
319  float val1Imag = *complexVectorPtr++;
320  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
321  }
322 }
323 #endif /* LV_HAVE_SSE */
324 
325 
326 #ifdef LV_HAVE_NEON
327 #include <arm_neon.h>
328 
/*!
 * \brief Squared magnitude of complex samples, ARM NEON path.
 *
 * Writes real*real + imag*imag for each of the num_points input samples.
 * Four samples are handled per vector pass; the remaining num_points % 4
 * samples are handled by a scalar loop using the same formula.
 *
 * \param magnitudeVector output buffer; num_points floats are written
 * \param complexVector   input buffer, read as num_points consecutive
 *                        (real, imag) float pairs
 * \param num_points      number of complex samples to process
 */
static inline void volk_32fc_magnitude_squared_32f_neon(float* magnitudeVector,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;

    /* View the complex input as floats without dropping its const qualifier. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    for (unsigned int number = 0; number < quarterPoints; number++) {
        /* vld2q_f32 de-interleaves 8 floats: val[0] takes the even-indexed
         * elements (real parts), val[1] the odd-indexed (imaginary parts). */
        const float32x4x2_t cmplx_val = vld2q_f32(complexVectorPtr);
        complexVectorPtr += 8;

        const float32x4_t realSquared = vmulq_f32(cmplx_val.val[0], cmplx_val.val[0]);
        const float32x4_t imagSquared = vmulq_f32(cmplx_val.val[1], cmplx_val.val[1]);

        /* Sum of the squared real and imaginary parts for four samples. */
        vst1q_f32(magnitudeVectorPtr, vaddq_f32(realSquared, imagSquared));
        magnitudeVectorPtr += 4;
    }

    /* Scalar tail: the num_points % 4 samples the vector body did not cover. */
    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        const float val1Real = *complexVectorPtr++;
        const float val1Imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    }
}
364 #endif /* LV_HAVE_NEON */
365 
366 
367 #ifdef LV_HAVE_GENERIC
368 
/*!
 * \brief Squared magnitude of complex samples, portable scalar path
 *        (the "_a" counterpart of the generic kernel).
 *
 * For each of the num_points input samples, reads a (real, imag) float pair
 * and writes real*real + imag*imag to the output. Nothing is read or written
 * when num_points is 0. The body uses only scalar accesses, so it places no
 * alignment requirement on the buffers beyond that of float.
 *
 * \param magnitudeVector output buffer; num_points floats are written
 * \param complexVector   input buffer, read as num_points consecutive
 *                        (real, imag) float pairs
 * \param num_points      number of complex samples to process
 */
static inline void volk_32fc_magnitude_squared_32f_a_generic(
    float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
{
    /* View the complex input as floats without dropping its const qualifier. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    for (unsigned int number = 0; number < num_points; number++) {
        const float real = *complexVectorPtr++;
        const float imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = (real * real) + (imag * imag);
    }
}
381 #endif /* LV_HAVE_GENERIC */
382 
383 #endif /* INCLUDED_volk_32fc_magnitude_squared_32f_a_H */
static void volk_32fc_magnitude_squared_32f_u_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:155
static void volk_32fc_magnitude_squared_32f_a_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:256
static void volk_32fc_magnitude_squared_32f_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:191
static __m128 _mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:51
static void volk_32fc_magnitude_squared_32f_u_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:82
static void volk_32fc_magnitude_squared_32f_neon(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:329
static void volk_32fc_magnitude_squared_32f_a_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:219
static __m128 _mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:32
float complex lv_32fc_t
Definition: volk_complex.h:70
static __m256 _mm256_magnitudesquared_ps(__m256 cplxValue1, __m256 cplxValue2)
Definition: volk_avx_intrinsics.h:73
static void volk_32fc_magnitude_squared_32f_a_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:293
static void volk_32fc_magnitude_squared_32f_u_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:118
static void volk_32fc_magnitude_squared_32f_a_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_squared_32f.h:369