Vector Optimized Library of Kernels  2.4
Architecture-tuned implementations of math kernels
volk_sse_intrinsics.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2015 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*
24  * This file holds helper functions that are composed of several SSE intrinsics.
25  * Use them in VOLK kernels instead of copy-pasting the same intrinsic sequences.
26  */
27 
28 #ifndef INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_
29 #define INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_
30 #include <xmmintrin.h>
31 
/*
 * Squared magnitude of four complex floats stored as interleaved I/Q pairs.
 *
 * cplxValue1 holds the first two I/Q pairs and cplxValue2 the next two.
 * The returned vector contains I*I + Q*Q for each of the four pairs, in
 * input order (both pairs of cplxValue1 first, then both of cplxValue2).
 */
static inline __m128 _mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2)
{
    // Even lanes (0 and 2) of each input: the I components, i1 i2 i3 i4
    const __m128 inPhase =
        _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
    // Odd lanes (1 and 3) of each input: the Q components, q1 q2 q3 q4
    const __m128 quadrature =
        _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));

    // Lane-wise I^2 + Q^2
    return _mm_add_ps(_mm_mul_ps(inPhase, inPhase),
                      _mm_mul_ps(quadrature, quadrature));
}
43 
44 static inline __m128 _mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2)
45 {
46  return _mm_sqrt_ps(_mm_magnitudesquared_ps(cplxValue1, cplxValue2));
47 }
48 
49 static inline __m128 _mm_scaled_norm_dist_ps_sse(const __m128 symbols0,
50  const __m128 symbols1,
51  const __m128 points0,
52  const __m128 points1,
53  const __m128 scalar)
54 {
55  // calculate scalar * |x - y|^2
56  const __m128 diff0 = _mm_sub_ps(symbols0, points0);
57  const __m128 diff1 = _mm_sub_ps(symbols1, points1);
58  const __m128 norms = _mm_magnitudesquared_ps(diff0, diff1);
59  return _mm_mul_ps(norms, scalar);
60 }
61 
62 #endif /* INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_ */
static __m128 _mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:44
static __m128 _mm_scaled_norm_dist_ps_sse(const __m128 symbols0, const __m128 symbols1, const __m128 points0, const __m128 points1, const __m128 scalar)
Definition: volk_sse_intrinsics.h:49
static __m128 _mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:32