Vector Optimized Library of Kernels  2.4
Architecture-tuned implementations of math kernels
volk_32fc_s32f_deinterleave_real_16i.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
73 #ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
74 #define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
75 
76 #include <inttypes.h>
77 #include <stdio.h>
78 #include <volk/volk_common.h>
79 
80 
81 #ifdef LV_HAVE_AVX2
82 #include <immintrin.h>
83 
84 static inline void
85 volk_32fc_s32f_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
86  const lv_32fc_t* complexVector,
87  const float scalar,
88  unsigned int num_points)
89 {
90  unsigned int number = 0;
91  const unsigned int eighthPoints = num_points / 8;
92 
93  const float* complexVectorPtr = (float*)complexVector;
94  int16_t* iBufferPtr = iBuffer;
95 
96  __m256 vScalar = _mm256_set1_ps(scalar);
97 
98  __m256 cplxValue1, cplxValue2, iValue;
99  __m256i a;
100  __m128i b;
101 
102  __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
103 
104  for (; number < eighthPoints; number++) {
105  cplxValue1 = _mm256_load_ps(complexVectorPtr);
106  complexVectorPtr += 8;
107 
108  cplxValue2 = _mm256_load_ps(complexVectorPtr);
109  complexVectorPtr += 8;
110 
111  // Arrange in i1i2i3i4 format
112  iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
113 
114  iValue = _mm256_mul_ps(iValue, vScalar);
115 
116  iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
117  a = _mm256_cvtps_epi32(iValue);
118  a = _mm256_packs_epi32(a, a);
119  a = _mm256_permutevar8x32_epi32(a, idx);
120  b = _mm256_extracti128_si256(a, 0);
121 
122  _mm_store_si128((__m128i*)iBufferPtr, b);
123  iBufferPtr += 8;
124  }
125 
126  number = eighthPoints * 8;
127  iBufferPtr = &iBuffer[number];
128  for (; number < num_points; number++) {
129  *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
130  complexVectorPtr++;
131  }
132 }
133 
134 
135 #endif /* LV_HAVE_AVX2 */
136 
137 #ifdef LV_HAVE_SSE
138 #include <xmmintrin.h>
139 
140 static inline void
142  const lv_32fc_t* complexVector,
143  const float scalar,
144  unsigned int num_points)
145 {
146  unsigned int number = 0;
147  const unsigned int quarterPoints = num_points / 4;
148 
149  const float* complexVectorPtr = (float*)complexVector;
150  int16_t* iBufferPtr = iBuffer;
151 
152  __m128 vScalar = _mm_set_ps1(scalar);
153 
154  __m128 cplxValue1, cplxValue2, iValue;
155 
156  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
157 
158  for (; number < quarterPoints; number++) {
159  cplxValue1 = _mm_load_ps(complexVectorPtr);
160  complexVectorPtr += 4;
161 
162  cplxValue2 = _mm_load_ps(complexVectorPtr);
163  complexVectorPtr += 4;
164 
165  // Arrange in i1i2i3i4 format
166  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
167 
168  iValue = _mm_mul_ps(iValue, vScalar);
169 
170  _mm_store_ps(floatBuffer, iValue);
171  *iBufferPtr++ = (int16_t)(floatBuffer[0]);
172  *iBufferPtr++ = (int16_t)(floatBuffer[1]);
173  *iBufferPtr++ = (int16_t)(floatBuffer[2]);
174  *iBufferPtr++ = (int16_t)(floatBuffer[3]);
175  }
176 
177  number = quarterPoints * 4;
178  iBufferPtr = &iBuffer[number];
179  for (; number < num_points; number++) {
180  *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
181  complexVectorPtr++;
182  }
183 }
184 
185 #endif /* LV_HAVE_SSE */
186 
187 
188 #ifdef LV_HAVE_GENERIC
189 
190 static inline void
192  const lv_32fc_t* complexVector,
193  const float scalar,
194  unsigned int num_points)
195 {
196  const float* complexVectorPtr = (float*)complexVector;
197  int16_t* iBufferPtr = iBuffer;
198  unsigned int number = 0;
199  for (number = 0; number < num_points; number++) {
200  *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
201  complexVectorPtr++;
202  }
203 }
204 
205 #endif /* LV_HAVE_GENERIC */
206 
207 #endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H */
208 
209 #ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
210 #define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
211 
212 #include <inttypes.h>
213 #include <stdio.h>
214 #include <volk/volk_common.h>
215 
216 #ifdef LV_HAVE_AVX2
217 #include <immintrin.h>
218 
219 static inline void
220 volk_32fc_s32f_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
221  const lv_32fc_t* complexVector,
222  const float scalar,
223  unsigned int num_points)
224 {
225  unsigned int number = 0;
226  const unsigned int eighthPoints = num_points / 8;
227 
228  const float* complexVectorPtr = (float*)complexVector;
229  int16_t* iBufferPtr = iBuffer;
230 
231  __m256 vScalar = _mm256_set1_ps(scalar);
232 
233  __m256 cplxValue1, cplxValue2, iValue;
234  __m256i a;
235  __m128i b;
236 
237  __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
238 
239  for (; number < eighthPoints; number++) {
240  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
241  complexVectorPtr += 8;
242 
243  cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
244  complexVectorPtr += 8;
245 
246  // Arrange in i1i2i3i4 format
247  iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
248 
249  iValue = _mm256_mul_ps(iValue, vScalar);
250 
251  iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
252  a = _mm256_cvtps_epi32(iValue);
253  a = _mm256_packs_epi32(a, a);
254  a = _mm256_permutevar8x32_epi32(a, idx);
255  b = _mm256_extracti128_si256(a, 0);
256 
257  _mm_storeu_si128((__m128i*)iBufferPtr, b);
258  iBufferPtr += 8;
259  }
260 
261  number = eighthPoints * 8;
262  iBufferPtr = &iBuffer[number];
263  for (; number < num_points; number++) {
264  *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
265  complexVectorPtr++;
266  }
267 }
268 
269 #endif /* LV_HAVE_AVX2 */
270 
271 #endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H */
static void volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t *iBuffer, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_deinterleave_real_16i.h:141
static void volk_32fc_s32f_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_deinterleave_real_16i.h:191
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
float complex lv_32fc_t
Definition: volk_complex.h:70