Vector Optimized Library of Kernels  3.2.0
Architecture-tuned implementations of math kernels
volk_32f_x2_interleave_32fc.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
60 #ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H
61 #define INCLUDED_volk_32f_x2_interleave_32fc_a_H
62 
63 #include <inttypes.h>
64 #include <stdio.h>
65 
66 #ifdef LV_HAVE_AVX
67 #include <immintrin.h>
68 
69 static inline void volk_32f_x2_interleave_32fc_a_avx(lv_32fc_t* complexVector,
70  const float* iBuffer,
71  const float* qBuffer,
72  unsigned int num_points)
73 {
74  unsigned int number = 0;
75  float* complexVectorPtr = (float*)complexVector;
76  const float* iBufferPtr = iBuffer;
77  const float* qBufferPtr = qBuffer;
78 
79  const uint64_t eighthPoints = num_points / 8;
80 
81  __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
82  for (; number < eighthPoints; number++) {
83  iValue = _mm256_load_ps(iBufferPtr);
84  qValue = _mm256_load_ps(qBufferPtr);
85 
86  // Interleaves the lower two values in the i and q variables into one buffer
87  cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
88  // Interleaves the upper two values in the i and q variables into one buffer
89  cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
90 
91  cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
92  _mm256_store_ps(complexVectorPtr, cplxValue);
93  complexVectorPtr += 8;
94 
95  cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
96  _mm256_store_ps(complexVectorPtr, cplxValue);
97  complexVectorPtr += 8;
98 
99  iBufferPtr += 8;
100  qBufferPtr += 8;
101  }
102 
103  number = eighthPoints * 8;
104  for (; number < num_points; number++) {
105  *complexVectorPtr++ = *iBufferPtr++;
106  *complexVectorPtr++ = *qBufferPtr++;
107  }
108 }
109 
110 #endif /* LV_HAVE_AVX */
111 
112 #ifdef LV_HAVE_SSE
113 #include <xmmintrin.h>
114 
115 static inline void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t* complexVector,
116  const float* iBuffer,
117  const float* qBuffer,
118  unsigned int num_points)
119 {
120  unsigned int number = 0;
121  float* complexVectorPtr = (float*)complexVector;
122  const float* iBufferPtr = iBuffer;
123  const float* qBufferPtr = qBuffer;
124 
125  const uint64_t quarterPoints = num_points / 4;
126 
127  __m128 iValue, qValue, cplxValue;
128  for (; number < quarterPoints; number++) {
129  iValue = _mm_load_ps(iBufferPtr);
130  qValue = _mm_load_ps(qBufferPtr);
131 
132  // Interleaves the lower two values in the i and q variables into one buffer
133  cplxValue = _mm_unpacklo_ps(iValue, qValue);
134  _mm_store_ps(complexVectorPtr, cplxValue);
135  complexVectorPtr += 4;
136 
137  // Interleaves the upper two values in the i and q variables into one buffer
138  cplxValue = _mm_unpackhi_ps(iValue, qValue);
139  _mm_store_ps(complexVectorPtr, cplxValue);
140  complexVectorPtr += 4;
141 
142  iBufferPtr += 4;
143  qBufferPtr += 4;
144  }
145 
146  number = quarterPoints * 4;
147  for (; number < num_points; number++) {
148  *complexVectorPtr++ = *iBufferPtr++;
149  *complexVectorPtr++ = *qBufferPtr++;
150  }
151 }
152 #endif /* LV_HAVE_SSE */
153 
154 
155 #ifdef LV_HAVE_NEON
156 #include <arm_neon.h>
157 
158 static inline void volk_32f_x2_interleave_32fc_neon(lv_32fc_t* complexVector,
159  const float* iBuffer,
160  const float* qBuffer,
161  unsigned int num_points)
162 {
163  unsigned int quarter_points = num_points / 4;
164  unsigned int number;
165  float* complexVectorPtr = (float*)complexVector;
166 
167  float32x4x2_t complex_vec;
168  for (number = 0; number < quarter_points; ++number) {
169  complex_vec.val[0] = vld1q_f32(iBuffer);
170  complex_vec.val[1] = vld1q_f32(qBuffer);
171  vst2q_f32(complexVectorPtr, complex_vec);
172  iBuffer += 4;
173  qBuffer += 4;
174  complexVectorPtr += 8;
175  }
176 
177  for (number = quarter_points * 4; number < num_points; ++number) {
178  *complexVectorPtr++ = *iBuffer++;
179  *complexVectorPtr++ = *qBuffer++;
180  }
181 }
182 #endif /* LV_HAVE_NEON */
183 
184 
185 #ifdef LV_HAVE_GENERIC
186 
187 static inline void volk_32f_x2_interleave_32fc_generic(lv_32fc_t* complexVector,
188  const float* iBuffer,
189  const float* qBuffer,
190  unsigned int num_points)
191 {
192  float* complexVectorPtr = (float*)complexVector;
193  const float* iBufferPtr = iBuffer;
194  const float* qBufferPtr = qBuffer;
195  unsigned int number;
196 
197  for (number = 0; number < num_points; number++) {
198  *complexVectorPtr++ = *iBufferPtr++;
199  *complexVectorPtr++ = *qBufferPtr++;
200  }
201 }
202 #endif /* LV_HAVE_GENERIC */
203 
204 
205 #endif /* INCLUDED_volk_32f_x2_interleave_32fc_a_H */
206 
207 #ifndef INCLUDED_volk_32f_x2_interleave_32fc_u_H
208 #define INCLUDED_volk_32f_x2_interleave_32fc_u_H
209 
210 #include <inttypes.h>
211 #include <stdio.h>
212 
213 #ifdef LV_HAVE_AVX
214 #include <immintrin.h>
215 
216 static inline void volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t* complexVector,
217  const float* iBuffer,
218  const float* qBuffer,
219  unsigned int num_points)
220 {
221  unsigned int number = 0;
222  float* complexVectorPtr = (float*)complexVector;
223  const float* iBufferPtr = iBuffer;
224  const float* qBufferPtr = qBuffer;
225 
226  const uint64_t eighthPoints = num_points / 8;
227 
228  __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
229  for (; number < eighthPoints; number++) {
230  iValue = _mm256_loadu_ps(iBufferPtr);
231  qValue = _mm256_loadu_ps(qBufferPtr);
232 
233  // Interleaves the lower two values in the i and q variables into one buffer
234  cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
235  // Interleaves the upper two values in the i and q variables into one buffer
236  cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
237 
238  cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
239  _mm256_storeu_ps(complexVectorPtr, cplxValue);
240  complexVectorPtr += 8;
241 
242  cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
243  _mm256_storeu_ps(complexVectorPtr, cplxValue);
244  complexVectorPtr += 8;
245 
246  iBufferPtr += 8;
247  qBufferPtr += 8;
248  }
249 
250  number = eighthPoints * 8;
251  for (; number < num_points; number++) {
252  *complexVectorPtr++ = *iBufferPtr++;
253  *complexVectorPtr++ = *qBufferPtr++;
254  }
255 }
256 #endif /* LV_HAVE_AVX */
257 
258 #ifdef LV_HAVE_RVV
259 #include <riscv_vector.h>
260 
261 static inline void volk_32f_x2_interleave_32fc_rvv(lv_32fc_t* complexVector,
262  const float* iBuffer,
263  const float* qBuffer,
264  unsigned int num_points)
265 {
266  uint64_t* out = (uint64_t*)complexVector;
267  size_t n = num_points;
268  for (size_t vl; n > 0; n -= vl, out += vl, iBuffer += vl, qBuffer += vl) {
269  vl = __riscv_vsetvl_e32m4(n);
270  vuint32m4_t vr = __riscv_vle32_v_u32m4((const uint32_t*)iBuffer, vl);
271  vuint32m4_t vi = __riscv_vle32_v_u32m4((const uint32_t*)qBuffer, vl);
272  vuint64m8_t vc =
273  __riscv_vwmaccu(__riscv_vwaddu_vv(vr, vi, vl), 0xFFFFFFFF, vi, vl);
274  __riscv_vse64(out, vc, vl);
275  }
276 }
277 #endif /*LV_HAVE_RVV*/
278 
279 #ifdef LV_HAVE_RVVSEG
280 #include <riscv_vector.h>
281 
282 static inline void volk_32f_x2_interleave_32fc_rvvseg(lv_32fc_t* complexVector,
283  const float* iBuffer,
284  const float* qBuffer,
285  unsigned int num_points)
286 {
287  size_t n = num_points;
288  for (size_t vl; n > 0; n -= vl, complexVector += vl, iBuffer += vl, qBuffer += vl) {
289  vl = __riscv_vsetvl_e32m4(n);
290  vfloat32m4_t vr = __riscv_vle32_v_f32m4(iBuffer, vl);
291  vfloat32m4_t vi = __riscv_vle32_v_f32m4(qBuffer, vl);
292  __riscv_vsseg2e32((float*)complexVector, __riscv_vcreate_v_f32m4x2(vr, vi), vl);
293  }
294 }
295 #endif /*LV_HAVE_RVVSEG*/
296 
297 #endif /* INCLUDED_volk_32f_x2_interleave_32fc_u_H */
static void volk_32f_x2_interleave_32fc_a_avx(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:69
static void volk_32f_x2_interleave_32fc_generic(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:187
static void volk_32f_x2_interleave_32fc_neon(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:158
static void volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:216
static void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t *complexVector, const float *iBuffer, const float *qBuffer, unsigned int num_points)
Definition: volk_32f_x2_interleave_32fc.h:115
float complex lv_32fc_t
Definition: volk_complex.h:74