/*
 * Aligned AVX2 variant of the 8-bit complex -> real deinterleave kernel.
 * NOTE(review): this listing is incomplete -- the remainder of the parameter
 * list, the braces and all but the first argument of each _mm256_set_epi8()
 * initialiser are not visible here, so the comments below describe only the
 * statements that are actually shown.
 */
53 #ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H 54 #define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H 60 #include <immintrin.h> 62 static inline void volk_8ic_deinterleave_real_8i_a_avx2(int8_t* iBuffer,
64 unsigned int num_points)
66 unsigned int number = 0;
/* View the complex input as a plain byte stream. */
67 const int8_t* complexVectorPtr = (int8_t*)complexVector;
68 int8_t* iBufferPtr = iBuffer;
/*
 * Byte-shuffle control vectors. A control byte of 0x80 makes the shuffle write
 * zero to that position. Presumably these select the even-indexed bytes the
 * way the 128-bit masks further down do -- TODO confirm against the full
 * initialisers.
 */
69 __m256i moveMask1 = _mm256_set_epi8(0x80,
101 __m256i moveMask2 = _mm256_set_epi8(14,
133 __m256i complexVal1, complexVal2, outputVal;
/* Number of full vector iterations; each one consumes two 32-byte loads. */
135 unsigned int thirtysecondPoints = num_points / 32;
137 for (number = 0; number < thirtysecondPoints; number++) {
/* _mm256_load_si256 requires a 32-byte aligned address. */
139 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
140 complexVectorPtr += 32;
141 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
142 complexVectorPtr += 32;
/* The shuffle operates independently inside each 128-bit lane. */
144 complexVal1 = _mm256_shuffle_epi8(complexVal1, moveMask1);
145 complexVal2 = _mm256_shuffle_epi8(complexVal2, moveMask2);
/* Merge the two shuffled registers into one. */
146 outputVal = _mm256_or_si256(complexVal1, complexVal2);
/* 0xd8 reorders the four 64-bit elements as 0, 2, 1, 3. */
147 outputVal = _mm256_permute4x64_epi64(outputVal, 0xd8);
/*
 * Aligned 32-byte store.
 * NOTE(review): no advance of iBufferPtr is visible after this store; without
 * one every iteration overwrites the same 32 bytes -- confirm it exists in the
 * full source.
 */
149 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
/*
 * Scalar tail for the samples left over after the vector loop.
 * NOTE(review): only one increment of complexVectorPtr per output is visible;
 * skipping the second byte of each sample would need another -- confirm.
 */
153 number = thirtysecondPoints * 32;
154 for (; number < num_points; number++) {
155 *iBufferPtr++ = *complexVectorPtr++;
163 #include <tmmintrin.h> 167 unsigned int num_points)
169 unsigned int number = 0;
170 const int8_t* complexVectorPtr = (int8_t*)complexVector;
171 int8_t* iBufferPtr = iBuffer;
172 __m128i moveMask1 = _mm_set_epi8(
173 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
174 __m128i moveMask2 = _mm_set_epi8(
175 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
176 __m128i complexVal1, complexVal2, outputVal;
178 unsigned int sixteenthPoints = num_points / 16;
180 for (number = 0; number < sixteenthPoints; number++) {
181 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);
182 complexVectorPtr += 16;
183 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);
184 complexVectorPtr += 16;
186 complexVal1 = _mm_shuffle_epi8(complexVal1, moveMask1);
187 complexVal2 = _mm_shuffle_epi8(complexVal2, moveMask2);
189 outputVal = _mm_or_si128(complexVal1, complexVal2);
191 _mm_store_si128((__m128i*)iBufferPtr, outputVal);
195 number = sixteenthPoints * 16;
196 for (; number < num_points; number++) {
197 *iBufferPtr++ = *complexVectorPtr++;
205 #include <immintrin.h> 209 unsigned int num_points)
211 unsigned int number = 0;
212 const int8_t* complexVectorPtr = (int8_t*)complexVector;
213 int8_t* iBufferPtr = iBuffer;
214 __m128i moveMaskL = _mm_set_epi8(
215 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
216 __m128i moveMaskH = _mm_set_epi8(
217 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
218 __m256i complexVal1, complexVal2, outputVal;
219 __m128i complexVal1H, complexVal1L, complexVal2H, complexVal2L, outputVal1,
222 unsigned int thirtysecondPoints = num_points / 32;
224 for (number = 0; number < thirtysecondPoints; number++) {
226 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
227 complexVectorPtr += 32;
228 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
229 complexVectorPtr += 32;
231 complexVal1H = _mm256_extractf128_si256(complexVal1, 1);
232 complexVal1L = _mm256_extractf128_si256(complexVal1, 0);
233 complexVal2H = _mm256_extractf128_si256(complexVal2, 1);
234 complexVal2L = _mm256_extractf128_si256(complexVal2, 0);
236 complexVal1H = _mm_shuffle_epi8(complexVal1H, moveMaskH);
237 complexVal1L = _mm_shuffle_epi8(complexVal1L, moveMaskL);
238 outputVal1 = _mm_or_si128(complexVal1H, complexVal1L);
241 complexVal2H = _mm_shuffle_epi8(complexVal2H, moveMaskH);
242 complexVal2L = _mm_shuffle_epi8(complexVal2L, moveMaskL);
243 outputVal2 = _mm_or_si128(complexVal2H, complexVal2L);
245 __m256i dummy = _mm256_setzero_si256();
246 outputVal = _mm256_insertf128_si256(dummy, outputVal1, 0);
247 outputVal = _mm256_insertf128_si256(outputVal, outputVal2, 1);
250 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
254 number = thirtysecondPoints * 32;
255 for (; number < num_points; number++) {
256 *iBufferPtr++ = *complexVectorPtr++;
#ifdef LV_HAVE_GENERIC

/*
 * Portable reference implementation: copies the real (first) byte of every
 * 8-bit complex sample into iBuffer.
 *
 * iBuffer       - destination for num_points real bytes.
 * complexVector - num_points interleaved samples (real byte, imaginary byte).
 * num_points    - number of complex samples to process.
 */
static inline void volk_8ic_deinterleave_real_8i_generic(int8_t* iBuffer,
                                                         const lv_8sc_t* complexVector,
                                                         unsigned int num_points)
{
    unsigned int number = 0;
    const int8_t* complexVectorPtr = (const int8_t*)complexVector;
    int8_t* iBufferPtr = iBuffer;

    for (number = 0; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr;
        /* Each sample is two bytes wide; step over the imaginary byte. */
        complexVectorPtr += 2;
    }
}

#endif /* LV_HAVE_GENERIC */
281 #include <arm_neon.h> 285 unsigned int num_points)
288 unsigned int sixteenth_points = num_points / 16;
290 int8x16x2_t input_vector;
291 for (number = 0; number < sixteenth_points; ++number) {
292 input_vector = vld2q_s8((int8_t*)complexVector);
293 vst1q_s8(iBuffer, input_vector.val[0]);
298 const int8_t* complexVectorPtr = (int8_t*)complexVector;
299 int8_t* iBufferPtr = iBuffer;
300 for (number = sixteenth_points * 16; number < num_points; number++) {
301 *iBufferPtr++ = *complexVectorPtr++;
/*
 * Unaligned AVX2 variant of the 8-bit complex -> real deinterleave kernel.
 * The visible statements match the aligned AVX2 kernel except that loads and
 * stores use the unaligned intrinsics.
 * NOTE(review): this listing is incomplete -- the remainder of the parameter
 * list, the braces and all but the first argument of each _mm256_set_epi8()
 * initialiser are not visible here, so the comments below describe only the
 * statements that are actually shown.
 */
310 #ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_UNALIGNED8_H 311 #define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_UNALIGNED8_H 313 #include <inttypes.h> 317 #include <immintrin.h> 319 static inline void volk_8ic_deinterleave_real_8i_u_avx2(int8_t* iBuffer,
321 unsigned int num_points)
323 unsigned int number = 0;
/* View the complex input as a plain byte stream. */
324 const int8_t* complexVectorPtr = (int8_t*)complexVector;
325 int8_t* iBufferPtr = iBuffer;
/*
 * Byte-shuffle control vectors. A control byte of 0x80 makes the shuffle write
 * zero to that position. Presumably these select the even-indexed bytes --
 * TODO confirm against the full initialisers.
 */
326 __m256i moveMask1 = _mm256_set_epi8(0x80,
358 __m256i moveMask2 = _mm256_set_epi8(14,
390 __m256i complexVal1, complexVal2, outputVal;
/* Number of full vector iterations; each one consumes two 32-byte loads. */
392 unsigned int thirtysecondPoints = num_points / 32;
394 for (number = 0; number < thirtysecondPoints; number++) {
/* _mm256_loadu_si256 has no alignment requirement. */
396 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
397 complexVectorPtr += 32;
398 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
399 complexVectorPtr += 32;
/* The shuffle operates independently inside each 128-bit lane. */
401 complexVal1 = _mm256_shuffle_epi8(complexVal1, moveMask1);
402 complexVal2 = _mm256_shuffle_epi8(complexVal2, moveMask2);
/* Merge the two shuffled registers into one. */
403 outputVal = _mm256_or_si256(complexVal1, complexVal2);
/* 0xd8 reorders the four 64-bit elements as 0, 2, 1, 3. */
404 outputVal = _mm256_permute4x64_epi64(outputVal, 0xd8);
/*
 * Unaligned 32-byte store.
 * NOTE(review): no advance of iBufferPtr is visible after this store; without
 * one every iteration overwrites the same 32 bytes -- confirm it exists in the
 * full source.
 */
406 _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal);
/*
 * Scalar tail for the samples left over after the vector loop.
 * NOTE(review): only one increment of complexVectorPtr per output is visible;
 * skipping the second byte of each sample would need another -- confirm.
 */
410 number = thirtysecondPoints * 32;
411 for (; number < num_points; number++) {
412 *iBufferPtr++ = *complexVectorPtr++;
static void volk_8ic_deinterleave_real_8i_a_avx(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_8i.h:207
static void volk_8ic_deinterleave_real_8i_generic(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_8i.h:265
static void volk_8ic_deinterleave_real_8i_neon(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_8i.h:283
static void volk_8ic_deinterleave_real_8i_a_ssse3(int8_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_8i.h:165
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:66