Vector Optimized Library of Kernels  2.4
Architecture-tuned implementations of math kernels
volk_64f_x2_max_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
71 #ifndef INCLUDED_volk_64f_x2_max_64f_a_H
72 #define INCLUDED_volk_64f_x2_max_64f_a_H
73 
74 #include <inttypes.h>
75 #include <stdio.h>
76 
77 #ifdef LV_HAVE_AVX512F
78 #include <immintrin.h>
79 
80 static inline void volk_64f_x2_max_64f_a_avx512f(double* cVector,
81  const double* aVector,
82  const double* bVector,
83  unsigned int num_points)
84 {
85  unsigned int number = 0;
86  const unsigned int eigthPoints = num_points / 8;
87 
88  double* cPtr = cVector;
89  const double* aPtr = aVector;
90  const double* bPtr = bVector;
91 
92  __m512d aVal, bVal, cVal;
93  for (; number < eigthPoints; number++) {
94 
95  aVal = _mm512_load_pd(aPtr);
96  bVal = _mm512_load_pd(bPtr);
97 
98  cVal = _mm512_max_pd(aVal, bVal);
99 
100  _mm512_store_pd(cPtr, cVal); // Store the results back into the C container
101 
102  aPtr += 8;
103  bPtr += 8;
104  cPtr += 8;
105  }
106 
107  number = eigthPoints * 8;
108  for (; number < num_points; number++) {
109  const double a = *aPtr++;
110  const double b = *bPtr++;
111  *cPtr++ = (a > b ? a : b);
112  }
113 }
114 #endif /* LV_HAVE_AVX512F */
115 
116 
117 #ifdef LV_HAVE_AVX
118 #include <immintrin.h>
119 
120 static inline void volk_64f_x2_max_64f_a_avx(double* cVector,
121  const double* aVector,
122  const double* bVector,
123  unsigned int num_points)
124 {
125  unsigned int number = 0;
126  const unsigned int quarterPoints = num_points / 4;
127 
128  double* cPtr = cVector;
129  const double* aPtr = aVector;
130  const double* bPtr = bVector;
131 
132  __m256d aVal, bVal, cVal;
133  for (; number < quarterPoints; number++) {
134 
135  aVal = _mm256_load_pd(aPtr);
136  bVal = _mm256_load_pd(bPtr);
137 
138  cVal = _mm256_max_pd(aVal, bVal);
139 
140  _mm256_store_pd(cPtr, cVal); // Store the results back into the C container
141 
142  aPtr += 4;
143  bPtr += 4;
144  cPtr += 4;
145  }
146 
147  number = quarterPoints * 4;
148  for (; number < num_points; number++) {
149  const double a = *aPtr++;
150  const double b = *bPtr++;
151  *cPtr++ = (a > b ? a : b);
152  }
153 }
154 #endif /* LV_HAVE_AVX */
155 
156 
157 #ifdef LV_HAVE_SSE2
158 #include <emmintrin.h>
159 
/*
 * Element-wise maximum of two double arrays (SSE2, aligned variant).
 *
 * cVector    - output array; receives num_points results
 * aVector    - first input array
 * bVector    - second input array
 * num_points - number of elements to process
 *
 * Pairs of doubles go through _mm_load_pd / _mm_max_pd / _mm_store_pd; the
 * aligned load/store intrinsics expect 16-byte aligned addresses. When
 * num_points is odd, the last element is handled by the scalar loop.
 */
static inline void volk_64f_x2_max_64f_a_sse2(double* cVector,
                                              const double* aVector,
                                              const double* bVector,
                                              unsigned int num_points)
{
    /* Index of the first element that is not part of a full pair. */
    const unsigned int vectorEnd = num_points - (num_points % 2u);
    unsigned int i;

    for (i = 0; i < vectorEnd; i += 2u) {
        const __m128d lhs = _mm_load_pd(aVector + i);
        const __m128d rhs = _mm_load_pd(bVector + i);
        _mm_store_pd(cVector + i, _mm_max_pd(lhs, rhs));
    }

    /* Scalar tail: the b value is stored whenever a > b does not hold. */
    for (; i < num_points; i++) {
        const double lhs = aVector[i];
        const double rhs = bVector[i];
        if (lhs > rhs) {
            cVector[i] = lhs;
        } else {
            cVector[i] = rhs;
        }
    }
}
194 #endif /* LV_HAVE_SSE2 */
195 
196 
197 #ifdef LV_HAVE_GENERIC
198 
/*
 * Element-wise maximum of two double arrays (portable scalar variant).
 *
 * cVector    - output array; receives num_points results
 * aVector    - first input array
 * bVector    - second input array
 * num_points - number of elements to process
 *
 * For every index the a value is stored only when a > b holds; in every
 * other case (equal values, or a comparison involving NaN) the b value is
 * stored.
 */
static inline void volk_64f_x2_max_64f_generic(double* cVector,
                                               const double* aVector,
                                               const double* bVector,
                                               unsigned int num_points)
{
    unsigned int i;

    for (i = 0; i < num_points; i++) {
        const double lhs = aVector[i];
        const double rhs = bVector[i];
        if (lhs > rhs) {
            cVector[i] = lhs;
        } else {
            cVector[i] = rhs;
        }
    }
}
215 #endif /* LV_HAVE_GENERIC */
216 
217 
218 #endif /* INCLUDED_volk_64f_x2_max_64f_a_H */
219 
220 
221 #ifndef INCLUDED_volk_64f_x2_max_64f_u_H
222 #define INCLUDED_volk_64f_x2_max_64f_u_H
223 
224 #include <inttypes.h>
225 #include <stdio.h>
226 
227 #ifdef LV_HAVE_AVX512F
228 #include <immintrin.h>
229 
230 static inline void volk_64f_x2_max_64f_u_avx512f(double* cVector,
231  const double* aVector,
232  const double* bVector,
233  unsigned int num_points)
234 {
235  unsigned int number = 0;
236  const unsigned int eigthPoints = num_points / 8;
237 
238  double* cPtr = cVector;
239  const double* aPtr = aVector;
240  const double* bPtr = bVector;
241 
242  __m512d aVal, bVal, cVal;
243  for (; number < eigthPoints; number++) {
244 
245  aVal = _mm512_loadu_pd(aPtr);
246  bVal = _mm512_loadu_pd(bPtr);
247 
248  cVal = _mm512_max_pd(aVal, bVal);
249 
250  _mm512_storeu_pd(cPtr, cVal); // Store the results back into the C container
251 
252  aPtr += 8;
253  bPtr += 8;
254  cPtr += 8;
255  }
256 
257  number = eigthPoints * 8;
258  for (; number < num_points; number++) {
259  const double a = *aPtr++;
260  const double b = *bPtr++;
261  *cPtr++ = (a > b ? a : b);
262  }
263 }
264 #endif /* LV_HAVE_AVX512F */
265 
266 
267 #ifdef LV_HAVE_AVX
268 #include <immintrin.h>
269 
270 static inline void volk_64f_x2_max_64f_u_avx(double* cVector,
271  const double* aVector,
272  const double* bVector,
273  unsigned int num_points)
274 {
275  unsigned int number = 0;
276  const unsigned int quarterPoints = num_points / 4;
277 
278  double* cPtr = cVector;
279  const double* aPtr = aVector;
280  const double* bPtr = bVector;
281 
282  __m256d aVal, bVal, cVal;
283  for (; number < quarterPoints; number++) {
284 
285  aVal = _mm256_loadu_pd(aPtr);
286  bVal = _mm256_loadu_pd(bPtr);
287 
288  cVal = _mm256_max_pd(aVal, bVal);
289 
290  _mm256_storeu_pd(cPtr, cVal); // Store the results back into the C container
291 
292  aPtr += 4;
293  bPtr += 4;
294  cPtr += 4;
295  }
296 
297  number = quarterPoints * 4;
298  for (; number < num_points; number++) {
299  const double a = *aPtr++;
300  const double b = *bPtr++;
301  *cPtr++ = (a > b ? a : b);
302  }
303 }
304 #endif /* LV_HAVE_AVX */
305 
306 
307 #endif /* INCLUDED_volk_64f_x2_max_64f_u_H */
static void volk_64f_x2_max_64f_u_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:270
static void volk_64f_x2_max_64f_a_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:160
static void volk_64f_x2_max_64f_a_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:120
static void volk_64f_x2_max_64f_generic(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:199