44 #ifndef INCLUDED_volk_16i_max_star_16i_a_H
45 #define INCLUDED_volk_16i_max_star_16i_a_H
52 #include <emmintrin.h>
53 #include <tmmintrin.h>
54 #include <xmmintrin.h>
/**
 * SIMD kernel for aligned input: store the "max-star" winner of src0 in
 * target[0].
 *
 * Full groups of eight samples are reduced lane-wise with a signed 16-bit
 * maximum.  The eight lane results and the (num_points % 8) tail samples are
 * then folded one by one using the wrapped-difference comparison
 * (short)(a - b) > 0, which agrees with a > b whenever a - b fits in a short.
 *
 * @param target      output; only target[0] is written
 * @param src0        input samples, num_points entries; read with aligned
 *                    128-bit loads, so it must be 16-byte aligned
 * @param num_points  number of samples; when 0 nothing is read or written
 */
static inline void
volk_16i_max_star_16i_a_ssse3(short* target, short* src0, unsigned int num_points)
{
    /* Derived from the sample count directly so no byte count can wrap. */
    const unsigned int bound = num_points >> 3;    /* full 8-sample vectors */
    const unsigned int leftovers = num_points & 7; /* samples in scalar tail */
    const __m128i* p_src0 = (const __m128i*)src0;
    short cands[8];
    short candidate;
    __m128i best;
    unsigned int i;

    /* No samples: src0[0] does not exist, so leave target untouched. */
    if (num_points == 0) {
        return;
    }

    candidate = src0[0];

    /* Seed every lane with a real sample.  A zero seed would win against
     * all-negative input and yield a value that is not in src0. */
    best = _mm_set1_epi16(candidate);

    for (i = 0; i < bound; ++i) {
        /* p_src0 + i steps through consecutive 16-byte groups. */
        best = _mm_max_epi16(best, _mm_load_si128(p_src0 + i));
    }

    /* cands is a plain stack array with no alignment guarantee, hence the
     * unaligned store. */
    _mm_storeu_si128((__m128i*)cands, best);

    for (i = 0; i < 8; ++i) {
        candidate = ((short)(candidate - cands[i]) > 0) ? candidate : cands[i];
    }

    for (i = 0; i < leftovers; ++i) {
        const short sample = src0[(bound << 3) + i];
        candidate = ((short)(candidate - sample) > 0) ? candidate : sample;
    }

    target[0] = candidate;
}
114 #ifdef LV_HAVE_GENERIC
/**
 * Generic (portable C) kernel: scan src0 and store the "max-star" winner in
 * target[0].
 *
 * Two samples are compared through the sign of their 16-bit wrapped
 * difference, (short)(a - b) > 0, rather than with a plain a > b.  The two
 * agree whenever a - b fits in a short.
 *
 * @param target      output; only target[0] is written
 * @param src0        input samples, num_points entries
 * @param num_points  number of samples; when 0 nothing is read or written
 */
static inline void
volk_16i_max_star_16i_generic(short* target, short* src0, unsigned int num_points)
{
    unsigned int i;
    short candidate;

    /* No samples: src0[0] does not exist, so leave target untouched. */
    if (num_points == 0) {
        return;
    }

    candidate = src0[0];

    /* Iterate over the sample count itself; a bound derived from a byte
     * count (num_points * 2) would wrap for very large inputs. */
    for (i = 1; i < num_points; ++i) {
        candidate = ((short)(candidate - src0[i]) > 0) ? candidate : src0[i];
    }

    target[0] = candidate;
}
static void volk_16i_max_star_16i_a_ssse3(short *target, short *src0, unsigned int num_points)
Definition: volk_16i_max_star_16i.h:57
static void volk_16i_max_star_16i_generic(short *target, short *src0, unsigned int num_points)
Definition: volk_16i_max_star_16i.h:117
for i
Definition: volk_config_fixed.tmpl.h:13