/*
 * Portable population count of a 64-bit word, computed as two independent
 * 32-bit bit-parallel reductions (low half first, then high half).
 * NOTE(review): the function signature and whatever combines the two
 * half-counts and writes the result are not visible in this excerpt.
 */
/* Start with the low 32 bits of the input. */
60 #ifndef INCLUDED_volk_64u_popcnt_a_H 61 #define INCLUDED_volk_64u_popcnt_a_H 67 #ifdef LV_HAVE_GENERIC 76 uint32_t retVal = (uint32_t)(value & 0x00000000FFFFFFFFull);
/* Sum adjacent bits: each 2-bit field now holds the count of its two bits. */
78 retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
/* Sum adjacent 2-bit fields: each 4-bit field now holds a count of 0..4. */
79 retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
/* Sum adjacent nibbles; the mask leaves one count (0..8) per byte. */
80 retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
/* Fold neighbouring bytes together; upper bytes now hold partial sums. */
81 retVal = (retVal + (retVal >> 8));
/*
 * Fold the two 16-bit halves; the low byte is the total for this half.
 * A 32-bit word has at most 32 set bits, so the 6-bit mask keeps the whole
 * count and discards the partial sums left in the upper bits.
 */
82 retVal = (retVal + (retVal >> 16)) & 0x0000003F;
/* Keep the low-half count in a 64-bit variable. */
83 uint64_t retVal64 = retVal;
/* Repeat the same reduction on the high 32 bits of the input. */
86 retVal = (uint32_t)((value & 0xFFFFFFFF00000000ull) >> 32);
87 retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
88 retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
89 retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
90 retVal = (retVal + (retVal >> 8));
/* retVal now holds the count of set bits in the high half (0..32). */
91 retVal = (retVal + (retVal >> 16)) & 0x0000003F;
/*
 * SSE4.2 variant, compiled only when both LV_HAVE_SSE4_2 and LV_HAVE_64
 * evaluate to non-zero.
 *
 * ret   - output location; receives the number of set bits in value.
 * value - 64-bit word to count.
 *
 * NOTE(review): the braces and the closing #endif are not visible in this
 * excerpt.
 */
100 #if LV_HAVE_SSE4_2 && LV_HAVE_64 102 #include <nmmintrin.h> 104 static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret,
const uint64_t value)
/* Single 64-bit population-count intrinsic; the result is stored through ret. */
106 *ret = _mm_popcnt_u64(value);
/*
 * NEON variant: counts the set bits of `value` per byte, then reduces the
 * eight byte counts to a single 64-bit total with pairwise widening adds.
 * NOTE(review): the function signature and enclosing preprocessor guard are
 * not visible in this excerpt.
 */
/* One temporary per widening stage: 8x8-bit -> 4x16-bit -> 2x32-bit -> 1x64-bit. */
113 #include <arm_neon.h> 116 uint8x8_t input_val, count8x8_val;
117 uint16x4_t count16x4_val;
118 uint32x2_t count32x2_val;
119 uint64x1_t count64x1_val;
/* Load the 8 bytes of `value` as a vector of eight unsigned bytes. */
121 input_val = vld1_u8((
unsigned char*)&value);
/* Per-byte population count: each lane becomes 0..8. */
122 count8x8_val = vcnt_u8(input_val);
/* Pairwise add adjacent lanes, widening the element type at each step. */
123 count16x4_val = vpaddl_u8(count8x8_val);
124 count32x2_val = vpaddl_u16(count16x4_val);
125 count64x1_val = vpaddl_u32(count32x2_val);
/* Store the single 64-bit total through ret. */
126 vst1_u64(ret, count64x1_val);
static void volk_64u_popcnt_generic(uint64_t *ret, const uint64_t value)
Definition: volk_64u_popcnt.h:70
static void volk_64u_popcnt_neon(uint64_t *ret, const uint64_t value)
Definition: volk_64u_popcnt.h:114