Vector Optimized Library of Kernels  3.2.0
Architecture-tuned implementations of math kernels
volk_32u_popcnt.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
43 #ifndef INCLUDED_VOLK_32u_POPCNT_A16_H
44 #define INCLUDED_VOLK_32u_POPCNT_A16_H
45 
46 #include <inttypes.h>
47 #include <stdio.h>
48 
49 #ifdef LV_HAVE_GENERIC
50 
/*
 * Portable population count of a 32-bit word.
 *
 * ret:   output pointer; receives the number of set bits in `value` (0..32).
 *        It is written unconditionally and is not checked for NULL.
 * value: the word whose set bits are counted.
 *
 * The count is computed with a branch-free, bit-parallel reduction: partial
 * sums are accumulated in progressively wider bit fields of one register.
 */
static inline void volk_32u_popcnt_generic(uint32_t* ret, const uint32_t value)
{
    // Original author's note: this is faster than a lookup table
    uint32_t retVal = value;

    // Add neighbouring bits into 2-bit fields, then 2-bit fields into 4-bit fields.
    retVal = (retVal & 0x55555555u) + ((retVal >> 1) & 0x55555555u);
    retVal = (retVal & 0x33333333u) + ((retVal >> 2) & 0x33333333u);
    // Add neighbouring nibbles; after masking, every byte holds its own count (0..8).
    retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0Fu;
    // Fold the four byte counts into the low byte. The upper bytes now hold
    // partial sums, which the final mask discards (the total is at most 32).
    retVal = retVal + (retVal >> 8);
    retVal = (retVal + (retVal >> 16)) & 0x0000003Fu;

    *ret = retVal;
}
64 
65 #endif /*LV_HAVE_GENERIC*/
66 
67 
68 #ifdef LV_HAVE_SSE4_2
69 
70 #include <nmmintrin.h>
71 
/*
 * Population count of a 32-bit word using the `_mm_popcnt_u32` intrinsic
 * from <nmmintrin.h>.
 *
 * ret:   output pointer; receives the value returned by the intrinsic for
 *        `value`. It is written unconditionally and is not checked for NULL.
 * value: the word whose set bits are counted.
 */
static inline void volk_32u_popcnt_a_sse4_2(uint32_t* ret, const uint32_t value)
{
    // The intrinsic returns a signed int; make the conversion to the
    // unsigned output type explicit.
    *ret = (uint32_t)_mm_popcnt_u32(value);
}
76 
77 #endif /*LV_HAVE_SSE4_2*/
78 
79 #ifdef LV_HAVE_RVV
80 #include <riscv_vector.h>
81 
/*
 * Population count of a 32-bit word using RISC-V vector intrinsics.
 *
 * ret:   output pointer; receives the count. It is written unconditionally
 *        and is not checked for NULL.
 * value: the word whose set bits are counted.
 *
 * `value` is placed in element 0 of a u64m1 vector, the register is
 * reinterpreted as a b4 mask, and `__riscv_vcpop` counts the set mask bits
 * among the first 32 positions.
 * NOTE(review): this presumes the hardware vector length permits vl = 32 for
 * a b4 mask - confirm against the targets enabled by LV_HAVE_RVV.
 */
static inline void volk_32u_popcnt_rvv(uint32_t* ret, const uint32_t value)
{
    // Explicit cast: the count is narrowed to the 32-bit output type.
    *ret = (uint32_t)__riscv_vcpop(
        __riscv_vreinterpret_b4(__riscv_vmv_s_x_u64m1(value, 1)), 32);
}
86 #endif /*LV_HAVE_RVV*/
87 
88 #ifdef LV_HAVE_RVA22V
89 #include <riscv_bitmanip.h>
90 
/*
 * Population count of a 32-bit word using the scalar `__riscv_cpop_32`
 * intrinsic from <riscv_bitmanip.h>.
 *
 * ret:   output pointer; receives the value returned by the intrinsic for
 *        `value`. It is written unconditionally and is not checked for NULL.
 * value: the word whose set bits are counted.
 */
static inline void volk_32u_popcnt_rva22(uint32_t* ret, const uint32_t value)
{
    // Cast kept explicit for consistency with the sibling popcnt kernels.
    *ret = (uint32_t)__riscv_cpop_32(value);
}
95 #endif /*LV_HAVE_RVA22V*/
96 
97 #endif /*INCLUDED_VOLK_32u_POPCNT_A16_H*/
static void volk_32u_popcnt_a_sse4_2(uint32_t *ret, const uint32_t value)
Definition: volk_32u_popcnt.h:72
static void volk_32u_popcnt_generic(uint32_t *ret, const uint32_t value)
Definition: volk_32u_popcnt.h:51