librsync  2.3.2
hashtable.h
Go to the documentation of this file.
1/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2 *
3 * hashtable.h -- a generic open addressing hashtable.
4 *
5 * Copyright (C) 2003 by Donovan Baarda <abo@minkirri.apana.org.au>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
20#ifndef _HASHTABLE_H_
21# define _HASHTABLE_H_
22
23# include <assert.h>
24# include <stdlib.h>
25# include <stdbool.h>
26
27/** \file hashtable.h
28 * A generic open addressing hashtable.
29 *
30 * This is a minimal hashtable containing pointers to arbitrary entries with
31 * configurable hashtable size and support for custom hash() and cmp() methods.
32 * The cmp() method can either be a simple comparison between two keys, or can
33 * be against a special match object containing additional mutable state. This
34 * allows for things like deferred and cached evaluation of costly comparison
35 * data. The hash() function doesn't need to avoid clustering behaviour.
36 *
37 * It uses open addressing with quadratic probing for collisions. The
38 * MurmurHash3 finalization function is optionally used on the hash() output to
39 * avoid clustering and can be disabled by setting HASHTABLE_NMIX32. There is
40 * no support for removing entries, only adding them. Multiple entries with the
41 * same key can be added, and you can use a fancy cmp() function to find
42 * particular entries by more than just their key. There is an iterator for
43 * iterating through all entries in the hashtable. There are optional
44 * NAME_find() find/match/hashcmp/entrycmp stats counters that can be disabled
45 * by defining HASHTABLE_NSTATS. There is an optional simple k=1 bloom filter
46 * for speed that can be disabled by defining HASHTABLE_NBLOOM.
47 *
48 * The types and methods of the hashtable and its contents are specified by
49 * using \#define parameters set to their basenames (the prefixes for the *_t
50 * type and *_func() methods) before doing \#include "hashtable.h". This
51 * produces static inline type-safe methods that are either application
52 * optimized for speed or wrappers around void* implementation methods for
53 * compactness.
54 *
55 * \param ENTRY - the entry type basename.
56 *
57 * \param KEY - optional key type basename (default: ENTRY).
58 *
59 * \param MATCH - optional match type basename (default: KEY).
60 *
61 * \param NAME - optional hashtable type basename (default: ENTRY_hashtable).
62 *
63 * Example: \code
64 * typedef ... mykey_t;
65 * int mykey_hash(mykey_t const *e);
66 * int mykey_cmp(mykey_t *e, mykey_t const *o);
67 *
68 * typedef struct myentry {
69 * mykey_t key; // Inherit from mykey_t.
70 * ...extra entry value data...
71 * } myentry_t;
72 * void myentry_init(myentry_t *e, ...);
73 *
74 * #define ENTRY myentry
75 * #define KEY mykey
76 * #include "hashtable.h"
77 *
78 * hashtable_t *t;
79 * myentry_t entries[300];
80 * mykey_t k;
81 * myentry_t *e;
82 *
83 * t = myentry_hashtable_new(300);
84 * myentry_init(&entries[5], ...);
85 * myentry_hashtable_add(t, &entries[5]);
86 * k = ...;
87 * e = myentry_hashtable_find(t, &k);
88 *
89 * int i;
90 * for (e = myentry_hashtable_iter(t, &i); e != NULL;
91 * e = myentry_hashtable_next(t, &i))
92 * ...
93 *
94 * myentry_hashtable_free(t);
95 * \endcode
96 *
97 * The mykey_hash() and mykey_cmp() fuctions will typically take pointers to
98 * mykey/myentry instances the same as the pointers stored in the hashtable.
99 * However it is also possible for them to take "match objects" that are a
100 * "subclass" of the entry type that contain additional state for complicated
101 * comparision operations.
102 *
103 * Example: \code
104 * typedef struct mymatch {
105 * mykey_t key; // Inherit from mykey_t;
106 * ...extra match criteria and state data...
107 * } mymatch_t;
108 * int mymatch_cmp(mymatch_t *m, myentry_t const *e);
109 *
110 * #define ENTRY myentry
111 * #define KEY mykey
112 * #define MATCH mymatch
113 * #include "hashtable.h"
114 *
115 * ...
116 * mymatch_t m;
117 *
118 * t = myentry_hashtable_new(300);
119 * ...
120 * m = ...;
121 * e = myentry_hashtable_find(t, &m);
122 * \endcode
123 *
124 * The mymatch_cmp() function is only called for finding hashtable entries and
125 * can mutate the mymatch_t object for doing things like deferred and cached
126 * evaluation of expensive match data. It can also access the whole myentry_t
127 * object to match against more than just the key. */
128
129/** The hashtable type. */
130typedef struct hashtable {
131 int size; /**< Size of allocated hashtable. */
132 int count; /**< Number of entries in hashtable. */
133 unsigned tmask; /**< Mask to get the hashtable index. */
134# ifndef HASHTABLE_NBLOOM
135 unsigned bshift; /**< Shift to get the bloomfilter index. */
136# endif
137# ifndef HASHTABLE_NSTATS
138 /* The following are for accumulating NAME_find() stats. */
139 long find_count; /**< The count of finds tried. */
140 long match_count; /**< The count of matches found. */
141 long hashcmp_count; /**< The count of hash compares done. */
142 long entrycmp_count; /**< The count of entry compares done. */
143# endif
144# ifndef HASHTABLE_NBLOOM
145 unsigned char *kbloom; /**< Bloom filter of hash keys with k=1. */
146# endif
147 void **etable; /**< Table of pointers to entries. */
148 unsigned ktable[]; /**< Table of hash keys. */
150
151/* void* implementations for the type-safe static inline wrappers below. */
152hashtable_t *_hashtable_new(int size);
153void _hashtable_free(hashtable_t *t);
154
155# ifndef HASHTABLE_NBLOOM
156static inline void hashtable_setbloom(hashtable_t *t, unsigned const h)
157{
158 /* Use upper bits for a "different hash". */
159 unsigned const i = h >> t->bshift;
160 t->kbloom[i / 8] |= (unsigned char)(1 << (i % 8));
161}
162
163static inline bool hashtable_getbloom(hashtable_t *t, unsigned const h)
164{
165 /* Use upper bits for a "different hash". */
166 unsigned const i = h >> t->bshift;
167 return (t->kbloom[i / 8] >> (i % 8)) & 1;
168}
169# endif
170
171/** MurmurHash3 finalization mix function. */
172static inline unsigned mix32(unsigned h)
173{
174 h ^= h >> 16;
175 h *= 0x85ebca6b;
176 h ^= h >> 13;
177 h *= 0xc2b2ae35;
178 h ^= h >> 16;
179 return h;
180}
181
182/** Ensure hash's are never zero. */
183static inline unsigned nozero(unsigned h)
184{
185 return h ? h : (unsigned)-1;
186}
187
188#endif /* _HASHTABLE_H_ */
189
190/* If ENTRY is defined, define type-dependent static inline methods. */
191#ifdef ENTRY
192
193# define _JOIN2(x, y) x##y
194# define _JOIN(x, y) _JOIN2(x, y)
195
196# ifndef KEY
197# define KEY ENTRY
198# endif
199
200# ifndef MATCH
201# define MATCH KEY
202# endif
203
204# ifndef NAME
205# define NAME _JOIN(ENTRY, _hashtable)
206# endif
207
208# define ENTRY_t _JOIN(ENTRY, _t) /**< The entry type. */
209# define KEY_t _JOIN(KEY, _t) /**< The key type. */
210# define MATCH_t _JOIN(MATCH, _t) /**< The match type. */
211# define KEY_hash _JOIN(KEY, _hash) /**< The key hash(k) method. */
212# define MATCH_cmp _JOIN(MATCH, _cmp) /**< The match cmp(m, e) method. */
213/* The names for all the hashtable methods. */
214# define NAME_new _JOIN(NAME, _new)
215# define NAME_free _JOIN(NAME, _free)
216# define NAME_stats_init _JOIN(NAME, _stats_init)
217# define NAME_add _JOIN(NAME, _add)
218# define NAME_find _JOIN(NAME, _find)
219# define NAME_iter _JOIN(NAME, _iter)
220# define NAME_next _JOIN(NAME, _next)
221
222/* Modified hash() with/without mix32() and reserving zero for empty buckets. */
223# ifdef HASHTABLE_NMIX32
224# define _KEY_HASH(k) nozero(KEY_hash((KEY_t *)k))
225# else
226# define _KEY_HASH(k) nozero(mix32(KEY_hash((KEY_t *)k)))
227# endif
228
229/* Loop macro for probing table t for key hash hk, iterating with index i and
230 entry hash h, terminating at an empty bucket. */
231# define _for_probe(t, hk, i, h) \
232 unsigned const *const ktable = t->ktable;\
233 unsigned const tmask = t->tmask;\
234 unsigned i, s, h;\
235 for (i = hk & tmask, s = 0; (h = ktable[i]); i = (i + ++s) & tmask)
236
237/* Conditional macro for incrementing stats counters. */
238# ifndef HASHTABLE_NSTATS
239# define _stats_inc(c) (c++)
240# else
241# define _stats_inc(c)
242# endif
243
244/** Allocate and initialize a hashtable instance.
245 *
246 * The provided size is used as an indication of the number of entries you wish
247 * to add, but the allocated size will probably be larger depending on the
248 * implementation to enable optimisations or avoid degraded performance. It may
249 * be possible to fill the table beyond the requested size, but performance can
250 * start to degrade badly if it is over filled.
251 *
252 * \param size - The desired minimum size of the hash table.
253 *
254 * \return The initialized hashtable instance or NULL if it failed. */
255static inline hashtable_t *NAME_new(int size)
256{
257 return _hashtable_new(size);
258}
259
260/** Destroy and free a hashtable instance.
261 *
262 * This will free the hashtable, but will not free the entries in the
263 * hashtable. If you want to free the entries too, use a hashtable iterator to
264 * free the the entries first.
265 *
266 * \param *t - The hashtable to destroy and free. */
267static inline void NAME_free(hashtable_t *t)
268{
269 _hashtable_free(t);
270}
271
272/** Initialize hashtable stats counters.
273 *
274 * This will reset all the stats counters for the hashtable,
275 *
276 * \param *t - The hashtable to initializ stats for. */
277static inline void NAME_stats_init(hashtable_t *t)
278{
279# ifndef HASHTABLE_NSTATS
281# endif
282}
283
284/** Add an entry to a hashtable.
285 *
286 * This doesn't use MATCH_cmp() or do any checks for existing copies or
287 * instances, so it will add duplicates. If you want to avoid adding
288 * duplicates, use NAME_find() to check for existing entries first.
289 *
290 * \param *t - The hashtable to add to.
291 *
292 * \param *e - The entry object to add.
293 *
294 * \return The added entry, or NULL if the table is full. */
295static inline ENTRY_t *NAME_add(hashtable_t *t, ENTRY_t *e)
296{
297 unsigned he = _KEY_HASH(e);
298
299 assert(e != NULL);
300 if (t->count + 1 == t->size)
301 return NULL;
302# ifndef HASHTABLE_NBLOOM
303 hashtable_setbloom(t, he);
304# endif
305 _for_probe(t, he, i, h);
306 t->count++;
307 t->ktable[i] = he;
308 return t->etable[i] = e;
309}
310
311/** Find an entry in a hashtable.
312 *
313 * Uses MATCH_cmp() to find the first matching entry in the table in the same
314 * hash() bucket.
315 *
316 * \param *t - The hashtable to search.
317 *
318 * \param *m - The key or match object to search for.
319 *
320 * \return The first found entry, or NULL if nothing was found. */
321static inline ENTRY_t *NAME_find(hashtable_t *t, MATCH_t *m)
322{
323 assert(m != NULL);
324 unsigned hm = _KEY_HASH(m);
325 ENTRY_t *e;
326
327 _stats_inc(t->find_count);
328# ifndef HASHTABLE_NBLOOM
329 if (!hashtable_getbloom(t, hm))
330 return NULL;
331# endif
332 _for_probe(t, hm, i, he) {
333 _stats_inc(t->hashcmp_count);
334 if (hm == he) {
335 _stats_inc(t->entrycmp_count);
336 if (!MATCH_cmp(m, e = t->etable[i])) {
337 _stats_inc(t->match_count);
338 return e;
339 }
340 }
341 }
342 /* Also count the compare for the empty bucket. */
343 _stats_inc(t->hashcmp_count);
344 return NULL;
345}
346
347static inline ENTRY_t *NAME_next(hashtable_t *t, int *i);
348
349/** Initialize a iteration and return the first entry.
350 *
351 * This works together with NAME_next() for iterating through all entries in a
352 * hashtable.
353 *
354 * Example: \code
355 * for (e = NAME_iter(t, &i); e != NULL; e = NAME_next(t, &i))
356 * ...
357 * \endcode
358 *
359 * \param *t - the hashtable to iterate over.
360 *
361 * \param *i - the int iterator index to initialize.
362 *
363 * \return The first entry or NULL if the hashtable is empty. */
364static inline ENTRY_t *NAME_iter(hashtable_t *t, int *i)
365{
366 assert(t != NULL);
367 assert(i != NULL);
368 *i = 0;
369 return NAME_next(t, i);
370}
371
372/** Get the next entry from a hashtable iterator or NULL when finished.
373 *
374 * This works together with NAME_iter() for iterating through all entries in a
375 * hashtable.
376 *
377 * \param *t - the hashtable to iterate over.
378 *
379 * \param *i - the int iterator index to use.
380 *
381 * \return The next entry or NULL if the iterator is finished. */
382static inline ENTRY_t *NAME_next(hashtable_t *t, int *i)
383{
384 assert(t != NULL);
385 assert(i != NULL);
386 ENTRY_t *e = NULL;
387
388 while ((*i < t->size) && !(e = t->etable[(*i)++])) ;
389 return e;
390}
391
392# undef ENTRY
393# undef KEY
394# undef MATCH
395# undef NAME
396# undef ENTRY_t
397# undef KEY_t
398# undef MATCH_t
399# undef KEY_hash
400# undef MATCH_cmp
401# undef NAME_new
402# undef NAME_free
403# undef NAME_stats_init
404# undef NAME_add
405# undef NAME_find
406# undef NAME_iter
407# undef NAME_next
408# undef _KEY_HASH
409#endif /* ENTRY */
#define ENTRY_t
The entry type.
Definition: hashtable.h:208
#define MATCH_cmp
The match cmp(m, e) method.
Definition: hashtable.h:212
struct hashtable hashtable_t
The hashtable type.
#define MATCH_t
The match type.
Definition: hashtable.h:210
static unsigned mix32(unsigned h)
MurmurHash3 finalization mix function.
Definition: hashtable.h:172
static unsigned nozero(unsigned h)
Ensure hash's are never zero.
Definition: hashtable.h:183
The hashtable type.
Definition: hashtable.h:130
long find_count
The count of finds tried.
Definition: hashtable.h:139
unsigned ktable[]
Table of hash keys.
Definition: hashtable.h:148
long match_count
The count of matches found.
Definition: hashtable.h:140
int count
Number of entries in hashtable.
Definition: hashtable.h:132
unsigned char * kbloom
Bloom filter of hash keys with k=1.
Definition: hashtable.h:145
long entrycmp_count
The count of entry compares done.
Definition: hashtable.h:142
int size
Size of allocated hashtable.
Definition: hashtable.h:131
unsigned tmask
Mask to get the hashtable index.
Definition: hashtable.h:133
void ** etable
Table of pointers to entries.
Definition: hashtable.h:147
long hashcmp_count
The count of hash compares done.
Definition: hashtable.h:141
unsigned bshift
Shift to get the bloomfilter index.
Definition: hashtable.h:135