1 | /* |
2 | * BSD-3-Clause |
3 | * |
4 | * Copyright 2021 Ozan Tezcan |
5 | * All rights reserved. |
6 | * |
7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions are met: |
9 | * |
10 | * 1. Redistributions of source code must retain the above copyright notice, |
11 | * this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright notice, |
13 | * this list of conditions and the following disclaimer in the documentation |
14 | * and/or other materials provided with the distribution. |
15 | * 3. Neither the name of the copyright holder nor the names of its contributors |
16 | * may be used to endorse or promote products derived from this software |
17 | * without specific prior written permission. |
18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
20 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
23 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, |
24 | * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT |
25 | * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ |
31 | |
32 | #include "sc_map.h" |
33 | |
34 | #include <string.h> |
35 | |
36 | #ifndef SC_MAP_MAX |
37 | #define SC_MAP_MAX UINT32_MAX |
38 | #endif |
39 | |
40 | #define sc_map_def_strkey(name, K, V, cmp, hash_fn) \ |
41 | bool sc_map_cmp_##name(struct sc_map_item_##name *t, K key, \ |
42 | uint32_t hash) \ |
43 | { \ |
44 | return t->hash == hash && cmp(t->key, key); \ |
45 | } \ |
46 | \ |
47 | void sc_map_assign_##name(struct sc_map_item_##name *t, K key, \ |
48 | V value, uint32_t hash) \ |
49 | { \ |
50 | t->key = key; \ |
51 | t->value = value; \ |
52 | t->hash = hash; \ |
53 | } \ |
54 | \ |
55 | uint32_t sc_map_hashof_##name(struct sc_map_item_##name *t) \ |
56 | { \ |
57 | return t->hash; \ |
58 | } \ |
59 | \ |
60 | sc_map_def(name, K, V, cmp, hash_fn) |
61 | |
62 | #define sc_map_def_scalar(name, K, V, cmp, hash_fn) \ |
63 | bool sc_map_cmp_##name(struct sc_map_item_##name *t, K key, \ |
64 | uint32_t hash) \ |
65 | { \ |
66 | (void) hash; \ |
67 | return cmp(t->key, key); \ |
68 | } \ |
69 | \ |
70 | void sc_map_assign_##name(struct sc_map_item_##name *t, K key, \ |
71 | V value, uint32_t hash) \ |
72 | { \ |
73 | (void) hash; \ |
74 | t->key = key; \ |
75 | t->value = value; \ |
76 | } \ |
77 | \ |
78 | uint32_t sc_map_hashof_##name(struct sc_map_item_##name *t) \ |
79 | { \ |
80 | return hash_fn(t->key); \ |
81 | } \ |
82 | \ |
83 | sc_map_def(name, K, V, cmp, hash_fn) |
84 | |
85 | #define sc_map_def(name, K, V, cmp, hash_fn) \ |
86 | \ |
87 | static const struct sc_map_item_##name empty_items_##name[2]; \ |
88 | \ |
89 | static const struct sc_map_##name sc_map_empty_##name = { \ |
90 | .cap = 1, \ |
91 | .mem = (struct sc_map_item_##name *) &empty_items_##name[1]}; \ |
92 | \ |
93 | static void *sc_map_alloc_##name(uint32_t *cap, uint32_t factor) \ |
94 | { \ |
95 | uint32_t v = *cap; \ |
96 | struct sc_map_item_##name *t; \ |
97 | \ |
98 | if (*cap > SC_MAP_MAX / factor) { \ |
99 | return NULL; \ |
100 | } \ |
101 | \ |
102 | /* Find next power of two */ \ |
103 | v = v < 8 ? 8 : (v * factor); \ |
104 | v--; \ |
105 | for (uint32_t i = 1; i < sizeof(v) * 8; i *= 2) { \ |
106 | v |= v >> i; \ |
107 | } \ |
108 | v++; \ |
109 | \ |
110 | *cap = v; \ |
111 | t = sc_map_calloc(sizeof(*t), v + 1); \ |
112 | return t ? &t[1] : NULL; \ |
113 | } \ |
114 | \ |
115 | bool sc_map_init_##name(struct sc_map_##name *m, uint32_t cap, \ |
116 | uint32_t load_fac) \ |
117 | { \ |
118 | void *t; \ |
119 | uint32_t f = (load_fac == 0) ? 75 : load_fac; \ |
120 | \ |
121 | if (f > 95 || f < 25) { \ |
122 | return false; \ |
123 | } \ |
124 | \ |
125 | if (cap == 0) { \ |
126 | *m = sc_map_empty_##name; \ |
127 | m->load_fac = f; \ |
128 | return true; \ |
129 | } \ |
130 | \ |
131 | t = sc_map_alloc_##name(&cap, 1); \ |
132 | if (t == NULL) { \ |
133 | return false; \ |
134 | } \ |
135 | \ |
136 | m->mem = t; \ |
137 | m->size = 0; \ |
138 | m->used = false; \ |
139 | m->cap = cap; \ |
140 | m->load_fac = f; \ |
141 | m->remap = (uint32_t) (m->cap * ((double) m->load_fac / 100)); \ |
142 | \ |
143 | return true; \ |
144 | } \ |
145 | \ |
146 | void sc_map_term_##name(struct sc_map_##name *m) \ |
147 | { \ |
148 | if (m->mem != sc_map_empty_##name.mem) { \ |
149 | sc_map_free(&m->mem[-1]); \ |
150 | *m = sc_map_empty_##name; \ |
151 | } \ |
152 | } \ |
153 | \ |
154 | uint32_t sc_map_size_##name(struct sc_map_##name *m) \ |
155 | { \ |
156 | return m->size; \ |
157 | } \ |
158 | \ |
159 | void sc_map_clear_##name(struct sc_map_##name *m) \ |
160 | { \ |
161 | if (m->size > 0) { \ |
162 | for (uint32_t i = 0; i < m->cap; i++) { \ |
163 | m->mem[i].key = 0; \ |
164 | } \ |
165 | \ |
166 | m->used = false; \ |
167 | m->size = 0; \ |
168 | } \ |
169 | } \ |
170 | \ |
171 | static bool sc_map_remap_##name(struct sc_map_##name *m) \ |
172 | { \ |
173 | uint32_t pos, cap, mod; \ |
174 | struct sc_map_item_##name *new; \ |
175 | \ |
176 | if (m->size < m->remap) { \ |
177 | return true; \ |
178 | } \ |
179 | \ |
180 | cap = m->cap; \ |
181 | new = sc_map_alloc_##name(&cap, 2); \ |
182 | if (new == NULL) { \ |
183 | return false; \ |
184 | } \ |
185 | \ |
186 | mod = cap - 1; \ |
187 | \ |
188 | for (uint32_t i = 0; i < m->cap; i++) { \ |
189 | if (m->mem[i].key != 0) { \ |
190 | pos = sc_map_hashof_##name(&m->mem[i]) & mod; \ |
191 | \ |
192 | while (true) { \ |
193 | if (new[pos].key == 0) { \ |
194 | new[pos] = m->mem[i]; \ |
195 | break; \ |
196 | } \ |
197 | \ |
198 | pos = (pos + 1) & (mod); \ |
199 | } \ |
200 | } \ |
201 | } \ |
202 | \ |
203 | if (m->mem != sc_map_empty_##name.mem) { \ |
204 | new[-1] = m->mem[-1]; \ |
205 | sc_map_free(&m->mem[-1]); \ |
206 | } \ |
207 | \ |
208 | m->mem = new; \ |
209 | m->cap = cap; \ |
210 | m->remap = (uint32_t) (m->cap * ((double) m->load_fac / 100)); \ |
211 | \ |
212 | return true; \ |
213 | } \ |
214 | \ |
215 | V sc_map_put_##name(struct sc_map_##name *m, K key, V value) \ |
216 | { \ |
217 | V ret; \ |
218 | uint32_t pos, mod, h; \ |
219 | \ |
220 | m->oom = false; \ |
221 | \ |
222 | if (!sc_map_remap_##name(m)) { \ |
223 | m->oom = true; \ |
224 | return 0; \ |
225 | } \ |
226 | \ |
227 | if (key == 0) { \ |
228 | ret = (m->used) ? m->mem[-1].value : 0; \ |
229 | m->found = m->used; \ |
230 | m->size += !m->used; \ |
231 | m->used = true; \ |
232 | m->mem[-1].value = value; \ |
233 | \ |
234 | return ret; \ |
235 | } \ |
236 | \ |
237 | mod = m->cap - 1; \ |
238 | h = hash_fn(key); \ |
239 | pos = h & (mod); \ |
240 | \ |
241 | while (true) { \ |
242 | if (m->mem[pos].key == 0) { \ |
243 | m->size++; \ |
244 | } else if (!sc_map_cmp_##name(&m->mem[pos], key, h)) { \ |
245 | pos = (pos + 1) & (mod); \ |
246 | continue; \ |
247 | } \ |
248 | \ |
249 | m->found = m->mem[pos].key != 0; \ |
250 | ret = m->found ? m->mem[pos].value : 0; \ |
251 | sc_map_assign_##name(&m->mem[pos], key, value, h); \ |
252 | \ |
253 | return ret; \ |
254 | } \ |
255 | } \ |
256 | \ |
257 | /** NOLINTNEXTLINE */ \ |
258 | V sc_map_get_##name(struct sc_map_##name *m, K key) \ |
259 | { \ |
260 | const uint32_t mod = m->cap - 1; \ |
261 | uint32_t h, pos; \ |
262 | \ |
263 | if (key == 0) { \ |
264 | m->found = m->used; \ |
265 | return m->used ? m->mem[-1].value : 0; \ |
266 | } \ |
267 | \ |
268 | h = hash_fn(key); \ |
269 | pos = h & mod; \ |
270 | \ |
271 | while (true) { \ |
272 | if (m->mem[pos].key == 0) { \ |
273 | m->found = false; \ |
274 | return 0; \ |
275 | } else if (!sc_map_cmp_##name(&m->mem[pos], key, h)) { \ |
276 | pos = (pos + 1) & (mod); \ |
277 | continue; \ |
278 | } \ |
279 | \ |
280 | m->found = true; \ |
281 | return m->mem[pos].value; \ |
282 | } \ |
283 | } \ |
284 | \ |
285 | /** NOLINTNEXTLINE */ \ |
286 | V sc_map_del_##name(struct sc_map_##name *m, K key) \ |
287 | { \ |
288 | const uint32_t mod = m->cap - 1; \ |
289 | uint32_t pos, prev, it, p, h; \ |
290 | V ret; \ |
291 | \ |
292 | if (key == 0) { \ |
293 | m->found = m->used; \ |
294 | m->size -= m->used; \ |
295 | m->used = false; \ |
296 | \ |
297 | return m->found ? m->mem[-1].value : 0; \ |
298 | } \ |
299 | \ |
300 | h = hash_fn(key); \ |
301 | pos = h & (mod); \ |
302 | \ |
303 | while (true) { \ |
304 | if (m->mem[pos].key == 0) { \ |
305 | m->found = false; \ |
306 | return 0; \ |
307 | } else if (!sc_map_cmp_##name(&m->mem[pos], key, h)) { \ |
308 | pos = (pos + 1) & (mod); \ |
309 | continue; \ |
310 | } \ |
311 | \ |
312 | m->found = true; \ |
313 | ret = m->mem[pos].value; \ |
314 | \ |
315 | m->size--; \ |
316 | m->mem[pos].key = 0; \ |
317 | prev = pos; \ |
318 | it = pos; \ |
319 | \ |
320 | while (true) { \ |
321 | it = (it + 1) & (mod); \ |
322 | if (m->mem[it].key == 0) { \ |
323 | break; \ |
324 | } \ |
325 | \ |
326 | p = sc_map_hashof_##name(&m->mem[it]) & (mod); \ |
327 | \ |
328 | if ((p > it && (p <= prev || it >= prev)) || \ |
329 | (p <= prev && it >= prev)) { \ |
330 | \ |
331 | m->mem[prev] = m->mem[it]; \ |
332 | m->mem[it].key = 0; \ |
333 | prev = it; \ |
334 | } \ |
335 | } \ |
336 | \ |
337 | return ret; \ |
338 | } \ |
339 | } |
340 | |
341 | static uint32_t sc_map_hash_32(uint32_t a) |
342 | { |
343 | return a; |
344 | } |
345 | |
346 | static uint32_t sc_map_hash_64(uint64_t a) |
347 | { |
348 | return ((uint32_t) a) ^ (uint32_t) (a >> 32u); |
349 | } |
350 | |
351 | // clang-format off |
352 | uint32_t murmurhash(const char *key) |
353 | { |
354 | const uint64_t m = UINT64_C(0xc6a4a7935bd1e995); |
355 | const size_t len = strlen(key); |
356 | const unsigned char* p = (const unsigned char*) key; |
357 | const unsigned char *end = p + (len & ~(uint64_t) 0x7); |
358 | uint64_t h = (len * m); |
359 | |
360 | while (p != end) { |
361 | uint64_t k; |
362 | memcpy(&k, p, sizeof(k)); |
363 | |
364 | k *= m; |
365 | k ^= k >> 47u; |
366 | k *= m; |
367 | |
368 | h ^= k; |
369 | h *= m; |
370 | p += 8; |
371 | } |
372 | |
373 | switch (len & 7u) { |
374 | case 7: h ^= (uint64_t) p[6] << 48ul; // fall through |
375 | case 6: h ^= (uint64_t) p[5] << 40ul; // fall through |
376 | case 5: h ^= (uint64_t) p[4] << 32ul; // fall through |
377 | case 4: h ^= (uint64_t) p[3] << 24ul; // fall through |
378 | case 3: h ^= (uint64_t) p[2] << 16ul; // fall through |
379 | case 2: h ^= (uint64_t) p[1] << 8ul; // fall through |
380 | case 1: h ^= (uint64_t) p[0]; // fall through |
381 | h *= m; |
382 | default: |
383 | break; |
384 | } |
385 | |
386 | h ^= h >> 47u; |
387 | h *= m; |
388 | h ^= h >> 47u; |
389 | |
390 | return (uint32_t) h; |
391 | } |
392 | |
393 | #define sc_map_eq(a, b) ((a) == (b)) |
394 | #define sc_map_streq(a, b) (!strcmp(a, b)) |
395 | |
396 | // integer keys: name, key type, value type, cmp hash |
397 | sc_map_def_scalar(int, int, int, sc_map_eq, sc_map_hash_64) |
398 | sc_map_def_scalar(intv, int, void *, sc_map_eq, sc_map_hash_64) |
399 | sc_map_def_scalar(ints, int, const char *, sc_map_eq, sc_map_hash_64) |
400 | sc_map_def_scalar(ll, long long, long long, sc_map_eq, sc_map_hash_64) |
401 | sc_map_def_scalar(llv, long long, void *, sc_map_eq, sc_map_hash_64) |
402 | sc_map_def_scalar(lls, long long, const char *, sc_map_eq, sc_map_hash_64) |
403 | sc_map_def_scalar(32, uint32_t, uint32_t, sc_map_eq, sc_map_hash_32) |
404 | sc_map_def_scalar(64, uint64_t, uint64_t, sc_map_eq, sc_map_hash_64) |
405 | sc_map_def_scalar(64v, uint64_t, void *, sc_map_eq, sc_map_hash_64) |
406 | sc_map_def_scalar(64s, uint64_t, const char *, sc_map_eq, sc_map_hash_64) |
407 | |
408 | // string keys: name key type value type cmp hash |
409 | sc_map_def_strkey(str, const char *, const char *, sc_map_streq, murmurhash) |
410 | sc_map_def_strkey(sv, const char *, void *, sc_map_streq, murmurhash) |
411 | sc_map_def_strkey(s64, const char *, uint64_t, sc_map_streq, murmurhash) |
412 | sc_map_def_strkey(sll, const char *, long long, sc_map_streq, murmurhash) |
413 | |
414 | // clang-format on |
415 | |