Initial import
[project/ufp.git] / src / uht.c
1 #include <sys/stat.h>
2 #include <sys/mman.h>
3 #include <string.h>
4 #include <stdlib.h>
5 #include <fcntl.h>
6
7 #include <libubox/utils.h>
8
9 #include "xxhash32.h"
10 #include "uht.h"
11
12 #define ALIGN_OFS(ofs) ((ofs + UHT_ALIGN_MASK) & ~UHT_ALIGN_MASK)
13
14 #define UHT_HASHTBL_SIZE_SHIFT 5
15 #define UHT_HASHTBL_ORDER_MASK ((1 << UHT_HASHTBL_SIZE_SHIFT) - 1)
16
17 #define UHT_HASHTBL_KEY_FLAG_FIRST 1
18
19 struct uht_file_hdr {
20 uint8_t version;
21 uint8_t _pad[3];
22 uint32_t val;
23 };
24
25 struct uht_key {
26 uint32_t entry;
27 uint32_t len;
28 };
29
30 struct uht_entry {
31 struct avl_node node;
32 struct uht_key key;
33 };
34
35 struct uht_hashtbl_meta {
36 uint32_t *ht;
37 uint32_t *ht_slot;
38 uint32_t *ht_entry;
39 uint32_t elements;
40 uint8_t order;
41 };
42
43 static int
44 uht_key_comp(const void *k1, const void *k2, void *ptr)
45 {
46 const struct uht_key *key1 = k1, *key2 = k2;
47 struct uht_writer *wr = ptr;
48
49 if (key1->len != key2->len)
50 return key1->len - key2->len;
51
52 return memcmp(uht_entry_ptr(wr->buf, key1->entry),
53 uht_entry_ptr(wr->buf, key2->entry), key1->len);
54 }
55
56 static uint32_t
57 uht_writer_check_insert(struct uht_writer *wr, struct uht_key *key)
58 {
59 struct uht_entry *entry;
60
61 entry = avl_find_element(&wr->data, key, entry, node);
62 if (entry)
63 return entry->key.entry;
64
65 entry = calloc(1, sizeof(*entry));
66 entry->node.key = &entry->key;
67 entry->key = *key;
68 avl_insert(&wr->data, &entry->node);
69 wr->buf_ofs += key->len;
70
71 return key->entry;
72 }
73
74 void uht_writer_init(struct uht_writer *wr)
75 {
76 avl_init(&wr->data, uht_key_comp, false, wr);
77 wr->buf_len = 256;
78 wr->buf = calloc(1, wr->buf_len);
79 wr->buf_ofs = sizeof(struct uht_file_hdr);
80 }
81
82 static void *
83 __uht_writer_alloc(struct uht_writer *wr, struct uht_key *key, size_t size)
84 {
85 void *ret;
86
87 if (size >= (1 << 24) || wr->buf_ofs + size >= (1 << 30))
88 return NULL;
89
90 size = ALIGN_OFS(size);
91 while (wr->buf_ofs + size > wr->buf_len) {
92 wr->buf_len <<= 1;
93 wr->buf = realloc(wr->buf, wr->buf_len);
94 }
95
96 key->len = size;
97 key->entry = wr->buf_ofs << (UHT_TYPE_BITS - UHT_ALIGN_BITS);
98 ret = wr->buf + wr->buf_ofs;
99
100 return ret;
101 }
102
103 static void *
104 uht_writer_alloc(struct uht_writer *wr, struct uht_key *key, size_t size)
105 {
106 void *ret = __uht_writer_alloc(wr, key, size);
107 wr->buf_ofs += size;
108 return ret;
109 }
110
111 static uint32_t
112 uht_writer_add_generic(struct uht_writer *wr, const void *data, size_t len)
113 {
114 struct uht_key key;
115
116 memcpy(__uht_writer_alloc(wr, &key, len), data, len);
117 return uht_writer_check_insert(wr, &key);
118 }
119
120 static int uht_hashtbl_get_meta(struct uht_hashtbl_meta *meta, void *buf, size_t len,
121 uint32_t attr)
122 {
123 uint32_t val;
124
125 meta->ht = buf + uht_entry_offset(attr);
126 val = cpu_to_le32(*meta->ht);
127 meta->order = val & UHT_HASHTBL_ORDER_MASK;
128 meta->elements = val >> UHT_HASHTBL_SIZE_SHIFT;
129 meta->ht_slot = meta->ht + 1;
130 meta->ht_entry = meta->ht_slot + (1 << meta->order);
131 if ((void *)&meta->ht_entry[2 * meta->elements] > buf + len)
132 return -1;
133
134 return 0;
135 }
136
137 uint32_t uht_writer_hashtbl_alloc(struct uht_writer *wr, size_t n_members)
138 {
139 struct uht_key key;
140 uint32_t *ht, ht_size;
141 uint8_t order = 2;
142
143 if (n_members >= 1 << 24)
144 return 0;
145
146 while (n_members > 1U << order)
147 order++;
148
149 ht_size = 4 + (4 << order) + 8 * n_members;
150 ht = uht_writer_alloc(wr, &key, ht_size);
151 if (!ht)
152 return 0;
153
154 memset(ht, 0, ht_size);
155 *ht = order;
156
157 return key.entry | UHT_HASHTBL;
158 }
159
160
161 void uht_writer_hashtbl_add_element(struct uht_writer *wr, uint32_t hashtbl,
162 const char *key, uint32_t val)
163 {
164 struct uht_hashtbl_meta meta = {};
165 uint32_t key_attr = uht_writer_add_string(wr, key);
166 uint32_t *ht_next;
167
168 if (uht_hashtbl_get_meta(&meta, wr->buf, wr->buf_ofs, hashtbl))
169 return;
170
171 ht_next = &meta.ht_entry[2 * meta.elements];
172 *meta.ht = cpu_to_le32(le32_to_cpu(*meta.ht) + (1 << UHT_HASHTBL_SIZE_SHIFT));
173 ht_next[0] = cpu_to_le32(key_attr);
174 ht_next[1] = cpu_to_le32(val);
175 }
176
177 static uint32_t
178 uht_hashtbl_key_slot(const char *key, uint8_t order)
179 {
180 uint32_t mask = (1 << order) - 1;
181
182 return XXH32(key, strlen(key), 0) & mask;
183 }
184
185
186 static uint32_t
187 uht_hashtbl_entry_key_slot(struct uht_writer *wr, uint32_t k, uint8_t order)
188 {
189 return uht_hashtbl_key_slot(uht_entry_ptr(wr->buf, k), order);
190 }
191
192 struct uht_writer *__sort_wr;
193 static uint8_t __sort_order;
194 static int __entry_sort_fn(const void *a1, const void *a2)
195 {
196 struct uht_writer *wr = __sort_wr;
197 const uint32_t *k1 = a1, *k2 = a2;
198 uint32_t slot1 = uht_hashtbl_entry_key_slot(wr, le32_to_cpu(*k1), __sort_order);
199 uint32_t slot2 = uht_hashtbl_entry_key_slot(wr, le32_to_cpu(*k2), __sort_order);
200
201 return slot1 - slot2;
202 }
203
204 void uht_writer_hashtbl_done(struct uht_writer *wr, uint32_t hashtbl)
205 {
206 struct uht_hashtbl_meta meta = {};
207 uint32_t last_slot = ~0;
208
209 if (uht_hashtbl_get_meta(&meta, wr->buf, wr->buf_ofs, hashtbl))
210 return;
211
212 __sort_wr = wr;
213 __sort_order = meta.order;
214 qsort(meta.ht_entry, meta.elements, 8, __entry_sort_fn);
215 for (size_t i = 0; i < meta.elements; i++) {
216 uint32_t key_attr = cpu_to_le32(meta.ht_entry[2 * i]);
217 uint32_t slot = uht_hashtbl_entry_key_slot(wr, key_attr, __sort_order);
218 meta.ht_entry[2 * i] &= ~cpu_to_le32(UHT_TYPE_MASK);
219 if (slot != last_slot)
220 meta.ht_entry[2 * i] |= cpu_to_le32(UHT_HASHTBL_KEY_FLAG_FIRST);
221 meta.ht_slot[slot] = cpu_to_le32(i);
222 last_slot = slot;
223 }
224 }
225
226 uint32_t uht_writer_add_array(struct uht_writer *wr, uint32_t *values, size_t n)
227 {
228 struct uht_key key;
229 uint32_t *data;
230
231 data = __uht_writer_alloc(wr, &key, 4 + n * 4);
232 *(data++) = cpu_to_le32(n);
233 for (size_t i = 0; i < n; i++)
234 *(data++) = cpu_to_le32(values[i]);
235
236 return uht_writer_check_insert(wr, &key) | UHT_ARRAY;
237 }
238
239 uint32_t uht_writer_add_object(struct uht_writer *wr, uint32_t *keys,
240 uint32_t *values, size_t n)
241 {
242 struct uht_key key;
243 uint32_t *data;
244
245 data = __uht_writer_alloc(wr, &key, 4 + n * 8);
246 *(data++) = cpu_to_le32(n);
247 for (size_t i = 0; i < n; i++) {
248 *(data++) = cpu_to_le32(keys[i]);
249 *(data++) = cpu_to_le32(values[i]);
250 }
251 return uht_writer_check_insert(wr, &key) | UHT_OBJECT;
252 }
253
254 uint32_t uht_writer_add_string(struct uht_writer *wr, const char *val)
255 {
256 return uht_writer_add_generic(wr, val, strlen(val) + 1) | UHT_STRING;
257 }
258
259 uint32_t uht_writer_add_double(struct uht_writer *wr, double val)
260 {
261 union {
262 double d;
263 uint64_t u64;
264 } v = {
265 .d = val
266 };
267 v.u64 = cpu_to_le64(v.u64);
268 return uht_writer_add_generic(wr, &v.u64, 8) | UHT_DOUBLE;
269 }
270
271 uint32_t uht_writer_add_int(struct uht_writer *wr, int64_t val)
272 {
273 val = cpu_to_le64(val);
274 return uht_writer_add_generic(wr, &val, 8) | UHT_INT;
275 }
276
277 int uht_writer_save(struct uht_writer *wr, FILE *out, uint32_t val)
278 {
279 struct uht_file_hdr *hdr = wr->buf;
280
281 hdr->val = val;
282
283 if (fwrite(wr->buf, 1, wr->buf_ofs, out) != wr->buf_ofs)
284 return -1;
285
286 return 0;
287 }
288
289 void uht_writer_free(struct uht_writer *wr)
290 {
291 struct uht_entry *e, *tmp;
292
293 avl_remove_all_elements(&wr->data, e, node, tmp)
294 free(e);
295 free(wr->buf);
296 memset(wr, 0, sizeof(*wr));
297 }
298
299 static inline uint32_t
300 __uht_iter_fetch(struct uht_reader_iter *iter)
301 {
302 return le32_to_cpu(*(iter->__data++));
303 }
304
305 struct uht_reader_iter
306 __uht_object_iter_init(struct uht_reader *r, uint32_t attr)
307 {
308 struct uht_reader_iter iter = {
309 .type = uht_entry_type(attr),
310 };
311
312 switch (iter.type) {
313 case UHT_HASHTBL:
314 iter.__data = uht_entry_ptr(r->data, attr);
315 iter.size = __uht_iter_fetch(&iter);
316 iter.__data += 1 << (iter.size & UHT_HASHTBL_ORDER_MASK);
317 iter.size >>= UHT_HASHTBL_SIZE_SHIFT;
318 break;
319 case UHT_ARRAY:
320 case UHT_OBJECT:
321 iter.__data = uht_entry_ptr(r->data, attr);
322 iter.size = __uht_iter_fetch(&iter);
323 break;
324 default:
325 break;
326 }
327
328 if (iter.index < iter.size)
329 __uht_object_iter_next(r, &iter);
330
331 return iter;
332 }
333
334 void __uht_object_iter_next(struct uht_reader *r, struct uht_reader_iter *iter)
335 {
336 if (iter->type != UHT_ARRAY) {
337 uint32_t key = __uht_iter_fetch(iter);
338 key &= ~UHT_TYPE_MASK;
339 key |= UHT_STRING;
340 iter->key = uht_reader_get_string(r, key);
341 }
342 iter->val = __uht_iter_fetch(iter);
343 }
344
345 uint32_t uht_reader_hashtbl_lookup(struct uht_reader *r, uint32_t hashtbl,
346 const char *key)
347 {
348 uint32_t *ht, *ht_end, val, slot, size, offset;
349 size_t key_len = strlen(key);
350 int32_t entry;
351 uint8_t order;
352
353 if (!uht_entry_valid(r->len, hashtbl))
354 return 0;
355
356 ht = uht_entry_ptr(r->data, hashtbl);
357 val = le32_to_cpu(*ht);
358 order = val & UHT_HASHTBL_ORDER_MASK;
359 size = val >> UHT_HASHTBL_SIZE_SHIFT;
360 offset = (1 << order) + 2 * size;
361 ht_end = ht + offset;
362 offset <<= 2 + UHT_TYPE_BITS - UHT_ALIGN_BITS;
363
364 if (!uht_entry_valid(r->len, hashtbl + offset))
365 return 0;
366
367 ht++;
368 slot = uht_hashtbl_key_slot(key, order);
369 if (ht + slot >= ht_end)
370 return 0;
371
372 entry = le32_to_cpu(ht[slot]) * 2;
373 if ((uint32_t)entry > 2 * size)
374 return 0;
375
376 ht += 1 << order;
377 while (entry >= 0) {
378 uint32_t cur_entry = le32_to_cpu(ht[entry]);
379 const char *cur_key;
380 size_t off;
381
382 cur_entry &= ~UHT_TYPE_MASK;
383 cur_entry |= UHT_STRING;
384 if (!uht_entry_valid(r->len, cur_entry))
385 return 0;
386
387 cur_key = uht_reader_get_string(r, cur_entry);
388 off = cur_key - (const char *)r->data;
389 if (off + key_len >= r->len)
390 return 0;
391
392 if (!strncmp(key, cur_key, key_len + 1)) {
393 cur_entry = le32_to_cpu(ht[entry + 1]);
394 if (!uht_entry_valid(r->len, cur_entry))
395 return 0;
396
397 return cur_entry;
398 }
399
400 if (ht[entry] & cpu_to_le32(UHT_HASHTBL_KEY_FLAG_FIRST))
401 return 0;
402
403 entry -= 2;
404 }
405
406 return 0;
407 }
408
409 int uht_reader_open(struct uht_reader *r, const char *file)
410 {
411 const struct uht_file_hdr *hdr;
412 struct stat st;
413 int fd;
414
415 fd = open(file, O_RDONLY);
416 if (fd < 0)
417 return -1;
418
419 if (fstat(fd, &st) < 0)
420 goto close_fd;
421
422 if (st.st_size < (off_t)sizeof(struct uht_file_hdr))
423 goto close_fd;
424
425 r->fd = fd;
426 r->data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
427 r->len = st.st_size;
428 hdr = r->data;
429 r->val = hdr->val;
430
431 return 0;
432
433 close_fd:
434 close(fd);
435 return -1;
436 }
437
438 void uht_reader_close(struct uht_reader *r)
439 {
440 munmap(r->data, r->len);
441 close(r->fd);
442 }