File: | rpmio/rpmstrpool.c |
Warning: | line 181, column 5 Value stored to 'ht' is never read |
1 | #include "system.h" |
2 | #include <stdio.h> |
3 | #include <stdlib.h> |
4 | #include <rpm/rpmstring.h> |
5 | #include <rpm/rpmstrpool.h> |
6 | #include "debug.h" |
7 | |
8 | #define STRDATA_CHUNKS1024 1024 |
9 | #define STRDATA_CHUNK65536 65536 |
10 | #define STROFFS_CHUNK2048 2048 |
11 | /* XXX this is ridiculously small... */ |
12 | #define STRHASH_INITSIZE1024 1024 |
13 | |
14 | static int pool_debug = 0; |
15 | |
16 | typedef struct poolHash_s * poolHash; |
17 | typedef struct poolHashBucket_s poolHashBucket; |
18 | |
19 | struct poolHashBucket_s { |
20 | rpmsid keyid; |
21 | }; |
22 | |
23 | struct poolHash_s { |
24 | int numBuckets; |
25 | poolHashBucket * buckets; |
26 | int keyCount; |
27 | }; |
28 | |
29 | struct rpmstrPool_s { |
30 | const char ** offs; /* pointers into data area */ |
31 | rpmsid offs_size; /* largest offset index */; |
32 | rpmsid offs_alloced; /* offsets allocation size */ |
33 | |
34 | char ** chunks; /* memory chunks for storing the strings */ |
35 | size_t chunks_size; /* current chunk */ |
36 | size_t chunks_allocated; /* allocated size of the chunks array */ |
37 | size_t chunk_allocated; /* size of the current chunk */ |
38 | size_t chunk_used; /* usage of the current chunk */ |
39 | |
40 | poolHash hash; /* string -> sid hash table */ |
41 | int frozen; /* are new id additions allowed? */ |
42 | int nrefs; /* refcount */ |
43 | }; |
44 | |
45 | /* calculate hash and string length on at once */ |
46 | static inline unsigned int rstrlenhash(const char * str, size_t * len) |
47 | { |
48 | /* Jenkins One-at-a-time hash */ |
49 | unsigned int hash = 0xe4721b68; |
50 | const char * s = str; |
51 | |
52 | while (*s != '\0') { |
53 | hash += *s; |
54 | hash += (hash << 10); |
55 | hash ^= (hash >> 6); |
56 | s++; |
57 | } |
58 | hash += (hash << 3); |
59 | hash ^= (hash >> 11); |
60 | hash += (hash << 15); |
61 | |
62 | if (len) |
63 | *len = (s - str); |
64 | |
65 | return hash; |
66 | } |
67 | |
68 | static inline unsigned int rstrnlenhash(const char * str, size_t n, size_t * len) |
69 | { |
70 | /* Jenkins One-at-a-time hash */ |
71 | unsigned int hash = 0xe4721b68; |
72 | const char * s = str; |
73 | |
74 | while (*s != '\0' && n-- > 0) { |
75 | hash += *s; |
76 | hash += (hash << 10); |
77 | hash ^= (hash >> 6); |
78 | s++; |
79 | } |
80 | hash += (hash << 3); |
81 | hash ^= (hash >> 11); |
82 | hash += (hash << 15); |
83 | |
84 | if (len) |
85 | *len = (s - str); |
86 | |
87 | return hash; |
88 | } |
89 | |
90 | static inline unsigned int rstrnhash(const char * string, size_t n) |
91 | { |
92 | return rstrnlenhash(string, n, NULL((void*)0)); |
93 | } |
94 | |
95 | unsigned int rstrhash(const char * string) |
96 | { |
97 | return rstrlenhash(string, NULL((void*)0)); |
98 | } |
99 | |
100 | static inline unsigned int hashbucket(unsigned int hash, unsigned int number) |
101 | { |
102 | return hash + number*number; |
103 | } |
104 | |
105 | static poolHash poolHashCreate(int numBuckets) |
106 | { |
107 | poolHash ht; |
108 | |
109 | ht = xmalloc(sizeof(*ht))rmalloc((sizeof(*ht))); |
110 | ht->numBuckets = numBuckets; |
111 | ht->buckets = xcalloc(numBuckets, sizeof(*ht->buckets))rcalloc((numBuckets), (sizeof(*ht->buckets))); |
112 | ht->keyCount = 0; |
113 | return ht; |
114 | } |
115 | |
116 | static void poolHashResize(rpmstrPool pool, int numBuckets) |
117 | { |
118 | poolHash ht = pool->hash; |
119 | poolHashBucket * buckets = xcalloc(numBuckets, sizeof(*ht->buckets))rcalloc((numBuckets), (sizeof(*ht->buckets))); |
120 | |
121 | for (int i=0; i<ht->numBuckets; i++) { |
122 | if (!ht->buckets[i].keyid) continue; |
123 | unsigned int keyHash = rstrhash(rpmstrPoolStr(pool, ht->buckets[i].keyid)); |
124 | for (unsigned int j=0;;j++) { |
125 | unsigned int hash = hashbucket(keyHash, j) % numBuckets; |
126 | if (!buckets[hash].keyid) { |
127 | buckets[hash].keyid = ht->buckets[i].keyid; |
128 | break; |
129 | } |
130 | } |
131 | } |
132 | free((void *)(ht->buckets)); |
133 | ht->buckets = buckets; |
134 | ht->numBuckets = numBuckets; |
135 | } |
136 | |
137 | static void poolHashAddHEntry(rpmstrPool pool, const char * key, unsigned int keyHash, rpmsid keyid) |
138 | { |
139 | poolHash ht = pool->hash; |
140 | |
141 | /* keep load factor between 0.25 and 0.5 */ |
142 | if (2*(ht->keyCount) > ht->numBuckets) { |
143 | poolHashResize(pool, ht->numBuckets * 2); |
144 | } |
145 | |
146 | for (unsigned int i=0;;i++) { |
147 | unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets; |
148 | if (!ht->buckets[hash].keyid) { |
149 | ht->buckets[hash].keyid = keyid; |
150 | ht->keyCount++; |
151 | break; |
152 | } else if (!strcmp(rpmstrPoolStr(pool, ht->buckets[hash].keyid), key)) { |
153 | return; |
154 | } |
155 | } |
156 | } |
157 | |
158 | static void poolHashAddEntry(rpmstrPool pool, const char * key, rpmsid keyid) |
159 | { |
160 | poolHashAddHEntry(pool, key, rstrhash(key), keyid); |
161 | } |
162 | |
163 | static void poolHashEmpty( poolHash ht) |
164 | { |
165 | unsigned int i; |
166 | |
167 | if (ht->keyCount == 0) return; |
168 | |
169 | for (i = 0; i < ht->numBuckets; i++) { |
170 | ht->buckets[i].keyid = 0; |
171 | } |
172 | ht->keyCount = 0; |
173 | } |
174 | |
175 | static poolHash poolHashFree(poolHash ht) |
176 | { |
177 | if (ht==NULL((void*)0)) |
178 | return ht; |
179 | poolHashEmpty(ht); |
180 | ht->buckets = _free(ht->buckets)rfree((ht->buckets)); |
181 | ht = _free(ht)rfree((ht)); |
Value stored to 'ht' is never read | |
182 | |
183 | return NULL((void*)0); |
184 | } |
185 | |
186 | static void poolHashPrintStats(rpmstrPool pool) |
187 | { |
188 | poolHash ht = pool->hash; |
189 | int i; |
190 | int collisions = 0; |
191 | int maxcollisions = 0; |
192 | |
193 | for (i=0; i<ht->numBuckets; i++) { |
194 | unsigned int keyHash = rstrhash(rpmstrPoolStr(pool, ht->buckets[i].keyid)); |
195 | for (unsigned int j=0;;j++) { |
196 | unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets; |
197 | if (hash==i) { |
198 | collisions += j; |
199 | maxcollisions = maxcollisions > j ? maxcollisions : j; |
200 | break; |
201 | } |
202 | } |
203 | } |
204 | fprintf(stderrstderr, "Hashsize: %i\n", ht->numBuckets); |
205 | fprintf(stderrstderr, "Keys: %i\n", ht->keyCount); |
206 | fprintf(stderrstderr, "Collisions: %i\n", collisions); |
207 | fprintf(stderrstderr, "Max collisions: %i\n", maxcollisions); |
208 | } |
209 | |
210 | static void rpmstrPoolRehash(rpmstrPool pool) |
211 | { |
212 | int sizehint; |
213 | |
214 | if (pool->offs_size < STRHASH_INITSIZE1024) |
215 | sizehint = STRHASH_INITSIZE1024; |
216 | else |
217 | sizehint = pool->offs_size * 2; |
218 | |
219 | if (pool->hash) |
220 | pool->hash = poolHashFree(pool->hash); |
221 | |
222 | pool->hash = poolHashCreate(sizehint); |
223 | for (int i = 1; i <= pool->offs_size; i++) |
224 | poolHashAddEntry(pool, rpmstrPoolStr(pool, i), i); |
225 | } |
226 | |
227 | rpmstrPool rpmstrPoolCreate(void) |
228 | { |
229 | rpmstrPool pool = xcalloc(1, sizeof(*pool))rcalloc((1), (sizeof(*pool))); |
230 | |
231 | pool->offs_alloced = STROFFS_CHUNK2048; |
232 | pool->offs = xcalloc(pool->offs_alloced, sizeof(*pool->offs))rcalloc((pool->offs_alloced), (sizeof(*pool->offs))); |
233 | |
234 | pool->chunks_allocated = STRDATA_CHUNKS1024; |
235 | pool->chunks = xcalloc(pool->chunks_allocated, sizeof(*pool->chunks))rcalloc((pool->chunks_allocated), (sizeof(*pool->chunks ))); |
236 | pool->chunks_size = 1; |
237 | pool->chunk_allocated = STRDATA_CHUNK65536; |
238 | pool->chunks[pool->chunks_size] = xcalloc(1, pool->chunk_allocated)rcalloc((1), (pool->chunk_allocated)); |
239 | pool->offs[1] = pool->chunks[pool->chunks_size]; |
240 | |
241 | rpmstrPoolRehash(pool); |
242 | pool->nrefs = 1; |
243 | return pool; |
244 | } |
245 | |
246 | rpmstrPool rpmstrPoolFree(rpmstrPool pool) |
247 | { |
248 | if (pool) { |
249 | if (pool->nrefs > 1) { |
250 | pool->nrefs--; |
251 | } else { |
252 | if (pool_debug) |
253 | poolHashPrintStats(pool); |
254 | poolHashFree(pool->hash); |
255 | free(pool->offs); |
256 | for (int i=1;i<=pool->chunks_size;i++) { |
257 | pool->chunks[i] = _free(pool->chunks[i])rfree((pool->chunks[i])); |
258 | } |
259 | free(pool->chunks); |
260 | free(pool); |
261 | } |
262 | } |
263 | return NULL((void*)0); |
264 | } |
265 | |
266 | rpmstrPool rpmstrPoolLink(rpmstrPool pool) |
267 | { |
268 | if (pool) |
269 | pool->nrefs++; |
270 | return pool; |
271 | } |
272 | |
273 | void rpmstrPoolFreeze(rpmstrPool pool, int keephash) |
274 | { |
275 | if (pool && !pool->frozen) { |
276 | if (!keephash) { |
277 | pool->hash = poolHashFree(pool->hash); |
278 | } |
279 | pool->offs_alloced = pool->offs_size + 2; /* space for end marker */ |
280 | pool->offs = xrealloc(pool->offs,rrealloc((pool->offs), (pool->offs_alloced * sizeof(*pool ->offs))) |
281 | pool->offs_alloced * sizeof(*pool->offs))rrealloc((pool->offs), (pool->offs_alloced * sizeof(*pool ->offs))); |
282 | pool->frozen = 1; |
283 | } |
284 | } |
285 | |
286 | void rpmstrPoolUnfreeze(rpmstrPool pool) |
287 | { |
288 | if (pool) { |
289 | if (pool->hash == NULL((void*)0)) { |
290 | rpmstrPoolRehash(pool); |
291 | } |
292 | pool->frozen = 0; |
293 | } |
294 | } |
295 | |
296 | static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigned int hash) |
297 | { |
298 | char *t = NULL((void*)0); |
299 | size_t ssize = slen + 1; |
300 | |
301 | pool->offs_size += 1; |
302 | if (pool->offs_alloced <= pool->offs_size) { |
303 | pool->offs_alloced += STROFFS_CHUNK2048; |
304 | pool->offs = xrealloc(pool->offs,rrealloc((pool->offs), (pool->offs_alloced * sizeof(*pool ->offs))) |
305 | pool->offs_alloced * sizeof(*pool->offs))rrealloc((pool->offs), (pool->offs_alloced * sizeof(*pool ->offs))); |
306 | } |
307 | |
308 | /* Do we need a new chunk to store the string? */ |
309 | if (ssize > pool->chunk_allocated - pool->chunk_used) { |
310 | pool->chunks_size += 1; |
311 | /* Grow chunks array if needed */ |
312 | if (pool->chunks_size >= pool->chunks_allocated) { |
313 | pool->chunks_allocated += pool->chunks_allocated; |
314 | pool->chunks = xrealloc(pool->chunks,rrealloc((pool->chunks), (pool->chunks_allocated * sizeof (*pool->chunks))) |
315 | pool->chunks_allocated * sizeof(*pool->chunks))rrealloc((pool->chunks), (pool->chunks_allocated * sizeof (*pool->chunks))); |
316 | } |
317 | |
318 | /* Ensure the string fits in the new chunk we're about to allocate */ |
319 | if (ssize > pool->chunk_allocated) { |
320 | pool->chunk_allocated = 2 * ssize; |
321 | } |
322 | |
323 | pool->chunks[pool->chunks_size] = xcalloc(1, pool->chunk_allocated)rcalloc((1), (pool->chunk_allocated)); |
324 | pool->chunk_used = 0; |
325 | } |
326 | |
327 | /* Copy the string into current chunk, ensure termination */ |
328 | t = memcpy(pool->chunks[pool->chunks_size] + pool->chunk_used, s, slen); |
329 | t[slen] = '\0'; |
330 | pool->chunk_used += ssize; |
331 | |
332 | /* Actually add the string to the pool */ |
333 | pool->offs[pool->offs_size] = t; |
334 | poolHashAddHEntry(pool, t, hash, pool->offs_size); |
335 | |
336 | return pool->offs_size; |
337 | } |
338 | |
339 | static rpmsid rpmstrPoolGet(rpmstrPool pool, const char * key, size_t keylen, |
340 | unsigned int keyHash) |
341 | { |
342 | poolHash ht = pool->hash; |
343 | const char * s; |
344 | |
345 | |
346 | for (unsigned int i=0;; i++) { |
347 | unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets; |
348 | |
349 | if (!ht->buckets[hash].keyid) { |
350 | return 0; |
351 | } |
352 | |
353 | s = rpmstrPoolStr(pool, ht->buckets[hash].keyid); |
354 | /* pool string could be longer than keylen, require exact matche */ |
355 | if (strncmp(s, key, keylen) == 0 && s[keylen] == '\0') |
356 | return ht->buckets[hash].keyid; |
357 | } |
358 | } |
359 | |
360 | static inline rpmsid strn2id(rpmstrPool pool, const char *s, size_t slen, |
361 | unsigned int hash, int create) |
362 | { |
363 | rpmsid sid = 0; |
364 | |
365 | if (pool && pool->hash) { |
366 | sid = rpmstrPoolGet(pool, s, slen, hash); |
367 | if (sid == 0 && create && !pool->frozen) |
368 | sid = rpmstrPoolPut(pool, s, slen, hash); |
369 | } |
370 | return sid; |
371 | } |
372 | |
373 | rpmsid rpmstrPoolIdn(rpmstrPool pool, const char *s, size_t slen, int create) |
374 | { |
375 | rpmsid sid = 0; |
376 | |
377 | if (s != NULL((void*)0)) { |
378 | unsigned int hash = rstrnhash(s, slen); |
379 | sid = strn2id(pool, s, slen, hash, create); |
380 | } |
381 | return sid; |
382 | } |
383 | |
384 | rpmsid rpmstrPoolId(rpmstrPool pool, const char *s, int create) |
385 | { |
386 | rpmsid sid = 0; |
387 | |
388 | if (s != NULL((void*)0)) { |
389 | size_t slen; |
390 | unsigned int hash = rstrlenhash(s, &slen); |
391 | sid = strn2id(pool, s, slen, hash, create); |
392 | } |
393 | return sid; |
394 | } |
395 | |
396 | const char * rpmstrPoolStr(rpmstrPool pool, rpmsid sid) |
397 | { |
398 | const char *s = NULL((void*)0); |
399 | if (pool && sid > 0 && sid <= pool->offs_size) |
400 | s = pool->offs[sid]; |
401 | return s; |
402 | } |
403 | |
404 | size_t rpmstrPoolStrlen(rpmstrPool pool, rpmsid sid) |
405 | { |
406 | size_t slen = 0; |
407 | if (pool && sid > 0 && sid <= pool->offs_size) { |
408 | slen = strlen(pool->offs[sid]); |
409 | } |
410 | return slen; |
411 | } |
412 | |
413 | int rpmstrPoolStreq(rpmstrPool poolA, rpmsid sidA, |
414 | rpmstrPool poolB, rpmsid sidB) |
415 | { |
416 | if (poolA == poolB) |
417 | return (sidA == sidB); |
418 | else |
419 | return rstreq(rpmstrPoolStr(poolA, sidA), rpmstrPoolStr(poolB, sidB)); |
420 | } |
421 | |
422 | rpmsid rpmstrPoolNumStr(rpmstrPool pool) |
423 | { |
424 | return (pool != NULL((void*)0)) ? pool->offs_size : 0; |
425 | } |