| File: | rpmio/rpmstrpool.c |
| Warning: | line 181, column 5 Value stored to 'ht' is never read |
| 1 | #include "system.h" |
| 2 | #include <stdio.h> |
| 3 | #include <stdlib.h> |
| 4 | #include <rpm/rpmstring.h> |
| 5 | #include <rpm/rpmstrpool.h> |
| 6 | #include "debug.h" |
| 7 | |
| 8 | #define STRDATA_CHUNKS1024 1024 |
| 9 | #define STRDATA_CHUNK65536 65536 |
| 10 | #define STROFFS_CHUNK2048 2048 |
| 11 | /* XXX this is ridiculously small... */ |
| 12 | #define STRHASH_INITSIZE1024 1024 |
| 13 | |
| 14 | static int pool_debug = 0; |
| 15 | |
| 16 | typedef struct poolHash_s * poolHash; |
| 17 | typedef struct poolHashBucket_s poolHashBucket; |
| 18 | |
| 19 | struct poolHashBucket_s { |
| 20 | rpmsid keyid; |
| 21 | }; |
| 22 | |
| 23 | struct poolHash_s { |
| 24 | int numBuckets; |
| 25 | poolHashBucket * buckets; |
| 26 | int keyCount; |
| 27 | }; |
| 28 | |
| 29 | struct rpmstrPool_s { |
| 30 | const char ** offs; /* pointers into data area */ |
| 31 | rpmsid offs_size; /* largest offset index */; |
| 32 | rpmsid offs_alloced; /* offsets allocation size */ |
| 33 | |
| 34 | char ** chunks; /* memory chunks for storing the strings */ |
| 35 | size_t chunks_size; /* current chunk */ |
| 36 | size_t chunks_allocated; /* allocated size of the chunks array */ |
| 37 | size_t chunk_allocated; /* size of the current chunk */ |
| 38 | size_t chunk_used; /* usage of the current chunk */ |
| 39 | |
| 40 | poolHash hash; /* string -> sid hash table */ |
| 41 | int frozen; /* are new id additions allowed? */ |
| 42 | int nrefs; /* refcount */ |
| 43 | }; |
| 44 | |
| 45 | /* calculate hash and string length on at once */ |
| 46 | static inline unsigned int rstrlenhash(const char * str, size_t * len) |
| 47 | { |
| 48 | /* Jenkins One-at-a-time hash */ |
| 49 | unsigned int hash = 0xe4721b68; |
| 50 | const char * s = str; |
| 51 | |
| 52 | while (*s != '\0') { |
| 53 | hash += *s; |
| 54 | hash += (hash << 10); |
| 55 | hash ^= (hash >> 6); |
| 56 | s++; |
| 57 | } |
| 58 | hash += (hash << 3); |
| 59 | hash ^= (hash >> 11); |
| 60 | hash += (hash << 15); |
| 61 | |
| 62 | if (len) |
| 63 | *len = (s - str); |
| 64 | |
| 65 | return hash; |
| 66 | } |
| 67 | |
| 68 | static inline unsigned int rstrnlenhash(const char * str, size_t n, size_t * len) |
| 69 | { |
| 70 | /* Jenkins One-at-a-time hash */ |
| 71 | unsigned int hash = 0xe4721b68; |
| 72 | const char * s = str; |
| 73 | |
| 74 | while (*s != '\0' && n-- > 0) { |
| 75 | hash += *s; |
| 76 | hash += (hash << 10); |
| 77 | hash ^= (hash >> 6); |
| 78 | s++; |
| 79 | } |
| 80 | hash += (hash << 3); |
| 81 | hash ^= (hash >> 11); |
| 82 | hash += (hash << 15); |
| 83 | |
| 84 | if (len) |
| 85 | *len = (s - str); |
| 86 | |
| 87 | return hash; |
| 88 | } |
| 89 | |
| 90 | static inline unsigned int rstrnhash(const char * string, size_t n) |
| 91 | { |
| 92 | return rstrnlenhash(string, n, NULL((void*)0)); |
| 93 | } |
| 94 | |
| 95 | unsigned int rstrhash(const char * string) |
| 96 | { |
| 97 | return rstrlenhash(string, NULL((void*)0)); |
| 98 | } |
| 99 | |
| 100 | static inline unsigned int hashbucket(unsigned int hash, unsigned int number) |
| 101 | { |
| 102 | return hash + number*number; |
| 103 | } |
| 104 | |
| 105 | static poolHash poolHashCreate(int numBuckets) |
| 106 | { |
| 107 | poolHash ht; |
| 108 | |
| 109 | ht = xmalloc(sizeof(*ht))rmalloc((sizeof(*ht))); |
| 110 | ht->numBuckets = numBuckets; |
| 111 | ht->buckets = xcalloc(numBuckets, sizeof(*ht->buckets))rcalloc((numBuckets), (sizeof(*ht->buckets))); |
| 112 | ht->keyCount = 0; |
| 113 | return ht; |
| 114 | } |
| 115 | |
| 116 | static void poolHashResize(rpmstrPool pool, int numBuckets) |
| 117 | { |
| 118 | poolHash ht = pool->hash; |
| 119 | poolHashBucket * buckets = xcalloc(numBuckets, sizeof(*ht->buckets))rcalloc((numBuckets), (sizeof(*ht->buckets))); |
| 120 | |
| 121 | for (int i=0; i<ht->numBuckets; i++) { |
| 122 | if (!ht->buckets[i].keyid) continue; |
| 123 | unsigned int keyHash = rstrhash(rpmstrPoolStr(pool, ht->buckets[i].keyid)); |
| 124 | for (unsigned int j=0;;j++) { |
| 125 | unsigned int hash = hashbucket(keyHash, j) % numBuckets; |
| 126 | if (!buckets[hash].keyid) { |
| 127 | buckets[hash].keyid = ht->buckets[i].keyid; |
| 128 | break; |
| 129 | } |
| 130 | } |
| 131 | } |
| 132 | free((void *)(ht->buckets)); |
| 133 | ht->buckets = buckets; |
| 134 | ht->numBuckets = numBuckets; |
| 135 | } |
| 136 | |
| 137 | static void poolHashAddHEntry(rpmstrPool pool, const char * key, unsigned int keyHash, rpmsid keyid) |
| 138 | { |
| 139 | poolHash ht = pool->hash; |
| 140 | |
| 141 | /* keep load factor between 0.25 and 0.5 */ |
| 142 | if (2*(ht->keyCount) > ht->numBuckets) { |
| 143 | poolHashResize(pool, ht->numBuckets * 2); |
| 144 | } |
| 145 | |
| 146 | for (unsigned int i=0;;i++) { |
| 147 | unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets; |
| 148 | if (!ht->buckets[hash].keyid) { |
| 149 | ht->buckets[hash].keyid = keyid; |
| 150 | ht->keyCount++; |
| 151 | break; |
| 152 | } else if (!strcmp(rpmstrPoolStr(pool, ht->buckets[hash].keyid), key)) { |
| 153 | return; |
| 154 | } |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | static void poolHashAddEntry(rpmstrPool pool, const char * key, rpmsid keyid) |
| 159 | { |
| 160 | poolHashAddHEntry(pool, key, rstrhash(key), keyid); |
| 161 | } |
| 162 | |
| 163 | static void poolHashEmpty( poolHash ht) |
| 164 | { |
| 165 | unsigned int i; |
| 166 | |
| 167 | if (ht->keyCount == 0) return; |
| 168 | |
| 169 | for (i = 0; i < ht->numBuckets; i++) { |
| 170 | ht->buckets[i].keyid = 0; |
| 171 | } |
| 172 | ht->keyCount = 0; |
| 173 | } |
| 174 | |
| 175 | static poolHash poolHashFree(poolHash ht) |
| 176 | { |
| 177 | if (ht==NULL((void*)0)) |
| 178 | return ht; |
| 179 | poolHashEmpty(ht); |
| 180 | ht->buckets = _free(ht->buckets)rfree((ht->buckets)); |
| 181 | ht = _free(ht)rfree((ht)); |
Value stored to 'ht' is never read | |
| 182 | |
| 183 | return NULL((void*)0); |
| 184 | } |
| 185 | |
| 186 | static void poolHashPrintStats(rpmstrPool pool) |
| 187 | { |
| 188 | poolHash ht = pool->hash; |
| 189 | int i; |
| 190 | int collisions = 0; |
| 191 | int maxcollisions = 0; |
| 192 | |
| 193 | for (i=0; i<ht->numBuckets; i++) { |
| 194 | unsigned int keyHash = rstrhash(rpmstrPoolStr(pool, ht->buckets[i].keyid)); |
| 195 | for (unsigned int j=0;;j++) { |
| 196 | unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets; |
| 197 | if (hash==i) { |
| 198 | collisions += j; |
| 199 | maxcollisions = maxcollisions > j ? maxcollisions : j; |
| 200 | break; |
| 201 | } |
| 202 | } |
| 203 | } |
| 204 | fprintf(stderrstderr, "Hashsize: %i\n", ht->numBuckets); |
| 205 | fprintf(stderrstderr, "Keys: %i\n", ht->keyCount); |
| 206 | fprintf(stderrstderr, "Collisions: %i\n", collisions); |
| 207 | fprintf(stderrstderr, "Max collisions: %i\n", maxcollisions); |
| 208 | } |
| 209 | |
| 210 | static void rpmstrPoolRehash(rpmstrPool pool) |
| 211 | { |
| 212 | int sizehint; |
| 213 | |
| 214 | if (pool->offs_size < STRHASH_INITSIZE1024) |
| 215 | sizehint = STRHASH_INITSIZE1024; |
| 216 | else |
| 217 | sizehint = pool->offs_size * 2; |
| 218 | |
| 219 | if (pool->hash) |
| 220 | pool->hash = poolHashFree(pool->hash); |
| 221 | |
| 222 | pool->hash = poolHashCreate(sizehint); |
| 223 | for (int i = 1; i <= pool->offs_size; i++) |
| 224 | poolHashAddEntry(pool, rpmstrPoolStr(pool, i), i); |
| 225 | } |
| 226 | |
| 227 | rpmstrPool rpmstrPoolCreate(void) |
| 228 | { |
| 229 | rpmstrPool pool = xcalloc(1, sizeof(*pool))rcalloc((1), (sizeof(*pool))); |
| 230 | |
| 231 | pool->offs_alloced = STROFFS_CHUNK2048; |
| 232 | pool->offs = xcalloc(pool->offs_alloced, sizeof(*pool->offs))rcalloc((pool->offs_alloced), (sizeof(*pool->offs))); |
| 233 | |
| 234 | pool->chunks_allocated = STRDATA_CHUNKS1024; |
| 235 | pool->chunks = xcalloc(pool->chunks_allocated, sizeof(*pool->chunks))rcalloc((pool->chunks_allocated), (sizeof(*pool->chunks ))); |
| 236 | pool->chunks_size = 1; |
| 237 | pool->chunk_allocated = STRDATA_CHUNK65536; |
| 238 | pool->chunks[pool->chunks_size] = xcalloc(1, pool->chunk_allocated)rcalloc((1), (pool->chunk_allocated)); |
| 239 | pool->offs[1] = pool->chunks[pool->chunks_size]; |
| 240 | |
| 241 | rpmstrPoolRehash(pool); |
| 242 | pool->nrefs = 1; |
| 243 | return pool; |
| 244 | } |
| 245 | |
| 246 | rpmstrPool rpmstrPoolFree(rpmstrPool pool) |
| 247 | { |
| 248 | if (pool) { |
| 249 | if (pool->nrefs > 1) { |
| 250 | pool->nrefs--; |
| 251 | } else { |
| 252 | if (pool_debug) |
| 253 | poolHashPrintStats(pool); |
| 254 | poolHashFree(pool->hash); |
| 255 | free(pool->offs); |
| 256 | for (int i=1;i<=pool->chunks_size;i++) { |
| 257 | pool->chunks[i] = _free(pool->chunks[i])rfree((pool->chunks[i])); |
| 258 | } |
| 259 | free(pool->chunks); |
| 260 | free(pool); |
| 261 | } |
| 262 | } |
| 263 | return NULL((void*)0); |
| 264 | } |
| 265 | |
| 266 | rpmstrPool rpmstrPoolLink(rpmstrPool pool) |
| 267 | { |
| 268 | if (pool) |
| 269 | pool->nrefs++; |
| 270 | return pool; |
| 271 | } |
| 272 | |
| 273 | void rpmstrPoolFreeze(rpmstrPool pool, int keephash) |
| 274 | { |
| 275 | if (pool && !pool->frozen) { |
| 276 | if (!keephash) { |
| 277 | pool->hash = poolHashFree(pool->hash); |
| 278 | } |
| 279 | pool->offs_alloced = pool->offs_size + 2; /* space for end marker */ |
| 280 | pool->offs = xrealloc(pool->offs,rrealloc((pool->offs), (pool->offs_alloced * sizeof(*pool ->offs))) |
| 281 | pool->offs_alloced * sizeof(*pool->offs))rrealloc((pool->offs), (pool->offs_alloced * sizeof(*pool ->offs))); |
| 282 | pool->frozen = 1; |
| 283 | } |
| 284 | } |
| 285 | |
| 286 | void rpmstrPoolUnfreeze(rpmstrPool pool) |
| 287 | { |
| 288 | if (pool) { |
| 289 | if (pool->hash == NULL((void*)0)) { |
| 290 | rpmstrPoolRehash(pool); |
| 291 | } |
| 292 | pool->frozen = 0; |
| 293 | } |
| 294 | } |
| 295 | |
| 296 | static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigned int hash) |
| 297 | { |
| 298 | char *t = NULL((void*)0); |
| 299 | size_t ssize = slen + 1; |
| 300 | |
| 301 | pool->offs_size += 1; |
| 302 | if (pool->offs_alloced <= pool->offs_size) { |
| 303 | pool->offs_alloced += STROFFS_CHUNK2048; |
| 304 | pool->offs = xrealloc(pool->offs,rrealloc((pool->offs), (pool->offs_alloced * sizeof(*pool ->offs))) |
| 305 | pool->offs_alloced * sizeof(*pool->offs))rrealloc((pool->offs), (pool->offs_alloced * sizeof(*pool ->offs))); |
| 306 | } |
| 307 | |
| 308 | /* Do we need a new chunk to store the string? */ |
| 309 | if (ssize > pool->chunk_allocated - pool->chunk_used) { |
| 310 | pool->chunks_size += 1; |
| 311 | /* Grow chunks array if needed */ |
| 312 | if (pool->chunks_size >= pool->chunks_allocated) { |
| 313 | pool->chunks_allocated += pool->chunks_allocated; |
| 314 | pool->chunks = xrealloc(pool->chunks,rrealloc((pool->chunks), (pool->chunks_allocated * sizeof (*pool->chunks))) |
| 315 | pool->chunks_allocated * sizeof(*pool->chunks))rrealloc((pool->chunks), (pool->chunks_allocated * sizeof (*pool->chunks))); |
| 316 | } |
| 317 | |
| 318 | /* Ensure the string fits in the new chunk we're about to allocate */ |
| 319 | if (ssize > pool->chunk_allocated) { |
| 320 | pool->chunk_allocated = 2 * ssize; |
| 321 | } |
| 322 | |
| 323 | pool->chunks[pool->chunks_size] = xcalloc(1, pool->chunk_allocated)rcalloc((1), (pool->chunk_allocated)); |
| 324 | pool->chunk_used = 0; |
| 325 | } |
| 326 | |
| 327 | /* Copy the string into current chunk, ensure termination */ |
| 328 | t = memcpy(pool->chunks[pool->chunks_size] + pool->chunk_used, s, slen); |
| 329 | t[slen] = '\0'; |
| 330 | pool->chunk_used += ssize; |
| 331 | |
| 332 | /* Actually add the string to the pool */ |
| 333 | pool->offs[pool->offs_size] = t; |
| 334 | poolHashAddHEntry(pool, t, hash, pool->offs_size); |
| 335 | |
| 336 | return pool->offs_size; |
| 337 | } |
| 338 | |
| 339 | static rpmsid rpmstrPoolGet(rpmstrPool pool, const char * key, size_t keylen, |
| 340 | unsigned int keyHash) |
| 341 | { |
| 342 | poolHash ht = pool->hash; |
| 343 | const char * s; |
| 344 | |
| 345 | |
| 346 | for (unsigned int i=0;; i++) { |
| 347 | unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets; |
| 348 | |
| 349 | if (!ht->buckets[hash].keyid) { |
| 350 | return 0; |
| 351 | } |
| 352 | |
| 353 | s = rpmstrPoolStr(pool, ht->buckets[hash].keyid); |
| 354 | /* pool string could be longer than keylen, require exact matche */ |
| 355 | if (strncmp(s, key, keylen) == 0 && s[keylen] == '\0') |
| 356 | return ht->buckets[hash].keyid; |
| 357 | } |
| 358 | } |
| 359 | |
| 360 | static inline rpmsid strn2id(rpmstrPool pool, const char *s, size_t slen, |
| 361 | unsigned int hash, int create) |
| 362 | { |
| 363 | rpmsid sid = 0; |
| 364 | |
| 365 | if (pool && pool->hash) { |
| 366 | sid = rpmstrPoolGet(pool, s, slen, hash); |
| 367 | if (sid == 0 && create && !pool->frozen) |
| 368 | sid = rpmstrPoolPut(pool, s, slen, hash); |
| 369 | } |
| 370 | return sid; |
| 371 | } |
| 372 | |
| 373 | rpmsid rpmstrPoolIdn(rpmstrPool pool, const char *s, size_t slen, int create) |
| 374 | { |
| 375 | rpmsid sid = 0; |
| 376 | |
| 377 | if (s != NULL((void*)0)) { |
| 378 | unsigned int hash = rstrnhash(s, slen); |
| 379 | sid = strn2id(pool, s, slen, hash, create); |
| 380 | } |
| 381 | return sid; |
| 382 | } |
| 383 | |
| 384 | rpmsid rpmstrPoolId(rpmstrPool pool, const char *s, int create) |
| 385 | { |
| 386 | rpmsid sid = 0; |
| 387 | |
| 388 | if (s != NULL((void*)0)) { |
| 389 | size_t slen; |
| 390 | unsigned int hash = rstrlenhash(s, &slen); |
| 391 | sid = strn2id(pool, s, slen, hash, create); |
| 392 | } |
| 393 | return sid; |
| 394 | } |
| 395 | |
| 396 | const char * rpmstrPoolStr(rpmstrPool pool, rpmsid sid) |
| 397 | { |
| 398 | const char *s = NULL((void*)0); |
| 399 | if (pool && sid > 0 && sid <= pool->offs_size) |
| 400 | s = pool->offs[sid]; |
| 401 | return s; |
| 402 | } |
| 403 | |
| 404 | size_t rpmstrPoolStrlen(rpmstrPool pool, rpmsid sid) |
| 405 | { |
| 406 | size_t slen = 0; |
| 407 | if (pool && sid > 0 && sid <= pool->offs_size) { |
| 408 | slen = strlen(pool->offs[sid]); |
| 409 | } |
| 410 | return slen; |
| 411 | } |
| 412 | |
| 413 | int rpmstrPoolStreq(rpmstrPool poolA, rpmsid sidA, |
| 414 | rpmstrPool poolB, rpmsid sidB) |
| 415 | { |
| 416 | if (poolA == poolB) |
| 417 | return (sidA == sidB); |
| 418 | else |
| 419 | return rstreq(rpmstrPoolStr(poolA, sidA), rpmstrPoolStr(poolB, sidB)); |
| 420 | } |
| 421 | |
| 422 | rpmsid rpmstrPoolNumStr(rpmstrPool pool) |
| 423 | { |
| 424 | return (pool != NULL((void*)0)) ? pool->offs_size : 0; |
| 425 | } |