Bug Summary

File:src/repopage.c
Warning:line 402, column 7
Value stored to 'litofs' is never read

Annotated Source Code

1/*
2 * Copyright (c) 2007-2012, Novell Inc.
3 *
4 * This program is licensed under the BSD license, read LICENSE.BSD
5 * for further information
6 */
7
8/*
9 * repopage.c
10 *
11 * Paging and compression functions for the vertical repository data.
12 * Vertical data is grouped by key, normal data is grouped by solvable.
13 * This makes searching for a string in vertical data fast as there's
14 * no need to skip over data if keys we're not interested in.
15 *
16 * The vertical data is split into pages, each page is compressed with a fast
17 * compression algorithm. These pages are read in on demand, not recently used
18 * pages automatically get dropped.
19 */
20
21#define _XOPEN_SOURCE500 500
22
23#include <sys/types.h>
24#include <stdint.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <unistd.h>
29#include <assert.h>
30#include <fcntl.h>
31#include <time.h>
32
33#include "repo.h"
34#include "repopage.h"
35
36
37
38#define BLOCK_SIZE(65536*1) (65536*1)
39#if BLOCK_SIZE(65536*1) <= 65536
40typedef uint16_t Ref;
41#else
42typedef uint32_t Ref;
43#endif
44
45/*
46 The format is tailored for fast decompression (i.e. only byte based),
47 and skewed to ASCII content (highest bit often not set):
48
49 a 0LLLLLLL
50 - self-describing ASCII character hex L
51 b 100lllll <l+1 bytes>
52 - literal run of length l+1
53 c 101oolll <8o>
54 - back ref of length l+2, at offset -(o+1) (o < 1 << 10)
55 d 110lllll <8o>
56 - back ref of length l+2+8, at offset -(o+1) (o < 1 << 8)
57 e 1110llll <8o> <8o>
58 - back ref of length l+3, at offset -(o+1) (o < 1 << 16)
59 f1 1111llll <8l> <8o> <8o>
60 - back ref, length l+19 (l < 1<<12), offset -(o+1) (o < 1<<16)
61 f2 11110lll <8l> <8o> <8o>
62 - back ref, length l+19 (l < 1<<11), offset -(o+1) (o < 1<<16)
63 g 11111lll <8l> <8o> <8o> <8o>
64 - back ref, length l+5 (l < 1<<11), offset -(o+1) (o < 1<<24)
65
66 Generally for a literal of length L we need L+1 bytes, hence it is
67 better to encode also very short backrefs (2 chars) as backrefs if
68 their offset is small, as that only needs two bytes. Except if we
69 already have a literal run, in that case it's better to append there,
70 instead of breaking it for a backref. So given a potential backref
71 at offset O, length L the strategy is as follows:
72
73 L < 2 : encode as 1-literal
74 L == 2, O > 1024 : encode as 1-literal
75 L == 2, have already literals: encode as 1-literal
76 O = O - 1
77 L >= 2, L <= 9, O < 1024 : encode as c
78 L >= 10, L <= 41, O < 256 : encode as d
79 else we have either O >= 1024, or L >= 42:
80 L < 3 : encode as 1-literal
81 L >= 3, L <= 18, O < 65536 : encode as e
82 L >= 19, L <= 4095+18, O < 65536 : encode as f
83 else we have either L >= 4096+18 or O >= 65536.
84 O >= 65536: encode as 1-literal, too bad
85 (with the current block size this can't happen)
86 L >= 4096+18, so reduce to 4095+18 : encode as f
87*/
88
89
90static unsigned int
91compress_buf(const unsigned char *in, unsigned int in_len,
92 unsigned char *out, unsigned int out_len)
93{
94 unsigned int oo = 0; /* out-offset */
95 unsigned int io = 0; /* in-offset */
96#define HS(65536) (65536)
97 Ref htab[HS(65536)];
98 Ref hnext[BLOCK_SIZE(65536*1)];
99 unsigned int litofs = 0;
100 memset(htab, -1, sizeof (htab));
101 memset(hnext, -1, sizeof (hnext));
102 while (io + 2 < in_len)
103 {
104 /* Search for a match of the string starting at IN, we have at
105 least three characters. */
106 unsigned int hval = in[io] | in[io + 1] << 8 | in[io + 2] << 16;
107 unsigned int try, mlen, mofs, tries;
108 hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5;
109 hval = hval & (HS(65536) - 1);
110 try = htab[hval];
111 hnext[io] = htab[hval];
112 htab[hval] = io;
113 mlen = 0;
114 mofs = 0;
115
116 for (tries = 0; try != -1 && tries < 12; tries++)
117 {
118 if (try < io
119 && in[try] == in[io] && in[try + 1] == in[io + 1])
120 {
121 mlen = 2;
122 mofs = (io - try) - 1;
123 break;
124 }
125 try = hnext[try];
126 }
127 for (; try != -1 && tries < 12; tries++)
128 {
129 /* assert(mlen >= 2); */
130 /* assert(io + mlen < in_len); */
131 /* Try a match starting from [io] with the strings at [try].
132 That's only sensible if TRY actually is before IO (can happen
133 with uninit hash table). If we have a previous match already
134 we're only going to take the new one if it's longer, hence
135 check the potentially last character. */
136 if (try < io && in[try + mlen] == in[io + mlen])
137 {
138 unsigned int this_len, this_ofs;
139 if (memcmp(in + try, in + io, mlen))
140 goto no_match;
141 this_len = mlen + 1;
142 /* Now try extending the match by more characters. */
143 for (;
144 io + this_len < in_len
145 && in[try + this_len] == in[io + this_len]; this_len++)
146 ;
147#if 0
148 unsigned int testi;
149 for (testi = 0; testi < this_len; testi++)
150 assert(in[try + testi] == in[io + testi])({ if (in[try + testi] == in[io + testi]) ; else __assert_fail
("in[try + testi] == in[io + testi]", "/home/brain/Projects/upstream/libsolv/src/repopage.c"
, 150, __PRETTY_FUNCTION__); })
;
151#endif
152 this_ofs = (io - try) - 1;
153 /*if (this_ofs > 65535)
154 goto no_match; */
155#if 0
156 assert(this_len >= 2)({ if (this_len >= 2) ; else __assert_fail ("this_len >= 2"
, "/home/brain/Projects/upstream/libsolv/src/repopage.c", 156
, __PRETTY_FUNCTION__); })
;
157 assert(this_len >= mlen)({ if (this_len >= mlen) ; else __assert_fail ("this_len >= mlen"
, "/home/brain/Projects/upstream/libsolv/src/repopage.c", 157
, __PRETTY_FUNCTION__); })
;
158 assert(this_len > mlen || (this_len == mlen && this_ofs > mofs))({ if (this_len > mlen || (this_len == mlen && this_ofs
> mofs)) ; else __assert_fail ("this_len > mlen || (this_len == mlen && this_ofs > mofs)"
, "/home/brain/Projects/upstream/libsolv/src/repopage.c", 158
, __PRETTY_FUNCTION__); })
;
159#endif
160 mlen = this_len, mofs = this_ofs;
161 /* If our match extends up to the end of input, no next
162 match can become better. This is not just an
163 optimization, it establishes a loop invariant
164 (io + mlen < in_len). */
165 if (io + mlen >= in_len)
166 goto match_done;
167 }
168 no_match:
169 try = hnext[try];
170 /*if (io - try - 1 >= 65536)
171 break;*/
172 }
173
174match_done:
175 if (mlen)
176 {
177 /*fprintf(stderr, "%d %d\n", mlen, mofs);*/
178 if (mlen == 2 && (litofs || mofs >= 1024))
179 mlen = 0;
180 /*else if (mofs >= 65536)
181 mlen = 0;*/
182 else if (mofs >= 65536)
183 {
184 if (mlen >= 2048 + 5)
185 mlen = 2047 + 5;
186 else if (mlen < 5)
187 mlen = 0;
188 }
189 else if (mlen < 3)
190 mlen = 0;
191 /*else if (mlen >= 4096 + 19)
192 mlen = 4095 + 19;*/
193 else if (mlen >= 2048 + 19)
194 mlen = 2047 + 19;
195 /* Skip this match if the next character would deliver a better one,
196 but only do this if we have the chance to really extend the
197 length (i.e. our current length isn't yet the (conservative)
198 maximum). */
199 if (mlen && mlen < (2048 + 5) && io + 3 < in_len)
200 {
201 unsigned int hval =
202 in[io + 1] | in[io + 2] << 8 | in[io + 3] << 16;
203 unsigned int try;
204 hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5;
205 hval = hval & (HS(65536) - 1);
206 try = htab[hval];
207 if (try < io + 1
208 && in[try] == in[io + 1] && in[try + 1] == in[io + 2])
209 {
210 unsigned int this_len;
211 this_len = 2;
212 for (;
213 io + 1 + this_len < in_len
214 && in[try + this_len] == in[io + 1 + this_len];
215 this_len++)
216 ;
217 if (this_len >= mlen)
218 mlen = 0;
219 }
220 }
221 }
222 if (!mlen)
223 {
224 if (!litofs)
225 litofs = io + 1;
226 io++;
227 }
228 else
229 {
230 if (litofs)
231 {
232 unsigned litlen;
233 litofs--;
234 litlen = io - litofs;
235 /* fprintf(stderr, "lit: %d\n", litlen); */
236 while (litlen)
237 {
238 unsigned int easy_sz;
239 /* Emit everything we can as self-describers. As soon as
240 we hit a byte we can't emit as such we're going to emit
241 a length descriptor anyway, so we can as well include
242 bytes < 0x80 which might follow afterwards in that run. */
243 for (easy_sz = 0;
244 easy_sz < litlen && in[litofs + easy_sz] < 0x80;
245 easy_sz++)
246 ;
247 if (easy_sz)
248 {
249 if (oo + easy_sz >= out_len)
250 return 0;
251 memcpy(out + oo, in + litofs, easy_sz);
252 litofs += easy_sz;
253 oo += easy_sz;
254 litlen -= easy_sz;
255 if (!litlen)
256 break;
257 }
258 if (litlen <= 32)
259 {
260 if (oo + 1 + litlen >= out_len)
261 return 0;
262 out[oo++] = 0x80 | (litlen - 1);
263 while (litlen--)
264 out[oo++] = in[litofs++];
265 break;
266 }
267 else
268 {
269 /* Literal length > 32, so chunk it. */
270 if (oo + 1 + 32 >= out_len)
271 return 0;
272 out[oo++] = 0x80 | 31;
273 memcpy(out + oo, in + litofs, 32);
274 oo += 32;
275 litofs += 32;
276 litlen -= 32;
277 }
278 }
279 litofs = 0;
280 }
281
282 /* fprintf(stderr, "ref: %d @ %d\n", mlen, mofs); */
283
284 if (mlen >= 2 && mlen <= 9 && mofs < 1024)
285 {
286 if (oo + 2 >= out_len)
287 return 0;
288 out[oo++] = 0xa0 | ((mofs & 0x300) >> 5) | (mlen - 2);
289 out[oo++] = mofs & 0xff;
290 }
291 else if (mlen >= 10 && mlen <= 41 && mofs < 256)
292 {
293 if (oo + 2 >= out_len)
294 return 0;
295 out[oo++] = 0xc0 | (mlen - 10);
296 out[oo++] = mofs;
297 }
298 else if (mofs >= 65536)
299 {
300 assert(mlen >= 5 && mlen < 2048 + 5)({ if (mlen >= 5 && mlen < 2048 + 5) ; else __assert_fail
("mlen >= 5 && mlen < 2048 + 5", "/home/brain/Projects/upstream/libsolv/src/repopage.c"
, 300, __PRETTY_FUNCTION__); })
;
301 if (oo + 5 >= out_len)
302 return 0;
303 out[oo++] = 0xf8 | ((mlen - 5) >> 8);
304 out[oo++] = (mlen - 5) & 0xff;
305 out[oo++] = mofs & 0xff;
306 out[oo++] = (mofs >> 8) & 0xff;
307 out[oo++] = mofs >> 16;
308 }
309 else if (mlen >= 3 && mlen <= 18)
310 {
311 assert(mofs < 65536)({ if (mofs < 65536) ; else __assert_fail ("mofs < 65536"
, "/home/brain/Projects/upstream/libsolv/src/repopage.c", 311
, __PRETTY_FUNCTION__); })
;
312 if (oo + 3 >= out_len)
313 return 0;
314 out[oo++] = 0xe0 | (mlen - 3);
315 out[oo++] = mofs & 0xff;
316 out[oo++] = mofs >> 8;
317 }
318 else
319 {
320 assert(mlen >= 19 && mlen <= 4095 + 19 && mofs < 65536)({ if (mlen >= 19 && mlen <= 4095 + 19 &&
mofs < 65536) ; else __assert_fail ("mlen >= 19 && mlen <= 4095 + 19 && mofs < 65536"
, "/home/brain/Projects/upstream/libsolv/src/repopage.c", 320
, __PRETTY_FUNCTION__); })
;
321 if (oo + 4 >= out_len)
322 return 0;
323 out[oo++] = 0xf0 | ((mlen - 19) >> 8);
324 out[oo++] = (mlen - 19) & 0xff;
325 out[oo++] = mofs & 0xff;
326 out[oo++] = mofs >> 8;
327 }
328 /* Insert the hashes for the compressed run [io..io+mlen-1].
329 For [io] we have it already done at the start of the loop.
330 So it's from [io+1..io+mlen-1], and we need three chars per
331 hash, so the accessed characters will be [io+1..io+mlen-1+2],
332 ergo io+mlen+1 < in_len. */
333 mlen--;
334 io++;
335 while (mlen--)
336 {
337 if (io + 2 < in_len)
338 {
339 unsigned int hval =
340 in[io] | in[io + 1] << 8 | in[io + 2] << 16;
341 hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5;
342 hval = hval & (HS(65536) - 1);
343 hnext[io] = htab[hval];
344 htab[hval] = io;
345 }
346 io++;
347 };
348 }
349 }
350 /* We might have some characters left. */
351 if (io < in_len && !litofs)
352 litofs = io + 1;
353 io = in_len;
354 if (litofs)
355 {
356 unsigned litlen;
357 litofs--;
358 litlen = io - litofs;
359 /* fprintf(stderr, "lit: %d\n", litlen); */
360 while (litlen)
361 {
362 unsigned int easy_sz;
363 /* Emit everything we can as self-describers. As soon as we hit a
364 byte we can't emit as such we're going to emit a length
365 descriptor anyway, so we can as well include bytes < 0x80 which
366 might follow afterwards in that run. */
367 for (easy_sz = 0; easy_sz < litlen && in[litofs + easy_sz] < 0x80;
368 easy_sz++)
369 ;
370 if (easy_sz)
371 {
372 if (oo + easy_sz >= out_len)
373 return 0;
374 memcpy(out + oo, in + litofs, easy_sz);
375 litofs += easy_sz;
376 oo += easy_sz;
377 litlen -= easy_sz;
378 if (!litlen)
379 break;
380 }
381 if (litlen <= 32)
382 {
383 if (oo + 1 + litlen >= out_len)
384 return 0;
385 out[oo++] = 0x80 | (litlen - 1);
386 while (litlen--)
387 out[oo++] = in[litofs++];
388 break;
389 }
390 else
391 {
392 /* Literal length > 32, so chunk it. */
393 if (oo + 1 + 32 >= out_len)
394 return 0;
395 out[oo++] = 0x80 | 31;
396 memcpy(out + oo, in + litofs, 32);
397 oo += 32;
398 litofs += 32;
399 litlen -= 32;
400 }
401 }
402 litofs = 0;
Value stored to 'litofs' is never read
403 }
404 return oo;
405}
406
407static unsigned int
408unchecked_decompress_buf(const unsigned char *in, unsigned int in_len,
409 unsigned char *out,
410 unsigned int out_len __attribute__((unused)))
411{
412 unsigned char *orig_out = out;
413 const unsigned char *in_end = in + in_len;
414 while (in < in_end)
415 {
416 unsigned int first = *in++;
417 int o;
418 switch (first >> 4)
419 {
420 default:
421 /* This default case can't happen, but GCCs VRP is not strong
422 enough to see this, so make this explicitely not fall to
423 the end of the switch, so that we don't have to initialize
424 o above. */
425 continue;
426 case 0: case 1:
427 case 2: case 3:
428 case 4: case 5:
429 case 6: case 7:
430 /* a 0LLLLLLL */
431 /* fprintf (stderr, "lit: 1\n"); */
432 *out++ = first;
433 continue;
434 case 8: case 9:
435 /* b 100lllll <l+1 bytes> */
436 {
437 unsigned int l = first & 31;
438 /* fprintf (stderr, "lit: %d\n", l); */
439 do
440 *out++ = *in++;
441 while (l--);
442 continue;
443 }
444 case 10: case 11:
445 /* c 101oolll <8o> */
446 {
447 o = first & (3 << 3);
448 o = (o << 5) | *in++;
449 first = (first & 7) + 2;
450 break;
451 }
452 case 12: case 13:
453 /* d 110lllll <8o> */
454 {
455 o = *in++;
456 first = (first & 31) + 10;
457 break;
458 }
459 case 14:
460 /* e 1110llll <8o> <8o> */
461 {
462 o = in[0] | (in[1] << 8);
463 in += 2;
464 first = first & 31;
465 first += 3;
466 break;
467 }
468 case 15:
469 /* f1 1111llll <8o> <8o> <8l> */
470 /* f2 11110lll <8o> <8o> <8l> */
471 /* g 11111lll <8o> <8o> <8o> <8l> */
472 {
473 first = first & 15;
474 if (first >= 8)
475 {
476 first = (((first - 8) << 8) | in[0]) + 5;
477 o = in[1] | (in[2] << 8) | (in[3] << 16);
478 in += 4;
479 }
480 else
481 {
482 first = ((first << 8) | in[0]) + 19;
483 o = in[1] | (in[2] << 8);
484 in += 3;
485 }
486 break;
487 }
488 }
489 /* fprintf(stderr, "ref: %d @ %d\n", first, o); */
490 o++;
491 o = -o;
492#if 0
493 /* We know that first will not be zero, and this loop structure is
494 better optimizable. */
495 do
496 {
497 *out = *(out - o);
498 out++;
499 }
500 while (--first);
501#else
502 switch (first)
503 {
504 case 18: *out = *(out + o); out++;
505 case 17: *out = *(out + o); out++;
506 case 16: *out = *(out + o); out++;
507 case 15: *out = *(out + o); out++;
508 case 14: *out = *(out + o); out++;
509 case 13: *out = *(out + o); out++;
510 case 12: *out = *(out + o); out++;
511 case 11: *out = *(out + o); out++;
512 case 10: *out = *(out + o); out++;
513 case 9: *out = *(out + o); out++;
514 case 8: *out = *(out + o); out++;
515 case 7: *out = *(out + o); out++;
516 case 6: *out = *(out + o); out++;
517 case 5: *out = *(out + o); out++;
518 case 4: *out = *(out + o); out++;
519 case 3: *out = *(out + o); out++;
520 case 2: *out = *(out + o); out++;
521 case 1: *out = *(out + o); out++;
522 case 0: break;
523 default:
524 /* Duff duff :-) */
525 switch (first & 15)
526 {
527 do
528 {
529 case 0: *out = *(out + o); out++;
530 case 15: *out = *(out + o); out++;
531 case 14: *out = *(out + o); out++;
532 case 13: *out = *(out + o); out++;
533 case 12: *out = *(out + o); out++;
534 case 11: *out = *(out + o); out++;
535 case 10: *out = *(out + o); out++;
536 case 9: *out = *(out + o); out++;
537 case 8: *out = *(out + o); out++;
538 case 7: *out = *(out + o); out++;
539 case 6: *out = *(out + o); out++;
540 case 5: *out = *(out + o); out++;
541 case 4: *out = *(out + o); out++;
542 case 3: *out = *(out + o); out++;
543 case 2: *out = *(out + o); out++;
544 case 1: *out = *(out + o); out++;
545 }
546 while ((int)(first -= 16) > 0);
547 }
548 break;
549 }
550#endif
551 }
552 return out - orig_out;
553}
554
555/**********************************************************************/
556
557void repopagestore_init(Repopagestore *store)
558{
559 memset(store, 0, sizeof(*store));
560 store->pagefd = -1;
561}
562
563void repopagestore_free(Repopagestore *store)
564{
565 store->blob_store = solv_free(store->blob_store);
566 store->file_pages = solv_free(store->file_pages);
567 store->mapped_at = solv_free(store->mapped_at);
568 store->mapped = solv_free(store->mapped);
569 if (store->pagefd != -1)
570 close(store->pagefd);
571 store->pagefd = -1;
572}
573
574
575/**********************************************************************/
576
577unsigned char *
578repopagestore_load_page_range(Repopagestore *store, unsigned int pstart, unsigned int pend)
579{
580/* Make sure all pages from PSTART to PEND (inclusive) are loaded,
581 and are consecutive. Return a pointer to the mapping of PSTART. */
582 unsigned char buf[REPOPAGE_BLOBSIZE(1 << 15)];
583 unsigned int i, best, pnum;
584
585 if (pstart == pend)
586 {
587 /* Quick check in case the requested page is already mapped */
588 if (store->mapped_at[pstart] != -1)
589 return store->blob_store + store->mapped_at[pstart];
590 }
591 else
592 {
593 /* Quick check in case all pages are already mapped and consecutive. */
594 for (pnum = pstart; pnum <= pend; pnum++)
595 if (store->mapped_at[pnum] == -1
596 || (pnum > pstart
597 && store->mapped_at[pnum]
598 != store->mapped_at[pnum-1] + REPOPAGE_BLOBSIZE(1 << 15)))
599 break;
600 if (pnum > pend)
601 return store->blob_store + store->mapped_at[pstart];
602 }
603
604 if (store->pagefd == -1 || !store->file_pages)
605 return 0; /* no backing file */
606
607#ifdef DEBUG_PAGING
608 fprintf(stderrstderr, "PAGE: want %d pages starting at %d\n", pend - pstart + 1, pstart);
609#endif
610
611 /* Ensure that we can map the numbers of pages we need at all. */
612 if (pend - pstart + 1 > store->nmapped)
613 {
614 unsigned int oldcan = store->nmapped;
615 store->nmapped = pend - pstart + 1;
616 if (store->nmapped < 4)
617 store->nmapped = 4;
618 store->mapped = solv_realloc2(store->mapped, store->nmapped, sizeof(store->mapped[0]));
619 for (i = oldcan; i < store->nmapped; i++)
620 store->mapped[i] = -1;
621 store->blob_store = solv_realloc2(store->blob_store, store->nmapped, REPOPAGE_BLOBSIZE(1 << 15));
622#ifdef DEBUG_PAGING
623 fprintf(stderrstderr, "PAGE: can map %d pages\n", store->nmapped);
624#endif
625 }
626
627 if (store->mapped_at[pstart] != -1)
628 {
629 /* assume forward search */
630 best = store->mapped_at[pstart] / REPOPAGE_BLOBSIZE(1 << 15);
631 if (best + (pend - pstart) >= store->nmapped)
632 best = 0;
633 }
634 else if (store->mapped_at[pend] != -1)
635 {
636 /* assume backward search */
637 best = store->mapped_at[pend] / REPOPAGE_BLOBSIZE(1 << 15);
638 if (best < pend - pstart)
639 best = store->nmapped - 1;
640 best -= pend - pstart;
641 }
642 else
643 {
644 /* choose some "random" location to avoid thrashing */
645 best = (pstart + store->rr_counter++) % (store->nmapped - pend + pstart);
646 }
647
648 /* So we want to map our pages from [best] to [best+pend-pstart].
649 Use a very simple strategy, which doesn't make the best use of
650 our resources, but works. Throw away all pages in that range
651 (even ours) then copy around ours or read them in. */
652 for (i = best, pnum = pstart; pnum <= pend; i++, pnum++)
653 {
654 unsigned int pnum_mapped_at;
655 unsigned int oldpnum = store->mapped[i];
656 if (oldpnum != -1)
657 {
658 if (oldpnum == pnum)
659 continue; /* already have the correct page */
660 /* Evict this page. */
661#ifdef DEBUG_PAGING
662 fprintf(stderrstderr, "PAGE: evict page %d from %d\n", oldpnum, i);
663#endif
664 store->mapped[i] = -1;
665 store->mapped_at[oldpnum] = -1;
666 }
667 /* check if we can copy the correct content (before it gets evicted) */
668 pnum_mapped_at = store->mapped_at[pnum];
669 if (pnum_mapped_at != -1 && pnum_mapped_at != i * REPOPAGE_BLOBSIZE(1 << 15))
670 {
671 void *dest = store->blob_store + i * REPOPAGE_BLOBSIZE(1 << 15);
672#ifdef DEBUG_PAGING
673 fprintf(stderrstderr, "PAGECOPY: %d from %d to %d\n", pnum, pnum_mapped_at / REPOPAGE_BLOBSIZE(1 << 15), i);
674#endif
675 memcpy(dest, store->blob_store + pnum_mapped_at, REPOPAGE_BLOBSIZE(1 << 15));
676 store->mapped[pnum_mapped_at / REPOPAGE_BLOBSIZE(1 << 15)] = -1;
677 store->mapped[i] = pnum;
678 store->mapped_at[pnum] = i * REPOPAGE_BLOBSIZE(1 << 15);
679 }
680 }
681
682 /* Everything is free now. Read in or copy the pages we want. */
683 for (i = best, pnum = pstart; pnum <= pend; i++, pnum++)
684 {
685 void *dest = store->blob_store + i * REPOPAGE_BLOBSIZE(1 << 15);
686 if (store->mapped_at[pnum] != -1)
687 {
688 unsigned int pnum_mapped_at = store->mapped_at[pnum];
689 if (pnum_mapped_at != i * REPOPAGE_BLOBSIZE(1 << 15))
690 {
691#ifdef DEBUG_PAGING
692 fprintf(stderrstderr, "PAGECOPY: %d from %d to %d\n", pnum, pnum_mapped_at / REPOPAGE_BLOBSIZE(1 << 15), i);
693#endif
694 /* Still mapped somewhere else, so just copy it from there. */
695 memcpy(dest, store->blob_store + pnum_mapped_at, REPOPAGE_BLOBSIZE(1 << 15));
696 store->mapped[pnum_mapped_at / REPOPAGE_BLOBSIZE(1 << 15)] = -1;
697 }
698 }
699 else
700 {
701 Attrblobpage *p = store->file_pages + pnum;
702 unsigned int in_len = p->page_size;
703 unsigned int compressed = in_len & 1;
704 in_len >>= 1;
705#ifdef DEBUG_PAGING
706 fprintf(stderrstderr, "PAGEIN: %d to %d", pnum, i);
707#endif
708 if (pread(store->pagefd, compressed ? buf : dest, in_len, store->file_offset + p->page_offset) != in_len)
709 {
710 perror("mapping pread");
711 return 0;
712 }
713 if (compressed)
714 {
715 unsigned int out_len;
716 out_len = unchecked_decompress_buf(buf, in_len, dest, REPOPAGE_BLOBSIZE(1 << 15));
717 if (out_len != REPOPAGE_BLOBSIZE(1 << 15) && pnum < store->num_pages - 1)
718 {
719#ifdef DEBUG_PAGING
720 fprintf(stderrstderr, "can't decompress\n");
721#endif
722 return 0;
723 }
724#ifdef DEBUG_PAGING
725 fprintf(stderrstderr, " (expand %d to %d)", in_len, out_len);
726#endif
727 }
728#ifdef DEBUG_PAGING
729 fprintf(stderrstderr, "\n");
730#endif
731 }
732 store->mapped_at[pnum] = i * REPOPAGE_BLOBSIZE(1 << 15);
733 store->mapped[i] = pnum;
734 }
735 return store->blob_store + best * REPOPAGE_BLOBSIZE(1 << 15);
736}
737
738unsigned int
739repopagestore_compress_page(unsigned char *page, unsigned int len, unsigned char *cpage, unsigned int max)
740{
741 return compress_buf(page, len, cpage, max);
742}
743
744#define SOLV_ERROR_EOF3 3
745#define SOLV_ERROR_CORRUPT6 6
746
747static inline unsigned int
748read_u32(FILE *fp)
749{
750 int c, i;
751 unsigned int x = 0;
752
753 for (i = 0; i < 4; i++)
754 {
755 c = getc(fp)_IO_getc (fp);
756 if (c == EOF(-1))
757 return 0;
758 x = (x << 8) | c;
759 }
760 return x;
761}
762
763/* Try to either setup on-demand paging (using FP as backing
764 file), or in case that doesn't work (FP not seekable) slurps in
765 all pages and deactivates paging. */
766int
767repopagestore_read_or_setup_pages(Repopagestore *store, FILE *fp, unsigned int pagesz, unsigned int blobsz)
768{
769 unsigned int npages;
770 unsigned int i;
771 unsigned int can_seek;
772 unsigned int cur_page_ofs;
773 unsigned char buf[REPOPAGE_BLOBSIZE(1 << 15)];
774
775 if (pagesz != REPOPAGE_BLOBSIZE(1 << 15))
776 {
777 /* We could handle this by slurping in everything. */
778 return SOLV_ERROR_CORRUPT6;
779 }
780 can_seek = 1;
781 if ((store->file_offset = ftell(fp)) < 0)
782 can_seek = 0;
783 clearerr(fp);
784 if (can_seek)
785 store->pagefd = dup(fileno(fp));
786 if (store->pagefd == -1)
787 can_seek = 0;
788 else
789 fcntl(store->pagefd, F_SETFD2, FD_CLOEXEC1);
790
791#ifdef DEBUG_PAGING
792 fprintf(stderrstderr, "can %sseek\n", can_seek ? "" : "NOT ");
793#endif
794 npages = (blobsz + REPOPAGE_BLOBSIZE(1 << 15) - 1) / REPOPAGE_BLOBSIZE(1 << 15);
795
796 store->num_pages = npages;
797 store->mapped_at = solv_malloc2(npages, sizeof(*store->mapped_at));
798
799 /* If we can't seek on our input we have to slurp in everything.
800 * Otherwise set up file_pages containing offest/length of the
801 * pages */
802 if (can_seek)
803 store->file_pages = solv_malloc2(npages, sizeof(*store->file_pages));
804 else
805 store->blob_store = solv_malloc2(npages, REPOPAGE_BLOBSIZE(1 << 15));
806 cur_page_ofs = 0;
807 for (i = 0; i < npages; i++)
808 {
809 unsigned int in_len = read_u32(fp);
810 unsigned int compressed = in_len & 1;
811 in_len >>= 1;
812#ifdef DEBUG_PAGING
813 fprintf(stderrstderr, "page %d: len %d (%scompressed)\n",
814 i, in_len, compressed ? "" : "not ");
815#endif
816 if (can_seek)
817 {
818 Attrblobpage *p = store->file_pages + i;
819 cur_page_ofs += 4;
820 store->mapped_at[i] = -1; /* not mapped yet */
821 p->page_offset = cur_page_ofs;
822 p->page_size = in_len * 2 + compressed;
823 if (fseek(fp, in_len, SEEK_CUR1) < 0)
824 {
825 /* We can't fall back to non-seeking behaviour as we already
826 read over some data pages without storing them away. */
827 close(store->pagefd);
828 store->pagefd = -1;
829 return SOLV_ERROR_EOF3;
830 }
831 cur_page_ofs += in_len;
832 }
833 else
834 {
835 unsigned int out_len;
836 void *dest = store->blob_store + i * REPOPAGE_BLOBSIZE(1 << 15);
837 store->mapped_at[i] = i * REPOPAGE_BLOBSIZE(1 << 15);
838 /* We can't seek, so suck everything in. */
839 if (fread(compressed ? buf : dest, in_len, 1, fp) != 1)
840 {
841 perror("fread");
842 return SOLV_ERROR_EOF3;
843 }
844 if (compressed)
845 {
846 out_len = unchecked_decompress_buf(buf, in_len, dest, REPOPAGE_BLOBSIZE(1 << 15));
847 if (out_len != REPOPAGE_BLOBSIZE(1 << 15) && i < npages - 1)
848 {
849 return SOLV_ERROR_CORRUPT6;
850 }
851 }
852 }
853 }
854 return 0;
855}
856
857void
858repopagestore_disable_paging(Repopagestore *store)
859{
860 if (store->num_pages)
861 repopagestore_load_page_range(store, 0, store->num_pages - 1);
862}
863
864#ifdef STANDALONE
865
866static void
867transfer_file(FILE * from, FILE * to, int compress)
868{
869 unsigned char inb[BLOCK_SIZE(65536*1)];
870 unsigned char outb[BLOCK_SIZE(65536*1)];
871 while (!feof (from) && !ferror (from))
872 {
873 unsigned int in_len, out_len;
874 if (compress)
875 {
876 in_len = fread(inb, 1, BLOCK_SIZE(65536*1), from);
877 if (in_len)
878 {
879 unsigned char *b = outb;
880 out_len = compress_buf(inb, in_len, outb, sizeof (outb));
881 if (!out_len)
882 b = inb, out_len = in_len;
883 if (fwrite(&out_len, sizeof (out_len), 1, to) != 1)
884 {
885 perror("write size");
886 exit (1);
887 }
888 if (fwrite(b, out_len, 1, to) != 1)
889 {
890 perror("write data");
891 exit (1);
892 }
893 }
894 }
895 else
896 {
897 if (fread(&in_len, sizeof(in_len), 1, from) != 1)
898 {
899 if (feof(from))
900 return;
901 perror("can't read size");
902 exit(1);
903 }
904 if (fread(inb, in_len, 1, from) != 1)
905 {
906 perror("can't read data");
907 exit(1);
908 }
909 out_len =
910 unchecked_decompress_buf(inb, in_len, outb, sizeof(outb));
911 if (fwrite(outb, out_len, 1, to) != 1)
912 {
913 perror("can't write output");
914 exit(1);
915 }
916 }
917 }
918}
919
920/* Just for benchmarking purposes. */
921static void
922dumb_memcpy(void *dest, const void *src, unsigned int len)
923{
924 char *d = dest;
925 const char *s = src;
926 while (len--)
927 *d++ = *s++;
928}
929
930static void
931benchmark(FILE * from)
932{
933 unsigned char inb[BLOCK_SIZE(65536*1)];
934 unsigned char outb[BLOCK_SIZE(65536*1)];
935 unsigned int in_len = fread(inb, 1, BLOCK_SIZE(65536*1), from);
936 unsigned int out_len;
937 if (!in_len)
938 {
939 perror("can't read from input");
940 exit(1);
941 }
942
943 unsigned int calib_loop;
944 unsigned int per_loop;
945 unsigned int i, j;
946 clock_t start, end;
947 float seconds;
948
949#if 0
950 calib_loop = 1;
951 per_loop = 0;
952 start = clock();
953 while ((clock() - start) < CLOCKS_PER_SEC((__clock_t) 1000000) / 4)
954 {
955 calib_loop *= 2;
956 for (i = 0; i < calib_loop; i++)
957 dumb_memcpy(outb, inb, in_len);
958 per_loop += calib_loop;
959 }
960
961 fprintf(stderrstderr, "memcpy:\nCalibrated to %d iterations per loop\n",
962 per_loop);
963
964 start = clock();
965 for (i = 0; i < 10; i++)
966 for (j = 0; j < per_loop; j++)
967 dumb_memcpy(outb, inb, in_len);
968 end = clock();
969 seconds = (end - start) / (float) CLOCKS_PER_SEC((__clock_t) 1000000);
970 fprintf(stderrstderr, "%.2f seconds == %.2f MB/s\n", seconds,
971 ((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds));
972#endif
973
974 calib_loop = 1;
975 per_loop = 0;
976 start = clock();
977 while ((clock() - start) < CLOCKS_PER_SEC((__clock_t) 1000000) / 4)
978 {
979 calib_loop *= 2;
980 for (i = 0; i < calib_loop; i++)
981 compress_buf(inb, in_len, outb, sizeof(outb));
982 per_loop += calib_loop;
983 }
984
985 fprintf(stderrstderr, "compression:\nCalibrated to %d iterations per loop\n",
986 per_loop);
987
988 start = clock();
989 for (i = 0; i < 10; i++)
990 for (j = 0; j < per_loop; j++)
991 compress_buf(inb, in_len, outb, sizeof(outb));
992 end = clock();
993 seconds = (end - start) / (float) CLOCKS_PER_SEC((__clock_t) 1000000);
994 fprintf(stderrstderr, "%.2f seconds == %.2f MB/s\n", seconds,
995 ((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds));
996
997 out_len = compress_buf(inb, in_len, outb, sizeof(outb));
998
999 calib_loop = 1;
1000 per_loop = 0;
1001 start = clock();
1002 while ((clock() - start) < CLOCKS_PER_SEC((__clock_t) 1000000) / 4)
1003 {
1004 calib_loop *= 2;
1005 for (i = 0; i < calib_loop; i++)
1006 unchecked_decompress_buf(outb, out_len, inb, sizeof(inb));
1007 per_loop += calib_loop;
1008 }
1009
1010 fprintf(stderrstderr, "decompression:\nCalibrated to %d iterations per loop\n",
1011 per_loop);
1012
1013 start = clock();
1014 for (i = 0; i < 10; i++)
1015 for (j = 0; j < per_loop; j++)
1016 unchecked_decompress_buf(outb, out_len, inb, sizeof(inb));
1017 end = clock();
1018 seconds = (end - start) / (float) CLOCKS_PER_SEC((__clock_t) 1000000);
1019 fprintf(stderrstderr, "%.2f seconds == %.2f MB/s\n", seconds,
1020 ((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds));
1021}
1022
1023int
1024main(int argc, char *argv[])
1025{
1026 int compress = 1;
1027 if (argc > 1 && !strcmp(argv[1], "-d"))
1028 compress = 0;
1029 if (argc > 1 && !strcmp(argv[1], "-b"))
1030 benchmark(stdinstdin);
1031 else
1032 transfer_file(stdinstdin, stdoutstdout, compress);
1033 return 0;
1034}
1035
1036#endif
1037