File: src/repopage.c
Warning: line 402, column 7: Value stored to 'litofs' is never read
1 | /* |
2 | * Copyright (c) 2007-2012, Novell Inc. |
3 | * |
4 | * This program is licensed under the BSD license, read LICENSE.BSD |
5 | * for further information |
6 | */ |
7 | |
8 | /* |
9 | * repopage.c |
10 | * |
11 | * Paging and compression functions for the vertical repository data. |
12 | * Vertical data is grouped by key, normal data is grouped by solvable. |
13 | * This makes searching for a string in vertical data fast as there's |
14 | no need to skip over data of keys we're not interested in. |
15 | * |
16 | * The vertical data is split into pages, each page is compressed with a fast |
17 | * compression algorithm. These pages are read in on demand, not recently used |
18 | * pages automatically get dropped. |
19 | */ |
20 | |
21 | #define _XOPEN_SOURCE 500 |
22 | |
23 | #include <sys/types.h> |
24 | #include <stdint.h> |
25 | #include <stdio.h> |
26 | #include <stdlib.h> |
27 | #include <string.h> |
28 | #include <unistd.h> |
29 | #include <assert.h> |
30 | #include <fcntl.h> |
31 | #include <time.h> |
32 | |
33 | #include "repo.h" |
34 | #include "repopage.h" |
35 | |
36 | |
37 | |
38 | #define BLOCK_SIZE (65536*1) |
39 | #if BLOCK_SIZE <= 65536 |
40 | typedef uint16_t Ref; |
41 | #else |
42 | typedef uint32_t Ref; |
43 | #endif |
44 | |
45 | /* |
46 | The format is tailored for fast decompression (i.e. only byte based), |
47 | and skewed to ASCII content (highest bit often not set): |
48 | |
49 | a 0LLLLLLL |
50 | - self-describing ASCII character hex L |
51 | b 100lllll <l+1 bytes> |
52 | - literal run of length l+1 |
53 | c 101oolll <8o> |
54 | - back ref of length l+2, at offset -(o+1) (o < 1 << 10) |
55 | d 110lllll <8o> |
56 | - back ref of length l+2+8, at offset -(o+1) (o < 1 << 8) |
57 | e 1110llll <8o> <8o> |
58 | - back ref of length l+3, at offset -(o+1) (o < 1 << 16) |
59 | f1 1111llll <8l> <8o> <8o> |
60 | - back ref, length l+19 (l < 1<<12), offset -(o+1) (o < 1<<16) |
61 | f2 11110lll <8l> <8o> <8o> |
62 | - back ref, length l+19 (l < 1<<11), offset -(o+1) (o < 1<<16) |
63 | g 11111lll <8l> <8o> <8o> <8o> |
64 | - back ref, length l+5 (l < 1<<11), offset -(o+1) (o < 1<<24) |
65 | |
66 | Generally for a literal of length L we need L+1 bytes, hence it is |
67 | better to encode also very short backrefs (2 chars) as backrefs if |
68 | their offset is small, as that only needs two bytes. Except if we |
69 | already have a literal run, in that case it's better to append there, |
70 | instead of breaking it for a backref. So given a potential backref |
71 | at offset O, length L the strategy is as follows: |
72 | |
73 | L < 2 : encode as 1-literal |
74 | L == 2, O > 1024 : encode as 1-literal |
75 | L == 2, have already literals: encode as 1-literal |
76 | O = O - 1 |
77 | L >= 2, L <= 9, O < 1024 : encode as c |
78 | L >= 10, L <= 41, O < 256 : encode as d |
79 | else we have either O >= 1024, or L >= 42: |
80 | L < 3 : encode as 1-literal |
81 | L >= 3, L <= 18, O < 65536 : encode as e |
82 | L >= 19, L <= 4095+18, O < 65536 : encode as f |
83 | else we have either L >= 4096+18 or O >= 65536. |
84 | O >= 65536: encode as 1-literal, too bad |
85 | (with the current block size this can't happen) |
86 | L >= 4096+18, so reduce to 4095+18 : encode as f |
87 | */ |
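/* Illustrative example (added, not part of the original source): with |
the table above, a 4-byte match found 100 bytes back (mlen = 4, |
mofs = 99, i.e. offset -(mofs+1) = -100) falls into form c and is |
emitted as the two bytes 0xa2 0x63: 0xa2 is 0xa0 plus the two high |
offset bits (zero here) plus mlen-2, and 0x63 is mofs & 0xff. The |
decoder's "case 10: case 11:" branch below reverses exactly this. */ |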
88 | |
89 | |
90 | static unsigned int |
91 | compress_buf(const unsigned char *in, unsigned int in_len, |
92 | unsigned char *out, unsigned int out_len) |
93 | { |
94 | unsigned int oo = 0; /* out-offset */ |
95 | unsigned int io = 0; /* in-offset */ |
96 | #define HS (65536) |
97 | Ref htab[HS]; |
98 | Ref hnext[BLOCK_SIZE]; |
99 | unsigned int litofs = 0; |
100 | memset(htab, -1, sizeof (htab)); |
101 | memset(hnext, -1, sizeof (hnext)); |
102 | while (io + 2 < in_len) |
103 | { |
104 | /* Search for a match of the string starting at IN, we have at |
105 | least three characters. */ |
106 | unsigned int hval = in[io] | in[io + 1] << 8 | in[io + 2] << 16; |
107 | unsigned int try, mlen, mofs, tries; |
108 | hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5; |
109 | hval = hval & (HS - 1); |
110 | try = htab[hval]; |
111 | hnext[io] = htab[hval]; |
112 | htab[hval] = io; |
113 | mlen = 0; |
114 | mofs = 0; |
115 | |
116 | for (tries = 0; try != -1 && tries < 12; tries++) |
117 | { |
118 | if (try < io |
119 | && in[try] == in[io] && in[try + 1] == in[io + 1]) |
120 | { |
121 | mlen = 2; |
122 | mofs = (io - try) - 1; |
123 | break; |
124 | } |
125 | try = hnext[try]; |
126 | } |
127 | for (; try != -1 && tries < 12; tries++) |
128 | { |
129 | /* assert(mlen >= 2); */ |
130 | /* assert(io + mlen < in_len); */ |
131 | /* Try a match starting from [io] with the strings at [try]. |
132 | That's only sensible if TRY actually is before IO (can happen |
133 | with uninit hash table). If we have a previous match already |
134 | we're only going to take the new one if it's longer, hence |
135 | check the potentially last character. */ |
136 | if (try < io && in[try + mlen] == in[io + mlen]) |
137 | { |
138 | unsigned int this_len, this_ofs; |
139 | if (memcmp(in + try, in + io, mlen)) |
140 | goto no_match; |
141 | this_len = mlen + 1; |
142 | /* Now try extending the match by more characters. */ |
143 | for (; |
144 | io + this_len < in_len |
145 | && in[try + this_len] == in[io + this_len]; this_len++) |
146 | ; |
147 | #if 0 |
148 | unsigned int testi; |
149 | for (testi = 0; testi < this_len; testi++) |
150 | assert(in[try + testi] == in[io + testi]); |
151 | #endif |
152 | this_ofs = (io - try) - 1; |
153 | /*if (this_ofs > 65535) |
154 | goto no_match; */ |
155 | #if 0 |
156 | assert(this_len >= 2); |
157 | assert(this_len >= mlen); |
158 | assert(this_len > mlen || (this_len == mlen && this_ofs > mofs)); |
159 | #endif |
160 | mlen = this_len, mofs = this_ofs; |
161 | /* If our match extends up to the end of input, no next |
162 | match can become better. This is not just an |
163 | optimization, it establishes a loop invariant |
164 | (io + mlen < in_len). */ |
165 | if (io + mlen >= in_len) |
166 | goto match_done; |
167 | } |
168 | no_match: |
169 | try = hnext[try]; |
170 | /*if (io - try - 1 >= 65536) |
171 | break;*/ |
172 | } |
173 | |
174 | match_done: |
175 | if (mlen) |
176 | { |
177 | /*fprintf(stderr, "%d %d\n", mlen, mofs);*/ |
178 | if (mlen == 2 && (litofs || mofs >= 1024)) |
179 | mlen = 0; |
180 | /*else if (mofs >= 65536) |
181 | mlen = 0;*/ |
182 | else if (mofs >= 65536) |
183 | { |
184 | if (mlen >= 2048 + 5) |
185 | mlen = 2047 + 5; |
186 | else if (mlen < 5) |
187 | mlen = 0; |
188 | } |
189 | else if (mlen < 3) |
190 | mlen = 0; |
191 | /*else if (mlen >= 4096 + 19) |
192 | mlen = 4095 + 19;*/ |
193 | else if (mlen >= 2048 + 19) |
194 | mlen = 2047 + 19; |
195 | /* Skip this match if the next character would deliver a better one, |
196 | but only do this if we have the chance to really extend the |
197 | length (i.e. our current length isn't yet the (conservative) |
198 | maximum). */ |
199 | if (mlen && mlen < (2048 + 5) && io + 3 < in_len) |
200 | { |
201 | unsigned int hval = |
202 | in[io + 1] | in[io + 2] << 8 | in[io + 3] << 16; |
203 | unsigned int try; |
204 | hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5; |
205 | hval = hval & (HS - 1); |
206 | try = htab[hval]; |
207 | if (try < io + 1 |
208 | && in[try] == in[io + 1] && in[try + 1] == in[io + 2]) |
209 | { |
210 | unsigned int this_len; |
211 | this_len = 2; |
212 | for (; |
213 | io + 1 + this_len < in_len |
214 | && in[try + this_len] == in[io + 1 + this_len]; |
215 | this_len++) |
216 | ; |
217 | if (this_len >= mlen) |
218 | mlen = 0; |
219 | } |
220 | } |
221 | } |
222 | if (!mlen) |
223 | { |
224 | if (!litofs) |
225 | litofs = io + 1; |
226 | io++; |
227 | } |
228 | else |
229 | { |
230 | if (litofs) |
231 | { |
232 | unsigned litlen; |
233 | litofs--; |
234 | litlen = io - litofs; |
235 | /* fprintf(stderr, "lit: %d\n", litlen); */ |
236 | while (litlen) |
237 | { |
238 | unsigned int easy_sz; |
239 | /* Emit everything we can as self-describers. As soon as |
240 | we hit a byte we can't emit as such we're going to emit |
241 | a length descriptor anyway, so we can as well include |
242 | bytes < 0x80 which might follow afterwards in that run. */ |
243 | for (easy_sz = 0; |
244 | easy_sz < litlen && in[litofs + easy_sz] < 0x80; |
245 | easy_sz++) |
246 | ; |
247 | if (easy_sz) |
248 | { |
249 | if (oo + easy_sz >= out_len) |
250 | return 0; |
251 | memcpy(out + oo, in + litofs, easy_sz); |
252 | litofs += easy_sz; |
253 | oo += easy_sz; |
254 | litlen -= easy_sz; |
255 | if (!litlen) |
256 | break; |
257 | } |
258 | if (litlen <= 32) |
259 | { |
260 | if (oo + 1 + litlen >= out_len) |
261 | return 0; |
262 | out[oo++] = 0x80 | (litlen - 1); |
263 | while (litlen--) |
264 | out[oo++] = in[litofs++]; |
265 | break; |
266 | } |
267 | else |
268 | { |
269 | /* Literal length > 32, so chunk it. */ |
270 | if (oo + 1 + 32 >= out_len) |
271 | return 0; |
272 | out[oo++] = 0x80 | 31; |
273 | memcpy(out + oo, in + litofs, 32); |
274 | oo += 32; |
275 | litofs += 32; |
276 | litlen -= 32; |
277 | } |
278 | } |
279 | litofs = 0; |
280 | } |
281 | |
282 | /* fprintf(stderr, "ref: %d @ %d\n", mlen, mofs); */ |
283 | |
284 | if (mlen >= 2 && mlen <= 9 && mofs < 1024) |
285 | { |
286 | if (oo + 2 >= out_len) |
287 | return 0; |
288 | out[oo++] = 0xa0 | ((mofs & 0x300) >> 5) | (mlen - 2); |
289 | out[oo++] = mofs & 0xff; |
290 | } |
291 | else if (mlen >= 10 && mlen <= 41 && mofs < 256) |
292 | { |
293 | if (oo + 2 >= out_len) |
294 | return 0; |
295 | out[oo++] = 0xc0 | (mlen - 10); |
296 | out[oo++] = mofs; |
297 | } |
298 | else if (mofs >= 65536) |
299 | { |
300 | assert(mlen >= 5 && mlen < 2048 + 5); |
301 | if (oo + 5 >= out_len) |
302 | return 0; |
303 | out[oo++] = 0xf8 | ((mlen - 5) >> 8); |
304 | out[oo++] = (mlen - 5) & 0xff; |
305 | out[oo++] = mofs & 0xff; |
306 | out[oo++] = (mofs >> 8) & 0xff; |
307 | out[oo++] = mofs >> 16; |
308 | } |
309 | else if (mlen >= 3 && mlen <= 18) |
310 | { |
311 | assert(mofs < 65536); |
312 | if (oo + 3 >= out_len) |
313 | return 0; |
314 | out[oo++] = 0xe0 | (mlen - 3); |
315 | out[oo++] = mofs & 0xff; |
316 | out[oo++] = mofs >> 8; |
317 | } |
318 | else |
319 | { |
320 | assert(mlen >= 19 && mlen <= 4095 + 19 && mofs < 65536); |
321 | if (oo + 4 >= out_len) |
322 | return 0; |
323 | out[oo++] = 0xf0 | ((mlen - 19) >> 8); |
324 | out[oo++] = (mlen - 19) & 0xff; |
325 | out[oo++] = mofs & 0xff; |
326 | out[oo++] = mofs >> 8; |
327 | } |
328 | /* Insert the hashes for the compressed run [io..io+mlen-1]. |
329 | For [io] we have it already done at the start of the loop. |
330 | So it's from [io+1..io+mlen-1], and we need three chars per |
331 | hash, so the accessed characters will be [io+1..io+mlen-1+2], |
332 | ergo io+mlen+1 < in_len. */ |
333 | mlen--; |
334 | io++; |
335 | while (mlen--) |
336 | { |
337 | if (io + 2 < in_len) |
338 | { |
339 | unsigned int hval = |
340 | in[io] | in[io + 1] << 8 | in[io + 2] << 16; |
341 | hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5; |
342 | hval = hval & (HS - 1); |
343 | hnext[io] = htab[hval]; |
344 | htab[hval] = io; |
345 | } |
346 | io++; |
347 | }; |
348 | } |
349 | } |
350 | /* We might have some characters left. */ |
351 | if (io < in_len && !litofs) |
352 | litofs = io + 1; |
353 | io = in_len; |
354 | if (litofs) |
355 | { |
356 | unsigned litlen; |
357 | litofs--; |
358 | litlen = io - litofs; |
359 | /* fprintf(stderr, "lit: %d\n", litlen); */ |
360 | while (litlen) |
361 | { |
362 | unsigned int easy_sz; |
363 | /* Emit everything we can as self-describers. As soon as we hit a |
364 | byte we can't emit as such we're going to emit a length |
365 | descriptor anyway, so we can as well include bytes < 0x80 which |
366 | might follow afterwards in that run. */ |
367 | for (easy_sz = 0; easy_sz < litlen && in[litofs + easy_sz] < 0x80; |
368 | easy_sz++) |
369 | ; |
370 | if (easy_sz) |
371 | { |
372 | if (oo + easy_sz >= out_len) |
373 | return 0; |
374 | memcpy(out + oo, in + litofs, easy_sz); |
375 | litofs += easy_sz; |
376 | oo += easy_sz; |
377 | litlen -= easy_sz; |
378 | if (!litlen) |
379 | break; |
380 | } |
381 | if (litlen <= 32) |
382 | { |
383 | if (oo + 1 + litlen >= out_len) |
384 | return 0; |
385 | out[oo++] = 0x80 | (litlen - 1); |
386 | while (litlen--) |
387 | out[oo++] = in[litofs++]; |
388 | break; |
389 | } |
390 | else |
391 | { |
392 | /* Literal length > 32, so chunk it. */ |
393 | if (oo + 1 + 32 >= out_len) |
394 | return 0; |
395 | out[oo++] = 0x80 | 31; |
396 | memcpy(out + oo, in + litofs, 32); |
397 | oo += 32; |
398 | litofs += 32; |
399 | litlen -= 32; |
400 | } |
401 | } |
402 | litofs = 0; |
Value stored to 'litofs' is never read | |
403 | } |
404 | return oo; |
405 | } |
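/* Note (added): compress_buf() returns the number of bytes written to |
OUT, or 0 if the compressed form would not fit into OUT_LEN bytes; |
callers such as transfer_file() below then fall back to storing the |
block uncompressed. */ |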
406 | |
407 | static unsigned int |
408 | unchecked_decompress_buf(const unsigned char *in, unsigned int in_len, |
409 | unsigned char *out, |
410 | unsigned int out_len __attribute__((unused))) |
411 | { |
412 | unsigned char *orig_out = out; |
413 | const unsigned char *in_end = in + in_len; |
414 | while (in < in_end) |
415 | { |
416 | unsigned int first = *in++; |
417 | int o; |
418 | switch (first >> 4) |
419 | { |
420 | default: |
421 | /* This default case can't happen, but GCC's VRP is not strong |
422 | enough to see this, so make this explicitly not fall to |
423 | the end of the switch, so that we don't have to initialize |
424 | o above. */ |
425 | continue; |
426 | case 0: case 1: |
427 | case 2: case 3: |
428 | case 4: case 5: |
429 | case 6: case 7: |
430 | /* a 0LLLLLLL */ |
431 | /* fprintf (stderr, "lit: 1\n"); */ |
432 | *out++ = first; |
433 | continue; |
434 | case 8: case 9: |
435 | /* b 100lllll <l+1 bytes> */ |
436 | { |
437 | unsigned int l = first & 31; |
438 | /* fprintf (stderr, "lit: %d\n", l); */ |
439 | do |
440 | *out++ = *in++; |
441 | while (l--); |
442 | continue; |
443 | } |
444 | case 10: case 11: |
445 | /* c 101oolll <8o> */ |
446 | { |
447 | o = first & (3 << 3); |
448 | o = (o << 5) | *in++; |
449 | first = (first & 7) + 2; |
450 | break; |
451 | } |
452 | case 12: case 13: |
453 | /* d 110lllll <8o> */ |
454 | { |
455 | o = *in++; |
456 | first = (first & 31) + 10; |
457 | break; |
458 | } |
459 | case 14: |
460 | /* e 1110llll <8o> <8o> */ |
461 | { |
462 | o = in[0] | (in[1] << 8); |
463 | in += 2; |
464 | first = first & 31; |
465 | first += 3; |
466 | break; |
467 | } |
468 | case 15: |
469 | /* f1 1111llll <8l> <8o> <8o> */ |
470 | /* f2 11110lll <8l> <8o> <8o> */ |
471 | /* g 11111lll <8l> <8o> <8o> <8o> */ |
472 | { |
473 | first = first & 15; |
474 | if (first >= 8) |
475 | { |
476 | first = (((first - 8) << 8) | in[0]) + 5; |
477 | o = in[1] | (in[2] << 8) | (in[3] << 16); |
478 | in += 4; |
479 | } |
480 | else |
481 | { |
482 | first = ((first << 8) | in[0]) + 19; |
483 | o = in[1] | (in[2] << 8); |
484 | in += 3; |
485 | } |
486 | break; |
487 | } |
488 | } |
489 | /* fprintf(stderr, "ref: %d @ %d\n", first, o); */ |
490 | o++; |
491 | o = -o; |
492 | #if 0 |
493 | /* We know that first will not be zero, and this loop structure is |
494 | better optimizable. */ |
495 | do |
496 | { |
497 | *out = *(out - o); |
498 | out++; |
499 | } |
500 | while (--first); |
501 | #else |
502 | switch (first) |
503 | { |
504 | case 18: *out = *(out + o); out++; |
505 | case 17: *out = *(out + o); out++; |
506 | case 16: *out = *(out + o); out++; |
507 | case 15: *out = *(out + o); out++; |
508 | case 14: *out = *(out + o); out++; |
509 | case 13: *out = *(out + o); out++; |
510 | case 12: *out = *(out + o); out++; |
511 | case 11: *out = *(out + o); out++; |
512 | case 10: *out = *(out + o); out++; |
513 | case 9: *out = *(out + o); out++; |
514 | case 8: *out = *(out + o); out++; |
515 | case 7: *out = *(out + o); out++; |
516 | case 6: *out = *(out + o); out++; |
517 | case 5: *out = *(out + o); out++; |
518 | case 4: *out = *(out + o); out++; |
519 | case 3: *out = *(out + o); out++; |
520 | case 2: *out = *(out + o); out++; |
521 | case 1: *out = *(out + o); out++; |
522 | case 0: break; |
523 | default: |
524 | /* Duff duff :-) */ |
525 | switch (first & 15) |
526 | { |
527 | do |
528 | { |
529 | case 0: *out = *(out + o); out++; |
530 | case 15: *out = *(out + o); out++; |
531 | case 14: *out = *(out + o); out++; |
532 | case 13: *out = *(out + o); out++; |
533 | case 12: *out = *(out + o); out++; |
534 | case 11: *out = *(out + o); out++; |
535 | case 10: *out = *(out + o); out++; |
536 | case 9: *out = *(out + o); out++; |
537 | case 8: *out = *(out + o); out++; |
538 | case 7: *out = *(out + o); out++; |
539 | case 6: *out = *(out + o); out++; |
540 | case 5: *out = *(out + o); out++; |
541 | case 4: *out = *(out + o); out++; |
542 | case 3: *out = *(out + o); out++; |
543 | case 2: *out = *(out + o); out++; |
544 | case 1: *out = *(out + o); out++; |
545 | } |
546 | while ((int)(first -= 16) > 0); |
547 | } |
548 | break; |
549 | } |
550 | #endif |
551 | } |
552 | return out - orig_out; |
553 | } |
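/* Note (added): as the name suggests, this decompressor performs no |
bounds checking on the output -- OUT_LEN is marked unused. Callers |
must make sure OUT can hold the fully decompressed block (a page of |
REPOPAGE_BLOBSIZE bytes in the paging code below); the return value |
is the number of bytes written to OUT. */ |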
554 | |
555 | /**********************************************************************/ |
556 | |
557 | void repopagestore_init(Repopagestore *store) |
558 | { |
559 | memset(store, 0, sizeof(*store)); |
560 | store->pagefd = -1; |
561 | } |
562 | |
563 | void repopagestore_free(Repopagestore *store) |
564 | { |
565 | store->blob_store = solv_free(store->blob_store); |
566 | store->file_pages = solv_free(store->file_pages); |
567 | store->mapped_at = solv_free(store->mapped_at); |
568 | store->mapped = solv_free(store->mapped); |
569 | if (store->pagefd != -1) |
570 | close(store->pagefd); |
571 | store->pagefd = -1; |
572 | } |
573 | |
574 | |
575 | /**********************************************************************/ |
576 | |
577 | unsigned char * |
578 | repopagestore_load_page_range(Repopagestore *store, unsigned int pstart, unsigned int pend) |
579 | { |
580 | /* Make sure all pages from PSTART to PEND (inclusive) are loaded, |
581 | and are consecutive. Return a pointer to the mapping of PSTART. */ |
582 | unsigned char buf[REPOPAGE_BLOBSIZE]; |
583 | unsigned int i, best, pnum; |
584 | |
585 | if (pstart == pend) |
586 | { |
587 | /* Quick check in case the requested page is already mapped */ |
588 | if (store->mapped_at[pstart] != -1) |
589 | return store->blob_store + store->mapped_at[pstart]; |
590 | } |
591 | else |
592 | { |
593 | /* Quick check in case all pages are already mapped and consecutive. */ |
594 | for (pnum = pstart; pnum <= pend; pnum++) |
595 | if (store->mapped_at[pnum] == -1 |
596 | || (pnum > pstart |
597 | && store->mapped_at[pnum] |
598 | != store->mapped_at[pnum-1] + REPOPAGE_BLOBSIZE)) |
599 | break; |
600 | if (pnum > pend) |
601 | return store->blob_store + store->mapped_at[pstart]; |
602 | } |
603 | |
604 | if (store->pagefd == -1 || !store->file_pages) |
605 | return 0; /* no backing file */ |
606 | |
607 | #ifdef DEBUG_PAGING |
608 | fprintf(stderr, "PAGE: want %d pages starting at %d\n", pend - pstart + 1, pstart); |
609 | #endif |
610 | |
611 | /* Ensure that we can map the numbers of pages we need at all. */ |
612 | if (pend - pstart + 1 > store->nmapped) |
613 | { |
614 | unsigned int oldcan = store->nmapped; |
615 | store->nmapped = pend - pstart + 1; |
616 | if (store->nmapped < 4) |
617 | store->nmapped = 4; |
618 | store->mapped = solv_realloc2(store->mapped, store->nmapped, sizeof(store->mapped[0])); |
619 | for (i = oldcan; i < store->nmapped; i++) |
620 | store->mapped[i] = -1; |
621 | store->blob_store = solv_realloc2(store->blob_store, store->nmapped, REPOPAGE_BLOBSIZE); |
622 | #ifdef DEBUG_PAGING |
623 | fprintf(stderr, "PAGE: can map %d pages\n", store->nmapped); |
624 | #endif |
625 | } |
626 | |
627 | if (store->mapped_at[pstart] != -1) |
628 | { |
629 | /* assume forward search */ |
630 | best = store->mapped_at[pstart] / REPOPAGE_BLOBSIZE; |
631 | if (best + (pend - pstart) >= store->nmapped) |
632 | best = 0; |
633 | } |
634 | else if (store->mapped_at[pend] != -1) |
635 | { |
636 | /* assume backward search */ |
637 | best = store->mapped_at[pend] / REPOPAGE_BLOBSIZE; |
638 | if (best < pend - pstart) |
639 | best = store->nmapped - 1; |
640 | best -= pend - pstart; |
641 | } |
642 | else |
643 | { |
644 | /* choose some "random" location to avoid thrashing */ |
645 | best = (pstart + store->rr_counter++) % (store->nmapped - pend + pstart); |
646 | } |
647 | |
648 | /* So we want to map our pages from [best] to [best+pend-pstart]. |
649 | Use a very simple strategy, which doesn't make the best use of |
650 | our resources, but works. Throw away all pages in that range |
651 | (even ours) then copy around ours or read them in. */ |
652 | for (i = best, pnum = pstart; pnum <= pend; i++, pnum++) |
653 | { |
654 | unsigned int pnum_mapped_at; |
655 | unsigned int oldpnum = store->mapped[i]; |
656 | if (oldpnum != -1) |
657 | { |
658 | if (oldpnum == pnum) |
659 | continue; /* already have the correct page */ |
660 | /* Evict this page. */ |
661 | #ifdef DEBUG_PAGING |
662 | fprintf(stderr, "PAGE: evict page %d from %d\n", oldpnum, i); |
663 | #endif |
664 | store->mapped[i] = -1; |
665 | store->mapped_at[oldpnum] = -1; |
666 | } |
667 | /* check if we can copy the correct content (before it gets evicted) */ |
668 | pnum_mapped_at = store->mapped_at[pnum]; |
669 | if (pnum_mapped_at != -1 && pnum_mapped_at != i * REPOPAGE_BLOBSIZE) |
670 | { |
671 | void *dest = store->blob_store + i * REPOPAGE_BLOBSIZE; |
672 | #ifdef DEBUG_PAGING |
673 | fprintf(stderr, "PAGECOPY: %d from %d to %d\n", pnum, pnum_mapped_at / REPOPAGE_BLOBSIZE, i); |
674 | #endif |
675 | memcpy(dest, store->blob_store + pnum_mapped_at, REPOPAGE_BLOBSIZE); |
676 | store->mapped[pnum_mapped_at / REPOPAGE_BLOBSIZE] = -1; |
677 | store->mapped[i] = pnum; |
678 | store->mapped_at[pnum] = i * REPOPAGE_BLOBSIZE; |
679 | } |
680 | } |
681 | |
682 | /* Everything is free now. Read in or copy the pages we want. */ |
683 | for (i = best, pnum = pstart; pnum <= pend; i++, pnum++) |
684 | { |
685 | void *dest = store->blob_store + i * REPOPAGE_BLOBSIZE; |
686 | if (store->mapped_at[pnum] != -1) |
687 | { |
688 | unsigned int pnum_mapped_at = store->mapped_at[pnum]; |
689 | if (pnum_mapped_at != i * REPOPAGE_BLOBSIZE) |
690 | { |
691 | #ifdef DEBUG_PAGING |
692 | fprintf(stderr, "PAGECOPY: %d from %d to %d\n", pnum, pnum_mapped_at / REPOPAGE_BLOBSIZE, i); |
693 | #endif |
694 | /* Still mapped somewhere else, so just copy it from there. */ |
695 | memcpy(dest, store->blob_store + pnum_mapped_at, REPOPAGE_BLOBSIZE); |
696 | store->mapped[pnum_mapped_at / REPOPAGE_BLOBSIZE] = -1; |
697 | } |
698 | } |
699 | else |
700 | { |
701 | Attrblobpage *p = store->file_pages + pnum; |
702 | unsigned int in_len = p->page_size; |
703 | unsigned int compressed = in_len & 1; |
704 | in_len >>= 1; |
705 | #ifdef DEBUG_PAGING |
706 | fprintf(stderr, "PAGEIN: %d to %d", pnum, i); |
707 | #endif |
708 | if (pread(store->pagefd, compressed ? buf : dest, in_len, store->file_offset + p->page_offset) != in_len) |
709 | { |
710 | perror("mapping pread"); |
711 | return 0; |
712 | } |
713 | if (compressed) |
714 | { |
715 | unsigned int out_len; |
716 | out_len = unchecked_decompress_buf(buf, in_len, dest, REPOPAGE_BLOBSIZE); |
717 | if (out_len != REPOPAGE_BLOBSIZE && pnum < store->num_pages - 1) |
718 | { |
719 | #ifdef DEBUG_PAGING |
720 | fprintf(stderr, "can't decompress\n"); |
721 | #endif |
722 | return 0; |
723 | } |
724 | #ifdef DEBUG_PAGING |
725 | fprintf(stderr, " (expand %d to %d)", in_len, out_len); |
726 | #endif |
727 | } |
728 | #ifdef DEBUG_PAGING |
729 | fprintf(stderr, "\n"); |
730 | #endif |
731 | } |
732 | store->mapped_at[pnum] = i * REPOPAGE_BLOBSIZE; |
733 | store->mapped[i] = pnum; |
734 | } |
735 | return store->blob_store + best * REPOPAGE_BLOBSIZE; |
736 | } |
737 | |
738 | unsigned int |
739 | repopagestore_compress_page(unsigned char *page, unsigned int len, unsigned char *cpage, unsigned int max) |
740 | { |
741 | return compress_buf(page, len, cpage, max); |
742 | } |
743 | |
744 | #define SOLV_ERROR_EOF 3 |
745 | #define SOLV_ERROR_CORRUPT 6 |
746 | |
747 | static inline unsigned int |
748 | read_u32(FILE *fp) |
749 | { |
750 | int c, i; |
751 | unsigned int x = 0; |
752 | |
753 | for (i = 0; i < 4; i++) |
754 | { |
755 | c = getc(fp); |
756 | if (c == EOF) |
757 | return 0; |
758 | x = (x << 8) | c; |
759 | } |
760 | return x; |
761 | } |
762 | |
763 | /* Try to either setup on-demand paging (using FP as backing |
764 | file), or in case that doesn't work (FP not seekable) slurps in |
765 | all pages and deactivates paging. */ |
766 | int |
767 | repopagestore_read_or_setup_pages(Repopagestore *store, FILE *fp, unsigned int pagesz, unsigned int blobsz) |
768 | { |
769 | unsigned int npages; |
770 | unsigned int i; |
771 | unsigned int can_seek; |
772 | unsigned int cur_page_ofs; |
773 | unsigned char buf[REPOPAGE_BLOBSIZE]; |
774 | |
775 | if (pagesz != REPOPAGE_BLOBSIZE) |
776 | { |
777 | /* We could handle this by slurping in everything. */ |
778 | return SOLV_ERROR_CORRUPT; |
779 | } |
780 | can_seek = 1; |
781 | if ((store->file_offset = ftell(fp)) < 0) |
782 | can_seek = 0; |
783 | clearerr(fp); |
784 | if (can_seek) |
785 | store->pagefd = dup(fileno(fp)); |
786 | if (store->pagefd == -1) |
787 | can_seek = 0; |
788 | else |
789 | fcntl(store->pagefd, F_SETFD, FD_CLOEXEC); |
790 | |
791 | #ifdef DEBUG_PAGING |
792 | fprintf(stderr, "can %sseek\n", can_seek ? "" : "NOT "); |
793 | #endif |
794 | npages = (blobsz + REPOPAGE_BLOBSIZE - 1) / REPOPAGE_BLOBSIZE; |
795 | |
796 | store->num_pages = npages; |
797 | store->mapped_at = solv_malloc2(npages, sizeof(*store->mapped_at)); |
798 | |
799 | /* If we can't seek on our input we have to slurp in everything. |
800 | * Otherwise set up file_pages containing offset/length of the |
801 | * pages */ |
802 | if (can_seek) |
803 | store->file_pages = solv_malloc2(npages, sizeof(*store->file_pages)); |
804 | else |
805 | store->blob_store = solv_malloc2(npages, REPOPAGE_BLOBSIZE); |
806 | cur_page_ofs = 0; |
807 | for (i = 0; i < npages; i++) |
808 | { |
809 | unsigned int in_len = read_u32(fp); |
810 | unsigned int compressed = in_len & 1; |
811 | in_len >>= 1; |
812 | #ifdef DEBUG_PAGING |
813 | fprintf(stderr, "page %d: len %d (%scompressed)\n", |
814 | i, in_len, compressed ? "" : "not "); |
815 | #endif |
816 | if (can_seek) |
817 | { |
818 | Attrblobpage *p = store->file_pages + i; |
819 | cur_page_ofs += 4; |
820 | store->mapped_at[i] = -1; /* not mapped yet */ |
821 | p->page_offset = cur_page_ofs; |
822 | p->page_size = in_len * 2 + compressed; |
823 | if (fseek(fp, in_len, SEEK_CUR) < 0) |
824 | { |
825 | /* We can't fall back to non-seeking behaviour as we already |
826 | read over some data pages without storing them away. */ |
827 | close(store->pagefd); |
828 | store->pagefd = -1; |
829 | return SOLV_ERROR_EOF; |
830 | } |
831 | cur_page_ofs += in_len; |
832 | } |
833 | else |
834 | { |
835 | unsigned int out_len; |
836 | void *dest = store->blob_store + i * REPOPAGE_BLOBSIZE; |
837 | store->mapped_at[i] = i * REPOPAGE_BLOBSIZE; |
838 | /* We can't seek, so suck everything in. */ |
839 | if (fread(compressed ? buf : dest, in_len, 1, fp) != 1) |
840 | { |
841 | perror("fread"); |
842 | return SOLV_ERROR_EOF; |
843 | } |
844 | if (compressed) |
845 | { |
846 | out_len = unchecked_decompress_buf(buf, in_len, dest, REPOPAGE_BLOBSIZE); |
847 | if (out_len != REPOPAGE_BLOBSIZE && i < npages - 1) |
848 | { |
849 | return SOLV_ERROR_CORRUPT; |
850 | } |
851 | } |
852 | } |
853 | } |
854 | return 0; |
855 | } |
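/* Usage sketch (added, illustrative): a reader typically calls |
repopagestore_init(&store), then repopagestore_read_or_setup_pages() |
with the file positioned at the start of the page data, and later |
repopagestore_load_page_range() for the pages it needs; |
repopagestore_free() releases the buffers and the duped page fd. */ |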
856 | |
857 | void |
858 | repopagestore_disable_paging(Repopagestore *store) |
859 | { |
860 | if (store->num_pages) |
861 | repopagestore_load_page_range(store, 0, store->num_pages - 1); |
862 | } |
863 | |
864 | #ifdef STANDALONE |
865 | |
866 | static void |
867 | transfer_file(FILE * from, FILE * to, int compress) |
868 | { |
869 | unsigned char inb[BLOCK_SIZE]; |
870 | unsigned char outb[BLOCK_SIZE]; |
871 | while (!feof (from) && !ferror (from)) |
872 | { |
873 | unsigned int in_len, out_len; |
874 | if (compress) |
875 | { |
876 | in_len = fread(inb, 1, BLOCK_SIZE, from); |
877 | if (in_len) |
878 | { |
879 | unsigned char *b = outb; |
880 | out_len = compress_buf(inb, in_len, outb, sizeof (outb)); |
881 | if (!out_len) |
882 | b = inb, out_len = in_len; |
883 | if (fwrite(&out_len, sizeof (out_len), 1, to) != 1) |
884 | { |
885 | perror("write size"); |
886 | exit (1); |
887 | } |
888 | if (fwrite(b, out_len, 1, to) != 1) |
889 | { |
890 | perror("write data"); |
891 | exit (1); |
892 | } |
893 | } |
894 | } |
895 | else |
896 | { |
897 | if (fread(&in_len, sizeof(in_len), 1, from) != 1) |
898 | { |
899 | if (feof(from)) |
900 | return; |
901 | perror("can't read size"); |
902 | exit(1); |
903 | } |
904 | if (fread(inb, in_len, 1, from) != 1) |
905 | { |
906 | perror("can't read data"); |
907 | exit(1); |
908 | } |
909 | out_len = |
910 | unchecked_decompress_buf(inb, in_len, outb, sizeof(outb)); |
911 | if (fwrite(outb, out_len, 1, to) != 1) |
912 | { |
913 | perror("can't write output"); |
914 | exit(1); |
915 | } |
916 | } |
917 | } |
918 | } |
919 | |
920 | /* Just for benchmarking purposes. */ |
921 | static void |
922 | dumb_memcpy(void *dest, const void *src, unsigned int len) |
923 | { |
924 | char *d = dest; |
925 | const char *s = src; |
926 | while (len--) |
927 | *d++ = *s++; |
928 | } |
929 | |
930 | static void |
931 | benchmark(FILE * from) |
932 | { |
933 | unsigned char inb[BLOCK_SIZE]; |
934 | unsigned char outb[BLOCK_SIZE]; |
935 | unsigned int in_len = fread(inb, 1, BLOCK_SIZE, from); |
936 | unsigned int out_len; |
937 | if (!in_len) |
938 | { |
939 | perror("can't read from input"); |
940 | exit(1); |
941 | } |
942 | |
943 | unsigned int calib_loop; |
944 | unsigned int per_loop; |
945 | unsigned int i, j; |
946 | clock_t start, end; |
947 | float seconds; |
948 | |
949 | #if 0 |
950 | calib_loop = 1; |
951 | per_loop = 0; |
952 | start = clock(); |
953 | while ((clock() - start) < CLOCKS_PER_SEC / 4) |
954 | { |
955 | calib_loop *= 2; |
956 | for (i = 0; i < calib_loop; i++) |
957 | dumb_memcpy(outb, inb, in_len); |
958 | per_loop += calib_loop; |
959 | } |
960 | |
961 | fprintf(stderr, "memcpy:\nCalibrated to %d iterations per loop\n", |
962 | per_loop); |
963 | |
964 | start = clock(); |
965 | for (i = 0; i < 10; i++) |
966 | for (j = 0; j < per_loop; j++) |
967 | dumb_memcpy(outb, inb, in_len); |
968 | end = clock(); |
969 | seconds = (end - start) / (float) CLOCKS_PER_SEC; |
970 | fprintf(stderr, "%.2f seconds == %.2f MB/s\n", seconds, |
971 | ((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds)); |
972 | #endif |
973 | |
974 | calib_loop = 1; |
975 | per_loop = 0; |
976 | start = clock(); |
977 | while ((clock() - start) < CLOCKS_PER_SEC / 4) |
978 | { |
979 | calib_loop *= 2; |
980 | for (i = 0; i < calib_loop; i++) |
981 | compress_buf(inb, in_len, outb, sizeof(outb)); |
982 | per_loop += calib_loop; |
983 | } |
984 | |
985 | fprintf(stderr, "compression:\nCalibrated to %d iterations per loop\n", |
986 | per_loop); |
987 | |
988 | start = clock(); |
989 | for (i = 0; i < 10; i++) |
990 | for (j = 0; j < per_loop; j++) |
991 | compress_buf(inb, in_len, outb, sizeof(outb)); |
992 | end = clock(); |
993 | seconds = (end - start) / (float) CLOCKS_PER_SEC; |
994 | fprintf(stderr, "%.2f seconds == %.2f MB/s\n", seconds, |
995 | ((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds)); |
996 | |
997 | out_len = compress_buf(inb, in_len, outb, sizeof(outb)); |
998 | |
999 | calib_loop = 1; |
1000 | per_loop = 0; |
1001 | start = clock(); |
1002 | while ((clock() - start) < CLOCKS_PER_SEC / 4) |
1003 | { |
1004 | calib_loop *= 2; |
1005 | for (i = 0; i < calib_loop; i++) |
1006 | unchecked_decompress_buf(outb, out_len, inb, sizeof(inb)); |
1007 | per_loop += calib_loop; |
1008 | } |
1009 | |
1010 | fprintf(stderr, "decompression:\nCalibrated to %d iterations per loop\n", |
1011 | per_loop); |
1012 | |
1013 | start = clock(); |
1014 | for (i = 0; i < 10; i++) |
1015 | for (j = 0; j < per_loop; j++) |
1016 | unchecked_decompress_buf(outb, out_len, inb, sizeof(inb)); |
1017 | end = clock(); |
1018 | seconds = (end - start) / (float) CLOCKS_PER_SEC; |
1019 | fprintf(stderr, "%.2f seconds == %.2f MB/s\n", seconds, |
1020 | ((long long) in_len * per_loop * 10) / (1024 * 1024 * seconds)); |
1021 | } |
1022 | |
1023 | int |
1024 | main(int argc, char *argv[]) |
1025 | { |
1026 | int compress = 1; |
1027 | if (argc > 1 && !strcmp(argv[1], "-d")) |
1028 | compress = 0; |
1029 | if (argc > 1 && !strcmp(argv[1], "-b")) |
1030 | benchmark(stdin); |
1031 | else |
1032 | transfer_file(stdin, stdout, compress); |
1033 | return 0; |
1034 | } |
1035 | |
1036 | #endif |
1037 |