2 * Copyright (c) 1995-1998, Index Data.
3 * See the file LICENSE for details.
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.13 1998-03-18 09:23:55 adam
8 * Blocks are stored in chunks on free list - up to factor 2 in speed.
9 * Fixed bug that could occur in block category rearrangement.
11 * Revision 1.12 1998/03/16 10:37:24 adam
12 * Added more statistics.
14 * Revision 1.11 1998/03/13 15:30:50 adam
15 * New functions isc_block_used and isc_block_size. Fixed 'leak'
18 * Revision 1.10 1998/03/11 11:18:18 adam
19 * Changed the isc_merge to take into account the mfill (minimum-fill).
21 * Revision 1.9 1998/03/06 13:54:02 adam
22 * Fixed two nasty bugs in isc_merge.
24 * Revision 1.8 1997/09/17 12:19:20 adam
25 * Zebra version corresponds to YAZ version 1.4.
26 * Changed Zebra server so that it doesn't depend on global common_resource.
28 * Revision 1.7 1997/02/12 20:42:43 adam
29 * Bug fix: during isc_merge operations, some pages weren't marked dirty
30 * even though they should be. At this point the merge operation marks
31 * a page dirty if the previous page changed at all. A better approach is
32 * to mark it dirty if the last key written changed in previous page.
34 * Revision 1.6 1996/11/08 11:15:29 adam
35 * Number of keys in chain are stored in first block and the function
36 * to retrieve this information, isc_pp_num is implemented.
38 * Revision 1.5 1996/11/04 14:08:57 adam
39 * Optimized free block usage.
41 * Revision 1.4 1996/11/01 13:36:46 adam
42 * New element, max_blocks_mem, that controls how many blocks of max size
43 * to store in memory during isc_merge.
44 * Function isc_merge now ignores delete/update of identical keys and
45 * the proper blocks are then non-dirty and not written in flush_blocks.
47 * Revision 1.3 1996/11/01 08:59:14 adam
48 * First version of isc_merge that supports update/delete.
50 * Revision 1.2 1996/10/29 16:44:56 adam
53 * Revision 1.1 1996/10/29 13:40:48 adam
60 * Reduction to lower categories in isc_merge
/* Forward declarations of file-local helpers that are defined further
   down in this file. */
70 static void flush_block (ISAMC is, int cat);
71 static void release_fc (ISAMC is, int cat);
72 static void init_fc (ISAMC is, int cat);
/* Non-zero enables the flush_block / alloc_block / release_block
   definitions guarded by the later "#if ISAMC_FREELIST_CHUNK" test. */
74 #define ISAMC_FREELIST_CHUNK 1
/* Build a method record with default settings.
   The record is obtained from xmalloc; the visible defaults are
   compare_item = NULL and max_blocks_mem = 10.  A static table of
   default block categories (def_cat) is declared here as well.
   NOTE(review): the lines that attach def_cat to the record and
   return it are not visible in this excerpt. */
78 ISAMC_M isc_getmethod (void)
/* Each row presumably reads { bsize, ifill, mfill, mblocks }, going by
   the field order logged in isc_open -- TODO confirm against the
   ISAMC_filecat_s declaration.  The last visible row ends in 0, which
   is the mblocks value isc_open's category loop stops on. */
80 static struct ISAMC_filecat_s def_cat[] = {
88 { 2048, 1900, 1700, 7 },
89 { 8192, 8000, 7900, 7 },
90 { 32768, 32000, 31000, 7 },
91 {131072, 129000, 121000, 0 }
94 ISAMC_M m = xmalloc (sizeof(*m));
101 m->compare_item = NULL;
105 m->max_blocks_mem = 10;
/* Open an ISAMC instance on top of the block-file handle `bfs'.
   Visible steps: allocate the handle, copy *method into freshly
   allocated storage, walk the category table to size the merge buffer,
   then for each category open a block file named name + one letter,
   load or initialise its header and reset the per-file counters.
   NOTE(review): several original lines (braces, declarations of is, i
   and fname, the assignment of no_files, the return) are not visible
   in this excerpt. */
111 ISAMC isc_open (BFiles bfs, const char *name, int writeflag, ISAMC_M method)
114 ISAMC_filecat filecat;
116 int max_buf_size = 0;
118 is = xmalloc (sizeof(*is));
/* work on a copy of the caller's method record */
120 is->method = xmalloc (sizeof(*is->method));
121 memcpy (is->method, method, sizeof(*method));
122 filecat = is->method->filecat;
125 /* determine number of block categories */
126 if (is->method->debug)
127 logf (LOG_LOG, "isc: bsize ifill mfill mblocks");
130 if (is->method->debug)
131 logf (LOG_LOG, "isc:%6d %6d %6d %6d",
132 filecat[i].bsize, filecat[i].ifill,
133 filecat[i].mfill, filecat[i].mblocks);
/* remember the largest mblocks * bsize product seen so far */
134 if (max_buf_size < filecat[i].mblocks * filecat[i].bsize)
135 max_buf_size = filecat[i].mblocks * filecat[i].bsize;
/* the entry whose mblocks is 0 is the last one processed */
136 } while (filecat[i++].mblocks);
139 /* max_buf_size is the largest buffer to be used during merge */
/* bump max_buf_size up to the next multiple of filecat[i].bsize that is
   strictly greater than its current value */
140 max_buf_size = (1 + max_buf_size / filecat[i].bsize) * filecat[i].bsize;
/* and make sure it holds at least max_blocks_mem + 1 such blocks */
141 if (max_buf_size < (1+is->method->max_blocks_mem) * filecat[i].bsize)
142 max_buf_size = (1+is->method->max_blocks_mem) * filecat[i].bsize;
143 if (is->method->debug)
144 logf (LOG_LOG, "isc: max_buf_size %d", max_buf_size);
146 assert (is->no_files > 0);
147 is->files = xmalloc (sizeof(*is->files)*is->no_files);
/* merge_buf is either a zero-filled buffer of max_buf_size+256 bytes or
   NULL.  NOTE(review): the condition selecting between the two is not
   visible here -- presumably writeflag; confirm. */
150 is->merge_buf = xmalloc (max_buf_size+256);
151 memset (is->merge_buf, 0, max_buf_size+256);
154 is->merge_buf = NULL;
155 for (i = 0; i<is->no_files; i++)
/* file name is `name' followed by a single letter: 'A' for category 0,
   'B' for category 1, and so on */
159 sprintf (fname, "%s%c", name, i+'A');
160 is->files[i].bf = bf_open (bfs, fname, is->method->filecat[i].bsize,
162 is->files[i].head_is_dirty = 0;
/* when bf_read gives 0 for the header in block 0, start from defaults:
   lastblock = 1 and freelist = 0 (0 is treated as "no free list" by the
   allocation code) */
163 if (!bf_read (is->files[i].bf, 0, 0, sizeof(ISAMC_head),
166 is->files[i].head.lastblock = 1;
167 is->files[i].head.freelist = 0;
/* state of the in-memory free-list buffer: it is one block in size and
   alloc_entries_max is the number of ints in a block minus one */
169 is->files[i].alloc_entries_num = 0;
170 is->files[i].alloc_entries_max =
171 is->method->filecat[i].bsize / sizeof(int) - 1;
172 is->files[i].alloc_buf = xmalloc (is->method->filecat[i].bsize);
/* reset the statistics counters that isc_close logs in debug mode */
173 is->files[i].no_writes = 0;
174 is->files[i].no_reads = 0;
175 is->files[i].no_skip_writes = 0;
176 is->files[i].no_allocated = 0;
177 is->files[i].no_released = 0;
178 is->files[i].no_remap = 0;
179 is->files[i].no_forward = 0;
180 is->files[i].no_backward = 0;
181 is->files[i].sum_forward = 0;
182 is->files[i].sum_backward = 0;
183 is->files[i].no_next = 0;
184 is->files[i].no_prev = 0;
/* Report head.lastblock - 1 for the file of category `type'.
   `type' is range-checked against no_files first; the value returned
   when the check fails is on a line not visible in this excerpt. */
191 int isc_block_used (ISAMC is, int type)
193 if (type < 0 || type >= is->no_files)
195 return is->files[type].head.lastblock-1;
/* Report the configured block size (bsize) of category `type'.
   `type' is range-checked against no_files first; the value returned
   when the check fails is on a line not visible in this excerpt. */
198 int isc_block_size (ISAMC is, int type)
200 ISAMC_filecat filecat = is->method->filecat;
201 if (type < 0 || type >= is->no_files)
203 return filecat[type].bsize;
/* Shut down an ISAMC instance.
   In debug mode the traversal and I/O counters of every file are
   logged.  For each file the header is written back when marked dirty,
   fc_list is freed and the block file is closed; merge_buf is freed
   afterwards.
   NOTE(review): some original lines of this function (braces, further
   cleanup calls, the return) are not visible in this excerpt. */
206 int isc_close (ISAMC is)
210 if (is->method->debug)
212 logf (LOG_LOG, "isc: next forw mid-f prev backw mid-b");
213 for (i = 0; i<is->no_files; i++)
214 logf (LOG_LOG, "isc:%8d%8d%8.1f%8d%8d%8.1f",
215 is->files[i].no_next,
216 is->files[i].no_forward,
217 is->files[i].no_forward ?
218 (double) is->files[i].sum_forward/is->files[i].no_forward
220 is->files[i].no_prev,
221 is->files[i].no_backward,
222 is->files[i].no_backward ?
223 (double) is->files[i].sum_backward/is->files[i].no_backward
226 if (is->method->debug)
227 logf (LOG_LOG, "isc: writes reads skipped alloc released remap");
228 for (i = 0; i<is->no_files; i++)
231 assert (is->files[i].bf);
/* the header lives in block 0 and is only rewritten when it changed */
232 if (is->files[i].head_is_dirty)
233 bf_write (is->files[i].bf, 0, 0, sizeof(ISAMC_head),
235 if (is->method->debug)
236 logf (LOG_LOG, "isc:%8d%8d%8d%8d%8d%8d",
237 is->files[i].no_writes,
238 is->files[i].no_reads,
239 is->files[i].no_skip_writes,
240 is->files[i].no_allocated,
241 is->files[i].no_released,
242 is->files[i].no_remap);
243 xfree (is->files[i].fc_list);
245 bf_close (is->files[i].bf);
248 xfree (is->merge_buf);
/* Read block `pos' of category `cat' into dst via bf_read and count the
   read in no_reads.  The bf_read result is passed straight back. */
253 int isc_read_block (ISAMC is, int cat, int pos, char *dst)
255 ++(is->files[cat].no_reads);
256 return bf_read (is->files[cat].bf, pos, 0, 0, dst);
/* Write src to block `pos' of category `cat' via bf_write and count the
   write in no_writes.  A trace line is logged when debug exceeds 2.
   The bf_write result is passed straight back. */
259 int isc_write_block (ISAMC is, int cat, int pos, char *src)
261 ++(is->files[cat].no_writes);
262 if (is->method->debug > 2)
263 logf (LOG_LOG, "isc: write_block %d %d", cat, pos);
264 return bf_write (is->files[cat].bf, pos, 0, 0, src);
/* Write a data block whose payload starts at src.
   src is moved back by ISAMC_BLOCK_OFFSET_N bytes, so the caller's
   buffer must have that much room in front of src.  The reclaimed area
   receives nextpos (an int) followed by the size field, where
   size = offset + ISAMC_BLOCK_OFFSET_N.  The block is then written with
   isc_write_block and its result returned. */
267 int isc_write_dblock (ISAMC is, int cat, int pos, char *src,
268 int nextpos, int offset)
270 ISAMC_BLOCK_SIZE size = offset + ISAMC_BLOCK_OFFSET_N;
271 if (is->method->debug > 2)
272 logf (LOG_LOG, "isc: write_dblock. size=%d nextpos=%d",
273 (int) size, nextpos);
/* step back to the start of the block header */
274 src -= ISAMC_BLOCK_OFFSET_N;
275 memcpy (src, &nextpos, sizeof(int));
276 memcpy (src + sizeof(int), &size, sizeof(size));
277 return isc_write_block (is, cat, pos, src);
280 #if ISAMC_FREELIST_CHUNK
/* Write the in-memory free-list buffer of category `cat' back to disk.
   Nothing happens unless head.freelist is non-zero and the buffer holds
   entries.  Otherwise the entry count is stored in the first int of the
   buffer, the buffer is written to block head.freelist, and the count
   is reset to 0. */
281 static void flush_block (ISAMC is, int cat)
283 char *abuf = is->files[cat].alloc_buf;
284 int block = is->files[cat].head.freelist;
285 if (block && is->files[cat].alloc_entries_num)
287 memcpy (abuf, &is->files[cat].alloc_entries_num, sizeof(int));
288 bf_write (is->files[cat].bf, block, 0, 0, abuf);
289 is->files[cat].alloc_entries_num = 0;
/* Obtain a block number in category `cat' (variant compiled under
   ISAMC_FREELIST_CHUNK).
   Visible behaviour: with no free list the file grows by taking
   head.lastblock and incrementing it.  Otherwise the free-list block is
   loaded into alloc_buf on first use, the entry count is decremented,
   and either the next free-list block is followed (count reached 0) or
   a block number is fetched from the entry array in alloc_buf.
   NOTE(review): the braces, else branches and return of this function
   are not visible in this excerpt, so the exact branch structure should
   be confirmed against the full source. */
294 static int alloc_block (ISAMC is, int cat)
296 int block = is->files[cat].head.freelist;
297 char *abuf = is->files[cat].alloc_buf;
299 (is->files[cat].no_allocated)++;
303 block = (is->files[cat].head.lastblock)++; /* no free list */
304 is->files[cat].head_is_dirty = 1;
308 if (!is->files[cat].alloc_entries_num) /* read first time */
310 bf_read (is->files[cat].bf, block, 0, 0, abuf);
/* the first int of a free-list block is its entry count */
311 memcpy (&is->files[cat].alloc_entries_num, abuf,
312 sizeof(is->files[cat].alloc_entries_num));
313 assert (is->files[cat].alloc_entries_num > 0);
315 /* have some free blocks now */
316 assert (is->files[cat].alloc_entries_num > 0);
317 is->files[cat].alloc_entries_num--;
318 if (!is->files[cat].alloc_entries_num) /* last one in block? */
/* the second int of a free-list block is the next free-list block */
320 memcpy (&is->files[cat].head.freelist, abuf + sizeof(int),
322 is->files[cat].head_is_dirty = 1;
324 if (is->files[cat].head.freelist)
326 bf_read (is->files[cat].bf, is->files[cat].head.freelist,
328 memcpy (&is->files[cat].alloc_entries_num, abuf,
329 sizeof(is->files[cat].alloc_entries_num));
330 assert (is->files[cat].alloc_entries_num);
/* fetch the entry at index alloc_entries_num from the array that starts
   sizeof(int) bytes into the buffer */
334 memcpy (&block, abuf + sizeof(int) + sizeof(int) *
335 is->files[cat].alloc_entries_num, sizeof(int));
/* Return block `pos' of category `cat' to the free list (variant
   compiled under ISAMC_FREELIST_CHUNK).
   Visible behaviour: the current free-list block is loaded into
   alloc_buf if it is not in memory yet.  When the buffer is full it is
   written out and emptied.  With an empty buffer, `pos' becomes the new
   head.freelist and the previous head is stored in the second int of
   the buffer.  There is also a store into the entry array at index
   alloc_entries_num.  The entry count is incremented at the end.
   NOTE(review): the braces and else keywords are not visible in this
   excerpt, so which of the two stores runs in which case should be
   confirmed against the full source. */
340 static void release_block (ISAMC is, int cat, int pos)
342 char *abuf = is->files[cat].alloc_buf;
343 int block = is->files[cat].head.freelist;
345 (is->files[cat].no_released)++;
347 if (block && !is->files[cat].alloc_entries_num) /* must read block */
349 bf_read (is->files[cat].bf, block, 0, 0, abuf);
350 memcpy (&is->files[cat].alloc_entries_num, abuf,
351 sizeof(is->files[cat].alloc_entries_num));
352 assert (is->files[cat].alloc_entries_num > 0);
354 assert (is->files[cat].alloc_entries_num <= is->files[cat].alloc_entries_max);
/* buffer full: store the count in its first int, write it out, start over */
355 if (is->files[cat].alloc_entries_num == is->files[cat].alloc_entries_max)
358 memcpy (abuf, &is->files[cat].alloc_entries_num, sizeof(int));
359 bf_write (is->files[cat].bf, block, 0, 0, abuf);
360 is->files[cat].alloc_entries_num = 0;
362 if (!is->files[cat].alloc_entries_num) /* make new buffer? */
/* link to the previous free-list head and make `pos' the new head */
364 memcpy (abuf + sizeof(int), &block, sizeof(int));
365 is->files[cat].head.freelist = pos;
366 is->files[cat].head_is_dirty = 1;
370 memcpy (abuf + sizeof(int) +
371 is->files[cat].alloc_entries_num*sizeof(int),
374 is->files[cat].alloc_entries_num++;
/* Second definition of flush_block.
   NOTE(review): presumably the variant used when ISAMC_FREELIST_CHUNK is
   0 -- the preprocessor line separating it from the first definition is
   not visible in this excerpt.  Only the alloc_buf lookup is visible;
   the rest of the body, if any, is not. */
377 static void flush_block (ISAMC is, int cat)
379 char *abuf = is->files[cat].alloc_buf;
/* Second definition of alloc_block (presumably the variant without
   ISAMC_FREELIST_CHUNK -- confirm).
   The header is marked dirty and no_allocated is counted.  If
   head.freelist is non-zero that block is taken, and the int read from
   its start becomes the new head.freelist.  The other visible path
   takes head.lastblock and increments it.
   NOTE(review): the else keyword and the return are not visible in this
   excerpt. */
383 static int alloc_block (ISAMC is, int cat)
386 char buf[sizeof(int)];
388 is->files[cat].head_is_dirty = 1;
389 (is->files[cat].no_allocated)++;
390 if ((block = is->files[cat].head.freelist))
/* a free block begins with the number of the next free block */
392 bf_read (is->files[cat].bf, block, 0, sizeof(int), buf);
393 memcpy (&is->files[cat].head.freelist, buf, sizeof(int));
396 block = (is->files[cat].head.lastblock)++;
/* Second definition of release_block (presumably the variant without
   ISAMC_FREELIST_CHUNK -- confirm).
   Pushes `pos' onto the free list: the current head.freelist is written
   into the first sizeof(int) bytes of block `pos', and `pos' becomes
   the new head.  The header is marked dirty and no_released is
   counted. */
400 static void release_block (ISAMC is, int cat, int pos)
402 char buf[sizeof(int)];
404 (is->files[cat].no_released)++;
405 is->files[cat].head_is_dirty = 1;
406 memcpy (buf, &is->files[cat].head.freelist, sizeof(int));
407 is->files[cat].head.freelist = pos;
408 bf_write (is->files[cat].bf, pos, 0, sizeof(int), buf);
/* Allocate a block in category `cat'.
   When the file has an fc_list, its fc_max slots are scanned and a
   matching slot is cleared.  alloc_block() is the other source of a
   block number.  The result is logged when debug exceeds 3.
   NOTE(review): the lines that assign the chosen entry to `block',
   decide when alloc_block() is called, and return the result are not
   visible in this excerpt. */
412 int isc_alloc_block (ISAMC is, int cat)
416 if (is->files[cat].fc_list)
419 for (j = 0; j < is->files[cat].fc_max; j++)
/* a slot qualifies when it is non-zero and either nothing has been
   chosen yet or it is lower than the current choice */
420 if ((nb = is->files[cat].fc_list[j]) && (!block || nb < block))
422 is->files[cat].fc_list[j] = 0;
428 block = alloc_block (is, cat);
429 if (is->method->debug > 3)
430 logf (LOG_LOG, "isc: alloc_block in cat %d: %d", cat, block);
/* Release block `pos' of category `cat'.
   The request is logged when debug exceeds 3.  When the file has an
   fc_list, an empty (zero) slot is looked for and `pos' is stored in
   it.  release_block() is the other visible way the block is given
   back.
   NOTE(review): the lines that decide when release_block() is called
   (presumably only if no slot was free) are not visible in this
   excerpt. */
434 void isc_release_block (ISAMC is, int cat, int pos)
436 if (is->method->debug > 3)
437 logf (LOG_LOG, "isc: release_block in cat %d: %d", cat, pos);
438 if (is->files[cat].fc_list)
441 for (j = 0; j<is->files[cat].fc_max; j++)
442 if (!is->files[cat].fc_list[j])
444 is->files[cat].fc_list[j] = pos;
448 release_block (is, cat, pos);
/* Set up the fc_list array of category `cat': fc_max is set to j, an
   array of j entries is allocated with xmalloc, and entries are set
   to 0.
   NOTE(review): the declaration and initial value of j, and the loop
   around the zeroing store, are not visible in this excerpt. */
451 static void init_fc (ISAMC is, int cat)
455 is->files[cat].fc_max = j;
456 is->files[cat].fc_list = xmalloc (sizeof(*is->files[0].fc_list) * j);
458 is->files[cat].fc_list[j] = 0;
/* Hand the blocks held in fc_list of category `cat' to release_block()
   and clear the slots that held them.  Zero slots are skipped.
   NOTE(review): the loop statement that steps j (starting from fc_max)
   is not visible in this excerpt. */
461 static void release_fc (ISAMC is, int cat)
463 int b, j = is->files[cat].fc_max;
466 if ((b = is->files[cat].fc_list[j]))
468 release_block (is, cat, b);
469 is->files[cat].fc_list[j] = 0;
/* Close a position handle.  The visible step ends the decoder state
   held in pp->decodeClientData through the method's code_stop callback.
   NOTE(review): any further cleanup (e.g. freeing pp or its buffer) is
   on lines not visible in this excerpt. */
473 void isc_pp_close (ISAMC_PP pp)
477 (*is->method->code_stop)(ISAMC_DECODE, pp->decodeClientData);
/* Create a position handle for the chain identified by `ipos'.
   The category and block number are extracted from ipos with isc_type
   and isc_block.  A buffer of one block (bsize of that category) is
   allocated and a decoder state is started through code_start.  The
   first block is read and its header fields next, size and numKeys are
   copied out, in that order; pp->offset ends up just past them.
   NOTE(review): the condition under which the first block is read, and
   the return, are not visible in this excerpt. */
482 ISAMC_PP isc_pp_open (ISAMC is, ISAMC_P ipos)
484 ISAMC_PP pp = xmalloc (sizeof(*pp));
487 pp->cat = isc_type(ipos);
488 pp->pos = isc_block(ipos);
490 src = pp->buf = xmalloc (is->method->filecat[pp->cat].bsize);
496 pp->decodeClientData = (*is->method->code_start)(ISAMC_DECODE);
503 isc_read_block (is, pp->cat, pp->pos, src);
504 memcpy (&pp->next, src, sizeof(pp->next));
505 src += sizeof(pp->next);
506 memcpy (&pp->size, src, sizeof(pp->size));
507 src += sizeof(pp->size);
508 memcpy (&pp->numKeys, src, sizeof(pp->numKeys));
509 src += sizeof(pp->numKeys);
/* a block must never name itself as its successor */
510 assert (pp->next != pp->pos);
511 pp->offset = src - pp->buf;
/* the header of a first block is ISAMC_BLOCK_OFFSET_1 bytes long */
512 assert (pp->offset == ISAMC_BLOCK_OFFSET_1);
513 if (is->method->debug > 2)
514 logf (LOG_LOG, "isc: read_block size=%d %d %d next=%d",
515 pp->size, pp->cat, pp->pos, pp->next);
520 /* returns non-zero if item could be read; 0 otherwise */
/* Thin wrapper: passes the address of the local copy of `buf' to
   isc_read_item as the destination pointer and returns its result. */
521 int isc_pp_read (ISAMC_PP pp, void *buf)
523 return isc_read_item (pp, (char **) &buf);
526 /* read one item from file - decode and store it in *dst.
529 1 if item could be read ok and NO boundary
530 2 if item could be read ok and boundary */
/* Visible flow: while data remains in the current block
   (pp->offset < pp->size) an item is decoded at pp->buf + pp->offset via
   the code_item callback and pp->offset is advanced.  Once the block is
   exhausted, either 0 is returned (end of file) or the next block is
   loaded, the traversal statistics are updated, the block header is
   consumed and the first item of the new block is decoded.
   NOTE(review): braces, else branches, the update of pp->pos, the
   condition guarding isc_release_block and the non-zero return
   statements are not visible in this excerpt. */
531 int isc_read_item (ISAMC_PP pp, char **dst)
534 char *src = pp->buf + pp->offset;
536 if (pp->offset >= pp->size)
541 return 0; /* end of file */
/* statistics: a step to the directly adjacent block counts as next or
   prev; any longer jump counts as forward or backward and its distance
   is accumulated */
543 if (pp->next > pp->pos)
545 if (pp->next == pp->pos + 1)
546 is->files[pp->cat].no_next++;
549 is->files[pp->cat].no_forward++;
550 is->files[pp->cat].sum_forward += pp->next - pp->pos;
555 if (pp->next + 1 == pp->pos)
556 is->files[pp->cat].no_prev++;
559 is->files[pp->cat].no_backward++;
560 is->files[pp->cat].sum_backward += pp->pos - pp->next;
563 /* out new block position */
566 /* read block and save 'next' and 'size' entry */
567 isc_read_block (is, pp->cat, pp->pos, src);
568 memcpy (&pp->next, src, sizeof(pp->next));
569 src += sizeof(pp->next);
570 memcpy (&pp->size, src, sizeof(pp->size));
571 src += sizeof(pp->size);
572 /* assume block is non-empty */
/* the header of a follow-on block is ISAMC_BLOCK_OFFSET_N bytes long */
573 assert (src - pp->buf == ISAMC_BLOCK_OFFSET_N);
574 assert (pp->next != pp->pos);
576 isc_release_block (is, pp->cat, pp->pos);
577 (*is->method->code_item)(ISAMC_DECODE, pp->decodeClientData, dst, &src);
578 pp->offset = src - pp->buf;
579 if (is->method->debug > 2)
580 logf (LOG_LOG, "isc: read_block size=%d %d %d next=%d",
581 pp->size, pp->cat, pp->pos, pp->next);
/* common case: decode the next item from the block already in memory */
584 (*is->method->code_item)(ISAMC_DECODE, pp->decodeClientData, dst, &src);
585 pp->offset = src - pp->buf;
589 int isc_pp_num (ISAMC_PP pp)