2 * Copyright (c) 1995-1998, Index Data.
3 * See the file LICENSE for details.
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.21 2002-06-19 10:29:18 adam
8 * align block sizes for isam sys. Better plot for test
10 * Revision 1.20 1999/11/30 13:48:04 adam
11 * Improved installation. Updated for inclusion of YAZ header files.
13 * Revision 1.19 1999/07/14 10:59:27 adam
14 * Changed functions isc_getmethod, isams_getmethod.
15 * Improved fatal error handling (such as missing EXPLAIN schema).
17 * Revision 1.18 1999/06/30 09:08:23 adam
18 * Added coder to reset.
20 * Revision 1.17 1999/05/26 07:49:14 adam
23 * Revision 1.16 1998/05/27 14:32:03 adam
24 * Changed default block category layout.
26 * Revision 1.15 1998/05/20 10:12:25 adam
27 * Implemented automatic EXPLAIN database maintenance.
28 * Modified Zebra to work with ASN.1 compiled version of YAZ.
30 * Revision 1.14 1998/03/19 10:04:35 adam
33 * Revision 1.13 1998/03/18 09:23:55 adam
34 * Blocks are stored in chunks on free list - up to factor 2 in speed.
35 * Fixed bug that could occur in block category rearrangemen.
37 * Revision 1.12 1998/03/16 10:37:24 adam
38 * Added more statistics.
40 * Revision 1.11 1998/03/13 15:30:50 adam
41 * New functions isc_block_used and isc_block_size. Fixed 'leak'
44 * Revision 1.10 1998/03/11 11:18:18 adam
45 * Changed the isc_merge to take into account the mfill (minimum-fill).
47 * Revision 1.9 1998/03/06 13:54:02 adam
48 * Fixed two nasty bugs in isc_merge.
50 * Revision 1.8 1997/09/17 12:19:20 adam
51 * Zebra version corresponds to YAZ version 1.4.
52 * Changed Zebra server so that it doesn't depend on global common_resource.
54 * Revision 1.7 1997/02/12 20:42:43 adam
55 * Bug fix: during isc_merge operations, some pages weren't marked dirty
56 * even though they should be. At this point the merge operation marks
57 * a page dirty if the previous page changed at all. A better approach is
58 * to mark it dirty if the last key written changed in previous page.
60 * Revision 1.6 1996/11/08 11:15:29 adam
61 * Number of keys in chain are stored in first block and the function
62 * to retrieve this information, isc_pp_num is implemented.
64 * Revision 1.5 1996/11/04 14:08:57 adam
65 * Optimized free block usage.
67 * Revision 1.4 1996/11/01 13:36:46 adam
68 * New element, max_blocks_mem, that control how many blocks of max size
69 * to store in memory during isc_merge.
70 * Function isc_merge now ignores delete/update of identical keys and
71 * the proper blocks are then non-dirty and not written in flush_blocks.
73 * Revision 1.3 1996/11/01 08:59:14 adam
74 * First version of isc_merge that supports update/delete.
76 * Revision 1.2 1996/10/29 16:44:56 adam
79 * Revision 1.1 1996/10/29 13:40:48 adam
86 * Reduction to lower categories in isc_merge
96 static void flush_block (ISAMC is, int cat);
97 static void release_fc (ISAMC is, int cat);
98 static void init_fc (ISAMC is, int cat);
100 #define ISAMC_FREELIST_CHUNK 1
104 void isc_getmethod (ISAMC_M m)
107 static struct ISAMC_filecat_s def_cat[] = {
113 { 128, 120, 100, 10 },
114 { 512, 490, 350, 10 },
115 { 2048, 1900, 1700, 10 },
116 { 8192, 8000, 7900, 10 },
117 { 32768, 32000, 31000, 0 },
120 m->filecat = def_cat;
122 m->code_start = NULL;
125 m->code_reset = NULL;
127 m->compare_item = NULL;
131 m->max_blocks_mem = 10;
134 ISAMC isc_open (BFiles bfs, const char *name, int writeflag, ISAMC_M method)
137 ISAMC_filecat filecat;
139 int max_buf_size = 0;
141 is = (ISAMC) xmalloc (sizeof(*is));
143 is->method = (ISAMC_M) xmalloc (sizeof(*is->method));
144 memcpy (is->method, method, sizeof(*method));
145 filecat = is->method->filecat;
148 /* determine number of block categories */
149 if (is->method->debug)
150 logf (LOG_LOG, "isc: bsize ifill mfill mblocks");
153 if (is->method->debug)
154 logf (LOG_LOG, "isc:%6d %6d %6d %6d",
155 filecat[i].bsize, filecat[i].ifill,
156 filecat[i].mfill, filecat[i].mblocks);
157 if (max_buf_size < filecat[i].mblocks * filecat[i].bsize)
158 max_buf_size = filecat[i].mblocks * filecat[i].bsize;
159 } while (filecat[i++].mblocks);
162 /* max_buf_size is the larget buffer to be used during merge */
163 max_buf_size = (1 + max_buf_size / filecat[i].bsize) * filecat[i].bsize;
164 if (max_buf_size < (1+is->method->max_blocks_mem) * filecat[i].bsize)
165 max_buf_size = (1+is->method->max_blocks_mem) * filecat[i].bsize;
166 if (is->method->debug)
167 logf (LOG_LOG, "isc: max_buf_size %d", max_buf_size);
169 assert (is->no_files > 0);
170 is->files = (ISAMC_file) xmalloc (sizeof(*is->files)*is->no_files);
173 is->merge_buf = (char *) xmalloc (max_buf_size+256);
174 memset (is->merge_buf, 0, max_buf_size+256);
177 is->merge_buf = NULL;
178 for (i = 0; i<is->no_files; i++)
182 sprintf (fname, "%s%c", name, i+'A');
183 is->files[i].bf = bf_open (bfs, fname, is->method->filecat[i].bsize,
185 is->files[i].head_is_dirty = 0;
186 if (!bf_read (is->files[i].bf, 0, 0, sizeof(ISAMC_head),
189 is->files[i].head.lastblock = 1;
190 is->files[i].head.freelist = 0;
192 is->files[i].alloc_entries_num = 0;
193 is->files[i].alloc_entries_max =
194 is->method->filecat[i].bsize / sizeof(int) - 1;
195 is->files[i].alloc_buf = (char *)
196 xmalloc (is->method->filecat[i].bsize);
197 is->files[i].no_writes = 0;
198 is->files[i].no_reads = 0;
199 is->files[i].no_skip_writes = 0;
200 is->files[i].no_allocated = 0;
201 is->files[i].no_released = 0;
202 is->files[i].no_remap = 0;
203 is->files[i].no_forward = 0;
204 is->files[i].no_backward = 0;
205 is->files[i].sum_forward = 0;
206 is->files[i].sum_backward = 0;
207 is->files[i].no_next = 0;
208 is->files[i].no_prev = 0;
215 int isc_block_used (ISAMC is, int type)
217 if (type < 0 || type >= is->no_files)
219 return is->files[type].head.lastblock-1;
222 int isc_block_size (ISAMC is, int type)
224 ISAMC_filecat filecat = is->method->filecat;
225 if (type < 0 || type >= is->no_files)
227 return filecat[type].bsize;
230 int isc_close (ISAMC is)
234 if (is->method->debug)
236 logf (LOG_LOG, "isc: next forw mid-f prev backw mid-b");
237 for (i = 0; i<is->no_files; i++)
238 logf (LOG_LOG, "isc:%8d%8d%8.1f%8d%8d%8.1f",
239 is->files[i].no_next,
240 is->files[i].no_forward,
241 is->files[i].no_forward ?
242 (double) is->files[i].sum_forward/is->files[i].no_forward
244 is->files[i].no_prev,
245 is->files[i].no_backward,
246 is->files[i].no_backward ?
247 (double) is->files[i].sum_backward/is->files[i].no_backward
250 if (is->method->debug)
251 logf (LOG_LOG, "isc: writes reads skipped alloc released remap");
252 for (i = 0; i<is->no_files; i++)
255 assert (is->files[i].bf);
256 if (is->files[i].head_is_dirty)
257 bf_write (is->files[i].bf, 0, 0, sizeof(ISAMC_head),
259 if (is->method->debug)
260 logf (LOG_LOG, "isc:%8d%8d%8d%8d%8d%8d",
261 is->files[i].no_writes,
262 is->files[i].no_reads,
263 is->files[i].no_skip_writes,
264 is->files[i].no_allocated,
265 is->files[i].no_released,
266 is->files[i].no_remap);
267 xfree (is->files[i].fc_list);
269 bf_close (is->files[i].bf);
272 xfree (is->merge_buf);
278 int isc_read_block (ISAMC is, int cat, int pos, char *dst)
280 ++(is->files[cat].no_reads);
281 return bf_read (is->files[cat].bf, pos, 0, 0, dst);
284 int isc_write_block (ISAMC is, int cat, int pos, char *src)
286 ++(is->files[cat].no_writes);
287 if (is->method->debug > 2)
288 logf (LOG_LOG, "isc: write_block %d %d", cat, pos);
289 return bf_write (is->files[cat].bf, pos, 0, 0, src);
292 int isc_write_dblock (ISAMC is, int cat, int pos, char *src,
293 int nextpos, int offset)
295 ISAMC_BLOCK_SIZE size = offset + ISAMC_BLOCK_OFFSET_N;
296 if (is->method->debug > 2)
297 logf (LOG_LOG, "isc: write_dblock. size=%d nextpos=%d",
298 (int) size, nextpos);
299 src -= ISAMC_BLOCK_OFFSET_N;
300 memcpy (src, &nextpos, sizeof(int));
301 memcpy (src + sizeof(int), &size, sizeof(size));
302 return isc_write_block (is, cat, pos, src);
305 #if ISAMC_FREELIST_CHUNK
306 static void flush_block (ISAMC is, int cat)
308 char *abuf = is->files[cat].alloc_buf;
309 int block = is->files[cat].head.freelist;
310 if (block && is->files[cat].alloc_entries_num)
312 memcpy (abuf, &is->files[cat].alloc_entries_num, sizeof(int));
313 bf_write (is->files[cat].bf, block, 0, 0, abuf);
314 is->files[cat].alloc_entries_num = 0;
319 static int alloc_block (ISAMC is, int cat)
321 int block = is->files[cat].head.freelist;
322 char *abuf = is->files[cat].alloc_buf;
324 (is->files[cat].no_allocated)++;
328 block = (is->files[cat].head.lastblock)++; /* no free list */
329 is->files[cat].head_is_dirty = 1;
333 if (!is->files[cat].alloc_entries_num) /* read first time */
335 bf_read (is->files[cat].bf, block, 0, 0, abuf);
336 memcpy (&is->files[cat].alloc_entries_num, abuf,
337 sizeof(is->files[cat].alloc_entries_num));
338 assert (is->files[cat].alloc_entries_num > 0);
340 /* have some free blocks now */
341 assert (is->files[cat].alloc_entries_num > 0);
342 is->files[cat].alloc_entries_num--;
343 if (!is->files[cat].alloc_entries_num) /* last one in block? */
345 memcpy (&is->files[cat].head.freelist, abuf + sizeof(int),
347 is->files[cat].head_is_dirty = 1;
349 if (is->files[cat].head.freelist)
351 bf_read (is->files[cat].bf, is->files[cat].head.freelist,
353 memcpy (&is->files[cat].alloc_entries_num, abuf,
354 sizeof(is->files[cat].alloc_entries_num));
355 assert (is->files[cat].alloc_entries_num);
359 memcpy (&block, abuf + sizeof(int) + sizeof(int) *
360 is->files[cat].alloc_entries_num, sizeof(int));
365 static void release_block (ISAMC is, int cat, int pos)
367 char *abuf = is->files[cat].alloc_buf;
368 int block = is->files[cat].head.freelist;
370 (is->files[cat].no_released)++;
372 if (block && !is->files[cat].alloc_entries_num) /* must read block */
374 bf_read (is->files[cat].bf, block, 0, 0, abuf);
375 memcpy (&is->files[cat].alloc_entries_num, abuf,
376 sizeof(is->files[cat].alloc_entries_num));
377 assert (is->files[cat].alloc_entries_num > 0);
379 assert (is->files[cat].alloc_entries_num <= is->files[cat].alloc_entries_max);
380 if (is->files[cat].alloc_entries_num == is->files[cat].alloc_entries_max)
383 memcpy (abuf, &is->files[cat].alloc_entries_num, sizeof(int));
384 bf_write (is->files[cat].bf, block, 0, 0, abuf);
385 is->files[cat].alloc_entries_num = 0;
387 if (!is->files[cat].alloc_entries_num) /* make new buffer? */
389 memcpy (abuf + sizeof(int), &block, sizeof(int));
390 is->files[cat].head.freelist = pos;
391 is->files[cat].head_is_dirty = 1;
395 memcpy (abuf + sizeof(int) +
396 is->files[cat].alloc_entries_num*sizeof(int),
399 is->files[cat].alloc_entries_num++;
402 static void flush_block (ISAMC is, int cat)
404 char *abuf = is->files[cat].alloc_buf;
408 static int alloc_block (ISAMC is, int cat)
411 char buf[sizeof(int)];
413 is->files[cat].head_is_dirty = 1;
414 (is->files[cat].no_allocated)++;
415 if ((block = is->files[cat].head.freelist))
417 bf_read (is->files[cat].bf, block, 0, sizeof(int), buf);
418 memcpy (&is->files[cat].head.freelist, buf, sizeof(int));
421 block = (is->files[cat].head.lastblock)++;
425 static void release_block (ISAMC is, int cat, int pos)
427 char buf[sizeof(int)];
429 (is->files[cat].no_released)++;
430 is->files[cat].head_is_dirty = 1;
431 memcpy (buf, &is->files[cat].head.freelist, sizeof(int));
432 is->files[cat].head.freelist = pos;
433 bf_write (is->files[cat].bf, pos, 0, sizeof(int), buf);
437 int isc_alloc_block (ISAMC is, int cat)
441 if (is->files[cat].fc_list)
444 for (j = 0; j < is->files[cat].fc_max; j++)
445 if ((nb = is->files[cat].fc_list[j]) && (!block || nb < block))
447 is->files[cat].fc_list[j] = 0;
453 block = alloc_block (is, cat);
454 if (is->method->debug > 3)
455 logf (LOG_LOG, "isc: alloc_block in cat %d: %d", cat, block);
459 void isc_release_block (ISAMC is, int cat, int pos)
461 if (is->method->debug > 3)
462 logf (LOG_LOG, "isc: release_block in cat %d: %d", cat, pos);
463 if (is->files[cat].fc_list)
466 for (j = 0; j<is->files[cat].fc_max; j++)
467 if (!is->files[cat].fc_list[j])
469 is->files[cat].fc_list[j] = pos;
473 release_block (is, cat, pos);
476 static void init_fc (ISAMC is, int cat)
480 is->files[cat].fc_max = j;
481 is->files[cat].fc_list = (int *)
482 xmalloc (sizeof(*is->files[0].fc_list) * j);
484 is->files[cat].fc_list[j] = 0;
487 static void release_fc (ISAMC is, int cat)
489 int b, j = is->files[cat].fc_max;
492 if ((b = is->files[cat].fc_list[j]))
494 release_block (is, cat, b);
495 is->files[cat].fc_list[j] = 0;
499 void isc_pp_close (ISAMC_PP pp)
503 (*is->method->code_stop)(ISAMC_DECODE, pp->decodeClientData);
508 ISAMC_PP isc_pp_open (ISAMC is, ISAMC_P ipos)
510 ISAMC_PP pp = (ISAMC_PP) xmalloc (sizeof(*pp));
513 pp->cat = isc_type(ipos);
514 pp->pos = isc_block(ipos);
516 src = pp->buf = (char *) xmalloc (is->method->filecat[pp->cat].bsize);
522 pp->decodeClientData = (*is->method->code_start)(ISAMC_DECODE);
529 isc_read_block (is, pp->cat, pp->pos, src);
530 memcpy (&pp->next, src, sizeof(pp->next));
531 src += sizeof(pp->next);
532 memcpy (&pp->size, src, sizeof(pp->size));
533 src += sizeof(pp->size);
534 memcpy (&pp->numKeys, src, sizeof(pp->numKeys));
535 src += sizeof(pp->numKeys);
536 assert (pp->next != pp->pos);
537 pp->offset = src - pp->buf;
538 assert (pp->offset == ISAMC_BLOCK_OFFSET_1);
539 if (is->method->debug > 2)
540 logf (LOG_LOG, "isc: read_block size=%d %d %d next=%d",
541 pp->size, pp->cat, pp->pos, pp->next);
546 /* returns non-zero if item could be read; 0 otherwise */
547 int isc_pp_read (ISAMC_PP pp, void *buf)
549 return isc_read_item (pp, (char **) &buf);
552 /* read one item from file - decode and store it in *dst.
555 1 if item could be read ok and NO boundary
556 2 if item could be read ok and boundary */
557 int isc_read_item (ISAMC_PP pp, char **dst)
560 char *src = pp->buf + pp->offset;
562 if (pp->offset >= pp->size)
567 return 0; /* end of file */
569 if (pp->next > pp->pos)
571 if (pp->next == pp->pos + 1)
572 is->files[pp->cat].no_next++;
575 is->files[pp->cat].no_forward++;
576 is->files[pp->cat].sum_forward += pp->next - pp->pos;
581 if (pp->next + 1 == pp->pos)
582 is->files[pp->cat].no_prev++;
585 is->files[pp->cat].no_backward++;
586 is->files[pp->cat].sum_backward += pp->pos - pp->next;
589 /* out new block position */
592 /* read block and save 'next' and 'size' entry */
593 isc_read_block (is, pp->cat, pp->pos, src);
594 memcpy (&pp->next, src, sizeof(pp->next));
595 src += sizeof(pp->next);
596 memcpy (&pp->size, src, sizeof(pp->size));
597 src += sizeof(pp->size);
598 /* assume block is non-empty */
599 assert (src - pp->buf == ISAMC_BLOCK_OFFSET_N);
600 assert (pp->next != pp->pos);
602 isc_release_block (is, pp->cat, pp->pos);
603 (*is->method->code_item)(ISAMC_DECODE, pp->decodeClientData, dst, &src);
604 pp->offset = src - pp->buf;
605 if (is->method->debug > 2)
606 logf (LOG_LOG, "isc: read_block size=%d %d %d next=%d",
607 pp->size, pp->cat, pp->pos, pp->next);
610 (*is->method->code_item)(ISAMC_DECODE, pp->decodeClientData, dst, &src);
611 pp->offset = src - pp->buf;
615 int isc_pp_num (ISAMC_PP pp)