2 * Copyright (c) 1995-1998, Index Data.
3 * See the file LICENSE for details.
6 * Isamh - append-only isam
10 * implement direct address bit
26 #include "../index/index.h" /* for dump */
28 static void flush_block (ISAMH is, int cat);
29 static void release_fc (ISAMH is, int cat);
30 static void init_fc (ISAMH is, int cat);
32 #define ISAMH_FREELIST_CHUNK 1
/* Allocates an ISAMH_M method record with xmalloc and fills in defaults.
 * Visible in this excerpt: def_cat is a static table of block categories
 * (block size, max keys before switching size), compare_item defaults to
 * NULL and max_blocks_mem to 10.
 * NOTE(review): the table entries, the remaining field assignments and the
 * return statement are not visible here. */
36 ISAMH_M isamh_getmethod (void)
38 static struct ISAMH_filecat_s def_cat[] = {
40 /* blocksz, max keys before switching size */
58 /* assume about 2 bytes per pointer, when compressed. The head uses */
59 /* 16 bytes, and other blocks use 8 for header info... If you want 3 */
60 /* blocks of 32 bytes, say max 16+24+24 = 64 keys */
63 ISAMH_M m = (ISAMH_M) xmalloc (sizeof(*m));
71 m->compare_item = NULL;
75 m->max_blocks_mem = 10;
/* Opens an ISAMH instance on top of the block-file layer `bfs`.
 *
 * Steps visible in this excerpt:
 *  - the caller's method record is copied into a private xmalloc'ed copy,
 *    so later changes to `method` do not affect the open instance;
 *  - the filecat table is walked until an entry with mblocks == 0, tracking
 *    the largest bsize in max_buf_size;
 *  - max_buf_size is bumped to the next multiple of filecat[i].bsize and
 *    raised to at least (1 + max_blocks_mem) * filecat[i].bsize;
 *  - merge_buf, startblock and lastblock are allocated with 256 spare bytes
 *    and zeroed (an alternative path sets startblock/lastblock to NULL; the
 *    condition selecting between them is not visible here);
 *  - for each category a block file named "<name><letter>" (letter is
 *    'A' + index) is opened; if bf_read of the header returns 0 the header
 *    is initialised with lastblock = 1 and freelist = 0;
 *  - the per-file free-list buffer (one block in size) is allocated and all
 *    statistics counters are reset to 0.
 *
 * alloc_entries_max is bsize / sizeof(int) - 1.
 *
 * NOTE(review): the declaration of fname is not visible; the sprintf below
 * is unbounded, so confirm the buffer is large enough for name plus one
 * character and the terminator.
 * NOTE(review): where writeflag is used and what is returned are not
 * visible in this excerpt. */
82 ISAMH isamh_open (BFiles bfs, const char *name, int writeflag, ISAMH_M method)
85 ISAMH_filecat filecat;
89 is = (ISAMH) xmalloc (sizeof(*is));
91 is->method = (ISAMH_M) xmalloc (sizeof(*is->method));
92 memcpy (is->method, method, sizeof(*method));
93 filecat = is->method->filecat;
96 /* determine number of block categories */
97 if (is->method->debug)
98 logf (LOG_LOG, "isc: bsize maxkeys");
101 if (is->method->debug)
102 logf (LOG_LOG, "isc:%6d %6d",
103 filecat[i].bsize, filecat[i].mblocks);
104 if (max_buf_size < filecat[i].bsize)
105 max_buf_size = filecat[i].bsize;
106 } while (filecat[i++].mblocks);
110 /* max_buf_size is the largest buffer to be used during merge */
111 max_buf_size = (1 + max_buf_size / filecat[i].bsize) * filecat[i].bsize;
112 if (max_buf_size < (1+is->method->max_blocks_mem) * filecat[i].bsize)
113 max_buf_size = (1+is->method->max_blocks_mem) * filecat[i].bsize;
116 if (is->method->debug)
117 logf (LOG_LOG, "isc: max_buf_size %d", max_buf_size);
119 assert (is->no_files > 0);
120 is->files = (ISAMH_file) xmalloc (sizeof(*is->files)*is->no_files);
124 is->merge_buf = (char *) xmalloc (max_buf_size+256);
125 memset (is->merge_buf, 0, max_buf_size+256);
127 is->startblock = (char *) xmalloc (max_buf_size+256);
128 memset (is->startblock, 0, max_buf_size+256);
129 is->lastblock = (char *) xmalloc (max_buf_size+256);
130 memset (is->lastblock, 0, max_buf_size+256);
131 /* The spare 256 bytes should not be needed! */
135 is->startblock = is->lastblock = NULL;
137 for (i = 0; i<is->no_files; i++)
141 sprintf (fname, "%s%c", name, i+'A');
142 is->files[i].bf = bf_open (bfs, fname, is->method->filecat[i].bsize,
144 is->files[i].head_is_dirty = 0;
145 if (!bf_read (is->files[i].bf, 0, 0, sizeof(ISAMH_head),
148 is->files[i].head.lastblock = 1;
149 is->files[i].head.freelist = 0;
151 is->files[i].alloc_entries_num = 0;
152 is->files[i].alloc_entries_max =
153 is->method->filecat[i].bsize / sizeof(int) - 1;
154 is->files[i].alloc_buf = (char *)
155 xmalloc (is->method->filecat[i].bsize);
156 is->files[i].no_writes = 0;
157 is->files[i].no_reads = 0;
158 is->files[i].no_skip_writes = 0;
159 is->files[i].no_allocated = 0;
160 is->files[i].no_released = 0;
161 is->files[i].no_remap = 0;
162 is->files[i].no_forward = 0;
163 is->files[i].no_backward = 0;
164 is->files[i].sum_forward = 0;
165 is->files[i].sum_backward = 0;
166 is->files[i].no_next = 0;
167 is->files[i].no_prev = 0;
/* Returns head.lastblock - 1 for the file of category `type`.
 * An out-of-range `type` (negative or >= no_files) takes an early exit
 * whose body is not visible in this excerpt. */
174 int isamh_block_used (ISAMH is, int type)
176 if (type < 0 || type >= is->no_files)
178 return is->files[type].head.lastblock-1;
/* Returns the configured block size (filecat[type].bsize) of category
 * `type`. An out-of-range `type` (negative or >= no_files) takes an early
 * exit whose body is not visible in this excerpt. */
181 int isamh_block_size (ISAMH is, int type)
183 ISAMH_filecat filecat = is->method->filecat;
184 if (type < 0 || type >= is->no_files)
186 return filecat[type].bsize;
/* Shuts down an ISAMH instance.
 *
 * When method->debug is set, two tables are logged per file: navigation
 * statistics (next / forward / prev / backward counts, with the mean
 * forward and backward distance computed as sum / count when the count is
 * non-zero) and I/O statistics (writes, reads, skipped writes, allocated,
 * released, remap).
 *
 * For every file: the header is written back with bf_write at position 0
 * if head_is_dirty, fc_list is freed and the block file is closed.
 * Finally startblock and lastblock are freed.
 *
 * NOTE(review): any further cleanup (e.g. of merge_buf, method, files) and
 * the return value are not visible in this excerpt. */
189 int isamh_close (ISAMH is)
193 if (is->method->debug)
195 logf (LOG_LOG, "isc: next forw mid-f prev backw mid-b");
196 for (i = 0; i<is->no_files; i++)
197 logf (LOG_LOG, "isc:%8d%8d%8.1f%8d%8d%8.1f",
198 is->files[i].no_next,
199 is->files[i].no_forward,
200 is->files[i].no_forward ?
201 (double) is->files[i].sum_forward/is->files[i].no_forward
203 is->files[i].no_prev,
204 is->files[i].no_backward,
205 is->files[i].no_backward ?
206 (double) is->files[i].sum_backward/is->files[i].no_backward
209 if (is->method->debug)
210 logf (LOG_LOG, "isc: writes reads skipped alloc released remap");
211 for (i = 0; i<is->no_files; i++)
214 assert (is->files[i].bf);
215 if (is->files[i].head_is_dirty)
216 bf_write (is->files[i].bf, 0, 0, sizeof(ISAMH_head),
218 if (is->method->debug)
219 logf (LOG_LOG, "isc:%8d%8d%8d%8d%8d%8d",
220 is->files[i].no_writes,
221 is->files[i].no_reads,
222 is->files[i].no_skip_writes,
223 is->files[i].no_allocated,
224 is->files[i].no_released,
225 is->files[i].no_remap);
226 xfree (is->files[i].fc_list);
228 bf_close (is->files[i].bf);
231 xfree (is->startblock);
232 xfree (is->lastblock);
/* Reads block `pos` of category `cat` into `dst` through bf_read and bumps
 * that file's no_reads counter. Returns whatever bf_read returns. */
238 int isamh_read_block (ISAMH is, int cat, int pos, char *dst)
240 ++(is->files[cat].no_reads);
241 return bf_read (is->files[cat].bf, pos, 0, 0, dst);
/* Writes `src` to block `pos` of category `cat` through bf_write and bumps
 * that file's no_writes counter. The write is logged when method->debug is
 * greater than 2. Returns whatever bf_write returns. */
244 int isamh_write_block (ISAMH is, int cat, int pos, char *src)
246 ++(is->files[cat].no_writes);
247 if (is->method->debug > 2)
248 logf (LOG_LOG, "isc: write_block %d %d", cat, pos);
249 return bf_write (is->files[cat].bf, pos, 0, 0, src);
/* Writes a data block whose payload begins at `src`.
 *
 * The block header is stored in the ISAMH_BLOCK_OFFSET_N bytes immediately
 * BEFORE `src`: first `nextpos` (an int), then the size field, where
 * size = offset + ISAMH_BLOCK_OFFSET_N. The caller must therefore pass a
 * pointer with at least ISAMH_BLOCK_OFFSET_N writable bytes in front of it.
 * The combined header + payload is then written with isamh_write_block,
 * whose result is returned. */
252 int isamh_write_dblock (ISAMH is, int cat, int pos, char *src,
253 int nextpos, int offset)
255 ISAMH_BLOCK_SIZE size = offset + ISAMH_BLOCK_OFFSET_N;
256 if (is->method->debug > 2)
257 logf (LOG_LOG, "isc: write_dblock. size=%d nextpos=%d",
258 (int) size, nextpos);
259 src -= ISAMH_BLOCK_OFFSET_N;
260 memcpy (src, &nextpos, sizeof(int));
261 memcpy (src + sizeof(int), &size, sizeof(size));
262 return isamh_write_block (is, cat, pos, src);
265 #if ISAMH_FREELIST_CHUNK
/* Chunked free-list variant (compiled when ISAMH_FREELIST_CHUNK is set).
 * If the category has a free-list head block and the in-memory chunk holds
 * entries, the entry count is stored in the first int of alloc_buf, the
 * buffer is written to the free-list head block, and the in-memory count is
 * reset to 0. */
266 static void flush_block (ISAMH is, int cat)
268 char *abuf = is->files[cat].alloc_buf;
269 int block = is->files[cat].head.freelist;
270 if (block && is->files[cat].alloc_entries_num)
272 memcpy (abuf, &is->files[cat].alloc_entries_num, sizeof(int));
273 bf_write (is->files[cat].bf, block, 0, 0, abuf);
274 is->files[cat].alloc_entries_num = 0;
/* Chunked free-list variant: allocates one block in category `cat`.
 *
 * Layout of a free-list chunk as used below: int 0 is the entry count,
 * the int at offset sizeof(int) is the next chunk's block number, and the
 * following ints are free block numbers.
 *
 * Visible behaviour:
 *  - no_allocated is incremented;
 *  - on the "no free list" path the block is taken from head.lastblock
 *    (post-incremented) and the header is marked dirty;
 *  - otherwise the head chunk is read into alloc_buf the first time it is
 *    needed, the entry count is decremented, and
 *      * when the count reaches 0, head.freelist is replaced by the next
 *        chunk pointer, the header is marked dirty and, if a next chunk
 *        exists, it is read and its count loaded;
 *      * a block number is copied from the entry array at index
 *        alloc_entries_num.
 * NOTE(review): the if/else structure tying these steps together and the
 * return statement are not visible in this excerpt — confirm which branch
 * the final memcpy into `block` belongs to. */
279 static int alloc_block (ISAMH is, int cat)
281 int block = is->files[cat].head.freelist;
282 char *abuf = is->files[cat].alloc_buf;
284 (is->files[cat].no_allocated)++;
288 block = (is->files[cat].head.lastblock)++; /* no free list */
289 is->files[cat].head_is_dirty = 1;
293 if (!is->files[cat].alloc_entries_num) /* read first time */
295 bf_read (is->files[cat].bf, block, 0, 0, abuf);
296 memcpy (&is->files[cat].alloc_entries_num, abuf,
297 sizeof(is->files[cat].alloc_entries_num));
298 assert (is->files[cat].alloc_entries_num > 0);
300 /* have some free blocks now */
301 assert (is->files[cat].alloc_entries_num > 0);
302 is->files[cat].alloc_entries_num--;
303 if (!is->files[cat].alloc_entries_num) /* last one in block? */
305 memcpy (&is->files[cat].head.freelist, abuf + sizeof(int),
307 is->files[cat].head_is_dirty = 1;
309 if (is->files[cat].head.freelist)
311 bf_read (is->files[cat].bf, is->files[cat].head.freelist,
313 memcpy (&is->files[cat].alloc_entries_num, abuf,
314 sizeof(is->files[cat].alloc_entries_num));
315 assert (is->files[cat].alloc_entries_num);
319 memcpy (&block, abuf + sizeof(int) + sizeof(int) *
320 is->files[cat].alloc_entries_num, sizeof(int));
/* Chunked free-list variant: returns block `pos` of category `cat` to the
 * free list.
 *
 * Visible behaviour:
 *  - no_released is incremented;
 *  - if a head chunk exists but is not loaded (count is 0), it is read into
 *    alloc_buf and its count loaded;
 *  - if the chunk is full (count == alloc_entries_max) the count is stored
 *    in the buffer, the chunk is written back and the count reset to 0;
 *  - if the count is 0, a new chunk is started: the previous head block
 *    number is stored as the "next chunk" pointer, `pos` becomes
 *    head.freelist and the header is marked dirty;
 *  - a value is stored in the entry array at index alloc_entries_num
 *    (presumably `pos`; the source argument of that memcpy is not visible
 *    here) and the count is incremented.
 * NOTE(review): whether the last two steps are in an else branch is not
 * visible in this excerpt. */
325 static void release_block (ISAMH is, int cat, int pos)
327 char *abuf = is->files[cat].alloc_buf;
328 int block = is->files[cat].head.freelist;
330 (is->files[cat].no_released)++;
332 if (block && !is->files[cat].alloc_entries_num) /* must read block */
334 bf_read (is->files[cat].bf, block, 0, 0, abuf);
335 memcpy (&is->files[cat].alloc_entries_num, abuf,
336 sizeof(is->files[cat].alloc_entries_num));
337 assert (is->files[cat].alloc_entries_num > 0);
339 assert (is->files[cat].alloc_entries_num <= is->files[cat].alloc_entries_max);
340 if (is->files[cat].alloc_entries_num == is->files[cat].alloc_entries_max)
343 memcpy (abuf, &is->files[cat].alloc_entries_num, sizeof(int));
344 bf_write (is->files[cat].bf, block, 0, 0, abuf);
345 is->files[cat].alloc_entries_num = 0;
347 if (!is->files[cat].alloc_entries_num) /* make new buffer? */
349 memcpy (abuf + sizeof(int), &block, sizeof(int));
350 is->files[cat].head.freelist = pos;
351 is->files[cat].head_is_dirty = 1;
355 memcpy (abuf + sizeof(int) +
356 is->files[cat].alloc_entries_num*sizeof(int),
359 is->files[cat].alloc_entries_num++;
362 static void flush_block (ISAMH is, int cat)
364 char *abuf = is->files[cat].alloc_buf;
/* Simple (non-chunked) free-list variant: allocates one block in `cat`.
 * The header is marked dirty and no_allocated incremented. If head.freelist
 * is non-zero that block is taken, and the first sizeof(int) bytes of it
 * are read to become the new head.freelist. A fresh block is otherwise
 * taken from head.lastblock (post-incremented) — presumably in the else
 * branch, which is not visible in this excerpt. */
368 static int alloc_block (ISAMH is, int cat)
371 char buf[sizeof(int)];
373 is->files[cat].head_is_dirty = 1;
374 (is->files[cat].no_allocated)++;
375 if ((block = is->files[cat].head.freelist))
377 bf_read (is->files[cat].bf, block, 0, sizeof(int), buf);
378 memcpy (&is->files[cat].head.freelist, buf, sizeof(int));
381 block = (is->files[cat].head.lastblock)++;
/* Simple (non-chunked) free-list variant: pushes block `pos` onto the free
 * list of `cat`. The current head.freelist is written into the first
 * sizeof(int) bytes of block `pos`, and `pos` becomes the new head. The
 * header is marked dirty and no_released incremented. */
385 static void release_block (ISAMH is, int cat, int pos)
387 char buf[sizeof(int)];
389 (is->files[cat].no_released)++;
390 is->files[cat].head_is_dirty = 1;
391 memcpy (buf, &is->files[cat].head.freelist, sizeof(int));
392 is->files[cat].head.freelist = pos;
393 bf_write (is->files[cat].bf, pos, 0, sizeof(int), buf);
/* Public block allocator for category `cat`.
 * If the category has an fc_list cache, its slots are scanned for a
 * non-zero block number nb satisfying (!block || nb < block); the matching
 * slot is cleared. alloc_block is called to obtain a block from the file's
 * free list or end of file — presumably only when the cache yielded
 * nothing; that guard is not visible in this excerpt. The result is logged
 * when method->debug is greater than 3. */
397 int isamh_alloc_block (ISAMH is, int cat)
401 if (is->files[cat].fc_list)
404 for (j = 0; j < is->files[cat].fc_max; j++)
405 if ((nb = is->files[cat].fc_list[j]) && (!block || nb < block))
407 is->files[cat].fc_list[j] = 0;
413 block = alloc_block (is, cat);
414 if (is->method->debug > 3)
415 logf (LOG_LOG, "isc: alloc_block in cat %d: %d", cat, block);
/* Public block release for category `cat`.
 * The release is logged when method->debug is greater than 3. If the
 * category has an fc_list cache, `pos` is stored in a zero (empty) slot.
 * release_block hands the block to the file's free list — presumably only
 * when no cache slot was available; that control flow is not visible in
 * this excerpt. */
419 void isamh_release_block (ISAMH is, int cat, int pos)
421 if (is->method->debug > 3)
422 logf (LOG_LOG, "isc: release_block in cat %d: %d", cat, pos);
423 if (is->files[cat].fc_list)
426 for (j = 0; j<is->files[cat].fc_max; j++)
427 if (!is->files[cat].fc_list[j])
429 is->files[cat].fc_list[j] = pos;
433 release_block (is, cat, pos);
/* Sets up the fc_list cache of category `cat`: fc_max is set to j, an
 * array of j ints is allocated with xmalloc, and entries are zeroed.
 * NOTE(review): the value of j and the loop that clears the entries are
 * not visible in this excerpt. */
436 static void init_fc (ISAMH is, int cat)
440 is->files[cat].fc_max = j;
441 is->files[cat].fc_list = (int *)
442 xmalloc (sizeof(*is->files[0].fc_list) * j);
444 is->files[cat].fc_list[j] = 0;
/* Empties the fc_list cache of category `cat`: each non-zero cached block
 * number is handed to release_block and its slot cleared. The index starts
 * from fc_max; the loop header itself is not visible in this excerpt. */
447 static void release_fc (ISAMH is, int cat)
449 int b, j = is->files[cat].fc_max;
452 if ((b = is->files[cat].fc_list[j]))
454 release_block (is, cat, b);
455 is->files[cat].fc_list[j] = 0;
/* Closes a read position. Visible here: the decoder state created for this
 * position is shut down through the method's code_stop callback.
 * NOTE(review): releasing pp->buf and pp itself is not visible in this
 * excerpt. */
459 void isamh_pp_close (ISAMH_PP pp)
463 (*is->method->code_stop)(ISAMH_DECODE, pp->decodeClientData);
/* Creates a read position for the ISAM address `ipos`.
 *
 * The category and block number are extracted from ipos with isamh_type and
 * isamh_block, a buffer of that category's block size is allocated, and a
 * decoder is started through the method's code_start callback. The first
 * block is then read and its header unpacked in this order: next, size,
 * numKeys, lastblock. pp->offset is left pointing just past the header,
 * which is asserted to equal ISAMH_BLOCK_OFFSET_1.
 *
 * NOTE(review): the condition guarding the block read (e.g. for an empty
 * position) and the return statement are not visible in this excerpt. */
468 ISAMH_PP isamh_pp_open (ISAMH is, ISAMH_P ipos)
470 ISAMH_PP pp = (ISAMH_PP) xmalloc (sizeof(*pp));
473 pp->cat = isamh_type(ipos);
474 pp->pos = isamh_block(ipos);
476 src = pp->buf = (char *) xmalloc (is->method->filecat[pp->cat].bsize);
482 pp->decodeClientData = (*is->method->code_start)(ISAMH_DECODE);
490 isamh_read_block (is, pp->cat, pp->pos, src);
491 memcpy (&pp->next, src, sizeof(pp->next));
492 src += sizeof(pp->next);
493 memcpy (&pp->size, src, sizeof(pp->size));
494 src += sizeof(pp->size);
495 memcpy (&pp->numKeys, src, sizeof(pp->numKeys));
496 src += sizeof(pp->numKeys);
497 memcpy (&pp->lastblock, src, sizeof(pp->lastblock));
498 src += sizeof(pp->lastblock);
499 assert (pp->next != pp->pos);
500 pp->offset = src - pp->buf;
501 assert (pp->offset == ISAMH_BLOCK_OFFSET_1);
502 if (is->method->debug > 2)
503 logf (LOG_LOG, "isamh_pp_open sz=%d c=%d p=%d n=%d",
504 pp->size, pp->cat, pp->pos, isamh_block(pp->next));
/* Serialises the header of a chain's first block from the fields of `pp`:
 * next, size, numKeys and lastblock, in that order (the same order in which
 * isamh_pp_open unpacks them). The closing assert requires the header to
 * end ISAMH_BLOCK_OFFSET_1 bytes into pp->buf, so dst presumably starts at
 * pp->buf (its initialisation is not visible in this excerpt). A summary is
 * logged when method->debug is greater than 2. */
511 void isamh_buildfirstblock(ISAMH_PP pp){
514 assert(pp->next != pp->pos);
515 memcpy(dst, &pp->next, sizeof(pp->next) );
516 dst += sizeof(pp->next);
517 memcpy(dst, &pp->size,sizeof(pp->size));
518 dst += sizeof(pp->size);
519 memcpy(dst, &pp->numKeys, sizeof(pp->numKeys));
520 dst += sizeof(pp->numKeys);
521 memcpy(dst, &pp->lastblock, sizeof(pp->lastblock));
522 dst += sizeof(pp->lastblock);
523 assert (dst - pp->buf == ISAMH_BLOCK_OFFSET_1);
524 if (pp->is->method->debug > 2)
525 logf (LOG_LOG, "isamh: first: sz=%d p=%d/%d>%d/%d>%d/%d nk=%d",
528 isamh_block(pp->next), isamh_type(pp->next),
529 isamh_block(pp->lastblock), isamh_type(pp->lastblock),
/* Serialises the header of a non-first block from the fields of `pp`:
 * next followed by size. The closing assert requires the header to end
 * ISAMH_BLOCK_OFFSET_N bytes into pp->buf, so dst presumably starts at
 * pp->buf (its initialisation is not visible in this excerpt). The block
 * must not point to itself: next is compared against the full address built
 * from pos and cat. A summary is logged when method->debug is greater
 * than 2. */
533 void isamh_buildlaterblock(ISAMH_PP pp){
536 assert(pp->next != isamh_addr(pp->pos,pp->cat));
537 memcpy(dst, &pp->next, sizeof(pp->next) );
538 dst += sizeof(pp->next);
539 memcpy(dst, &pp->size,sizeof(pp->size));
540 dst += sizeof(pp->size);
541 assert (dst - pp->buf == ISAMH_BLOCK_OFFSET_N);
542 if (pp->is->method->debug > 2)
543 logf (LOG_LOG, "isamh: l8r: sz=%d p=%d/%d>%d/%d",
546 isamh_block(pp->next), isamh_type(pp->next) );
/* Convenience wrapper around isamh_read_item: decodes the next item into
 * the caller's buffer `buf`. Because the address of the local parameter is
 * passed, any advance of the destination pointer made by the decoder is not
 * seen by the caller. */
551 /* returns non-zero if item could be read; 0 otherwise */
552 int isamh_pp_read (ISAMH_PP pp, void *buf)
554 return isamh_read_item (pp, (char **) &buf);
/* Decodes the next item of the chain at pp->offset into *dst.
 *
 * When pp->offset has reached pp->size the current block is exhausted:
 *  - one path returns 0 for end of file (its condition is not visible in
 *    this excerpt);
 *  - otherwise per-file navigation statistics are updated (adjacent
 *    next/prev versus longer forward/backward jumps with summed distance),
 *    the buffer is resized with xrealloc if the next block belongs to a
 *    different category, pos/cat are switched to the next block, the block
 *    is read, its next/size header is unpacked (asserted to occupy
 *    ISAMH_BLOCK_OFFSET_N bytes), the decoder is reset and the first item
 *    of the new block is decoded.
 * Otherwise the item is decoded straight from the current buffer. In both
 * cases pp->offset is advanced past the decoded item.
 *
 * NOTE(review): xrealloc may move pp->buf, while src was derived from the
 * old pp->buf at the top of the function; src must be re-derived before the
 * block read. That assignment is not visible in this excerpt — confirm.
 * NOTE(review): pp->next is passed to isamh_type/isamh_block, so it looks
 * like an encoded address, whereas pp->pos is a block number; the
 * statistics comparisons below mix the two — confirm this is intended.
 * NOTE(review): the guard on the isamh_release_block call and the return
 * values of the successful paths are not visible in this excerpt. */
557 /* read one item from file - decode and store it in *dst.
560 1 if item could be read ok and NO boundary
561 2 if item could be read ok and boundary */
562 int isamh_read_item (ISAMH_PP pp, char **dst)
565 char *src = pp->buf + pp->offset;
568 if (pp->offset >= pp->size)
573 return 0; /* end of file */
575 if (pp->next > pp->pos)
577 if (pp->next == pp->pos + 1)
578 is->files[pp->cat].no_next++;
581 is->files[pp->cat].no_forward++;
582 is->files[pp->cat].sum_forward += pp->next - pp->pos;
587 if (pp->next + 1 == pp->pos)
588 is->files[pp->cat].no_prev++;
591 is->files[pp->cat].no_backward++;
592 is->files[pp->cat].sum_backward += pp->pos - pp->next;
595 /* out new block position */
596 newcat = isamh_type(pp->next);
597 if (pp->cat != newcat ) {
598 pp->buf = xrealloc(pp->buf, is->method->filecat[newcat].bsize);
600 pp->pos = isamh_block(pp->next);
601 pp->cat = isamh_type(pp->next);
604 /* read block and save 'next' and 'size' entry */
605 isamh_read_block (is, pp->cat, pp->pos, src);
606 memcpy (&pp->next, src, sizeof(pp->next));
607 src += sizeof(pp->next);
608 memcpy (&pp->size, src, sizeof(pp->size));
609 src += sizeof(pp->size);
610 /* assume block is non-empty */
611 assert (src - pp->buf == ISAMH_BLOCK_OFFSET_N);
612 assert (pp->next != isamh_addr(pp->pos,pp->cat));
614 isamh_release_block (is, pp->cat, pp->pos);
615 (*is->method->code_reset)(pp->decodeClientData);
616 (*is->method->code_item)(ISAMH_DECODE, pp->decodeClientData, dst, &src);
617 pp->offset = src - pp->buf;
618 if (is->method->debug > 2)
619 logf (LOG_LOG, "isc: read_block size=%d %d %d next=%d",
620 pp->size, pp->cat, pp->pos, pp->next);
623 (*is->method->code_item)(ISAMH_DECODE, pp->decodeClientData, dst, &src);
624 pp->offset = src - pp->buf;
628 int isamh_pp_num (ISAMH_PP pp)
/* Formats bytes starting at `p` as two-digit lowercase hex values separated
 * by single spaces, appending them to `buff`. When `buff` is NULL a
 * 128-byte static buffer is used instead, so the result of such a call is
 * overwritten by the next one.
 * NOTE(review): the text is built with sprintf/strcat and no bounds check
 * is visible; each byte needs up to three characters, so confirm callers
 * keep `len` small enough for the destination. The loop header and return
 * statement are not visible in this excerpt. */
633 static char *hexdump(unsigned char *p, int len, char *buff) {
634 static char localbuff[128];
636 if (!buff) buff=localbuff;
639 sprintf(bytebuff,"%02x",*p);
641 strcat(buff,bytebuff);
642 if (len) strcat(buff," ");
/* Debug helper: logs the contents of the chain that starts at `ipos`.
 * A read position is opened with isamh_pp_open and its header fields are
 * logged. Keys are then read one by one with isamh_pp_read; whenever the
 * current block address (isamh_addr of pos and cat) changes, the new
 * block's header values and a hex dump of its bytes are logged. For every
 * key, sysno and seqno are logged in decimal and hex together with a hex
 * dump of the bytes between the previous and the current offset.
 * NOTE(review): local declarations, the hex-dump loop bounds and the final
 * cleanup are not visible in this excerpt. */
648 void isamh_pp_dump (ISAMH is, ISAMH_P ipos)
658 logf(LOG_LOG,"dumping isamh block %d (%d:%d)",
659 (int)ipos, isamh_type(ipos), isamh_block(ipos) );
660 pp=isamh_pp_open(is,ipos);
661 logf(LOG_LOG,"numKeys=%d, last=%d (%d:%d) ofs=%d ",
664 isamh_type(pp->lastblock), isamh_block(pp->lastblock),
667 while(isamh_pp_read(pp, &key))
669 if (oldaddr != isamh_addr(pp->pos,pp->cat) )
671 oldaddr = isamh_addr(pp->pos,pp->cat);
672 logf(LOG_LOG,"block %d (%d:%d) sz=%d nx=%d (%d:%d) ofs=%d",
673 isamh_addr(pp->pos,pp->cat),
674 pp->cat, pp->pos, pp->size,
675 pp->next, isamh_type(pp->next), isamh_block(pp->next),
681 logf(LOG_LOG," %05x: %s",i,hexdump(pp->buf+i,n,hexbuff));
684 if (oldoffs > ISAMH_BLOCK_OFFSET_N)
685 oldoffs=ISAMH_BLOCK_OFFSET_N;
688 logf (LOG_LOG," got %d:%d=%x:%x from %s at %d=%x",
689 key.sysno, key.seqno,
690 key.sysno, key.seqno,
691 hexdump(pp->buf+oldoffs, pp->offset-oldoffs, hexbuff),
693 oldoffs = pp->offset;
700 * Revision 1.6 1999-07-13 15:24:50 heikki
701 * Removed the one-block append, it had a serious flaw.
703 * Revision 1.5 1999/07/08 14:23:27 heikki
704 * Fixed a bug in isamh_pp_read and cleaned up a bit
706 * Revision 1.4 1999/07/07 09:36:04 heikki
707 * Fixed an assertion in isamh
709 * Revision 1.2 1999/07/06 09:37:05 heikki
710 * Working on isamh - not ready yet.
712 * Revision 1.1 1999/06/30 15:04:54 heikki
713 * Copied from isamc.c, slowly starting to simplify...