2 * Copyright (c) 1995-1998, Index Data.
3 * See the file LICENSE for details.
4 * $Id: isamd.c,v 1.15 1999-09-27 14:36:36 heikki Exp $
6 * Isamd - isam with diffs
7 * Programmed by: Heikki Levanto
10 * - Statistics are missing and/or completely wrong
11 * - Lots of code stolen from isamc, not all needed any more
21 #include "../index/index.h" /* isamd uses the internal structure of it_key */
/* Forward declarations of static helpers that are defined later in this file. */
24 static void flush_block (ISAMD is, int cat);
25 static void release_fc (ISAMD is, int cat);
26 static void init_fc (ISAMD is, int cat);
/*
 * Tested by the #if further down in this file, in front of the first set of
 * flush_block / alloc_block / release_block definitions.
 * NOTE(review): a second set of the same three functions follows later,
 * presumably behind an #else that is not visible in this excerpt.
 */
28 #define ISAMD_FREELIST_CHUNK 1
/*
 * isamd_getmethod: set up a method descriptor with default values.
 * The lines visible here allocate a fresh ISAMD_M with xmalloc (never freed,
 * as the inline remark says) and set filecat to the static def_cat table,
 * compare_item to NULL, debug to 0 and max_blocks_mem to 10.
 * NOTE(review): how the parameter me is used, the remaining def_cat rows
 * and the return statement are not visible in this excerpt.
 */
32 ISAMD_M isamd_getmethod (ISAMD_M me)
34 static struct ISAMD_filecat_s def_cat[] = {
36 /* blocksz, max. Unused time being */
37 { 32, 40 }, /* 24 is the smallest unreasonable size! */
49 /* old values from isamc, long time ago...
59 ISAMD_M m = (ISAMD_M) xmalloc (sizeof(*m)); /* never released! */
60 m->filecat = def_cat; /* ok, only alloc'd once */
67 m->compare_item = NULL;
69 m->debug = 0; /* default to no debug */
71 m->max_blocks_mem = 10;
/*
 * isamd_open: create an ISAMD handle on top of the block-file set bfs.
 * Visible steps:
 *  - allocate the handle and a private copy of *method;
 *  - walk the filecat table until an entry with mblocks == 0 has been
 *    visited, logging each entry when method->debug is above 0;
 *  - allocate one ISAMD_file per category and, for each, open a block file
 *    named name plus one letter, read its header from block 0, set up the
 *    allocation buffer and clear all statistics counters.
 * NOTE(review): the assignments to no_files and max_cat, the trailing
 * arguments of bf_open and bf_read, the use of writeflag and the return
 * statement are on lines not visible in this excerpt.
 */
78 ISAMD isamd_open (BFiles bfs, const char *name, int writeflag, ISAMD_M method)
81 ISAMD_filecat filecat;
84 is = (ISAMD) xmalloc (sizeof(*is));
/* Work on a private copy so later changes do not touch the caller descriptor. */
86 is->method = (ISAMD_M) xmalloc (sizeof(*is->method));
87 memcpy (is->method, method, sizeof(*method));
88 filecat = is->method->filecat;
91 /* determine number of block categories */
92 if (is->method->debug>0)
93 logf (LOG_LOG, "isamd: bsize maxkeys");
96 if (is->method->debug>0)
97 logf (LOG_LOG, "isamd:%6d %6d",
98 filecat[i].bsize, filecat[i].mblocks);
99 } while (filecat[i++].mblocks);
103 assert (is->no_files > 0);
104 assert (is->max_cat <=8 ); /* we have only 3 bits for it */
/* NOTE(review): three bits hold the values 0..7, so a limit of 8 looks one too high - confirm. */
106 is->files = (ISAMD_file) xmalloc (sizeof(*is->files)*is->no_files);
108 for (i = 0; i<is->no_files; i++)
/* File name is the base name followed by one letter: A for category 0, B for 1, and so on. */
/* NOTE(review): the size of fname is not visible here - confirm it can hold name plus two bytes. */
112 sprintf (fname, "%s%c", name, i+'A');
113 is->files[i].bf = bf_open (bfs, fname, is->method->filecat[i].bsize,
115 is->files[i].head_is_dirty = 0;
/* The header is read from block 0; the two assignments that follow set lastblock = 1 */
/* and freelist = 0, presumably as defaults when that read returns zero. */
116 if (!bf_read (is->files[i].bf, 0, 0, sizeof(ISAMD_head),
119 is->files[i].head.lastblock = 1;
120 is->files[i].head.freelist = 0;
122 is->files[i].alloc_entries_num = 0;
/* Upper bound for alloc_entries_num (checked in release_block): ints per block, minus one. */
123 is->files[i].alloc_entries_max =
124 is->method->filecat[i].bsize / sizeof(int) - 1;
/* One block-sized scratch buffer per category, used by the free-list code. */
125 is->files[i].alloc_buf = (char *)
126 xmalloc (is->method->filecat[i].bsize);
127 is->files[i].no_writes = 0; /* clear statistics */
128 is->files[i].no_reads = 0;
129 is->files[i].no_skip_writes = 0;
130 is->files[i].no_allocated = 0;
131 is->files[i].no_released = 0;
132 is->files[i].no_remap = 0;
133 is->files[i].no_forward = 0;
134 is->files[i].no_backward = 0;
135 is->files[i].sum_forward = 0;
136 is->files[i].sum_backward = 0;
137 is->files[i].no_next = 0;
138 is->files[i].no_prev = 0;
139 is->files[i].no_op_nodiff=0;
140 is->files[i].no_op_intdiff=0;
141 is->files[i].no_op_extdiff=0;
142 is->files[i].no_fbuilds=0;
143 is->files[i].no_appds=0;
144 is->files[i].no_merges=0;
145 is->files[i].no_remerges=0;
/*
 * isamd_block_used: report head.lastblock - 1 for the category file type.
 * type is range-checked against 0 .. is->no_files - 1 first.
 * NOTE(review): the value returned for an out-of-range type is on a line
 * not visible in this excerpt.
 */
152 int isamd_block_used (ISAMD is, int type)
154 if (type < 0 || type >= is->no_files)
156 return is->files[type].head.lastblock-1;
/*
 * isamd_block_size: return the configured block size (bsize) of category
 * type, taken from the method filecat table.
 * type is range-checked against 0 .. is->no_files - 1 first.
 * NOTE(review): the value returned for an out-of-range type is on a line
 * not visible in this excerpt.
 */
159 int isamd_block_size (ISAMD is, int type)
161 ISAMD_filecat filecat = is->method->filecat;
162 if (type < 0 || type >= is->no_files)
164 return filecat[type].bsize;
/*
 * isamd_close: shut down an ISAMD handle.
 * For every category file the visible code asserts that the block file is
 * open, writes the header back to block 0 when head_is_dirty is set, frees
 * fc_list and closes the block file. When method->debug is above 0 it also
 * logs the counters collected in is->files[]: traversal statistics, I/O and
 * allocation counts, and the open/build/append/merge counters.
 * NOTE(review): the return value, any flushing of the free-list buffers and
 * the release of is itself are on lines not visible in this excerpt.
 */
167 int isamd_close (ISAMD is)
171 if (is->method->debug>0)
173 logf (LOG_LOG, "isamd statistics");
174 logf (LOG_LOG, "f nxt forw mid-f prev backw mid-b");
175 for (i = 0; i<is->no_files; i++)
176 logf (LOG_LOG, "%d%8d%8d%8.1f%8d%8d%8.1f",i,
177 is->files[i].no_next,
178 is->files[i].no_forward,
/* Average jump distance; the test on the counter avoids dividing by zero. */
179 is->files[i].no_forward ?
180 (double) is->files[i].sum_forward/is->files[i].no_forward
182 is->files[i].no_prev,
183 is->files[i].no_backward,
184 is->files[i].no_backward ?
185 (double) is->files[i].sum_backward/is->files[i].no_backward
188 if (is->method->debug>0)
189 logf (LOG_LOG, "f writes reads skipped alloc released ");
190 for (i = 0; i<is->no_files; i++)
193 assert (is->files[i].bf);
/* Persist the in-memory header only if it was modified. */
194 if (is->files[i].head_is_dirty)
195 bf_write (is->files[i].bf, 0, 0, sizeof(ISAMD_head),
197 if (is->method->debug>0)
198 logf (LOG_LOG, "%d%8d%8d%8d%8d%8d",i,
199 is->files[i].no_writes,
200 is->files[i].no_reads,
201 is->files[i].no_skip_writes,
202 is->files[i].no_allocated,
203 is->files[i].no_released);
204 xfree (is->files[i].fc_list);
206 bf_close (is->files[i].bf);
209 if (is->method->debug>0)
211 logf (LOG_LOG, "f opens simple int ext");
212 for (i = 0; i<is->no_files; i++)
/* First column is the sum of the three per-kind open counters that follow it. */
214 logf (LOG_LOG, "%d%8d%8d%8d%8d",i,
215 is->files[i].no_op_nodiff+
216 is->files[i].no_op_intdiff+
217 is->files[i].no_op_extdiff,
218 is->files[i].no_op_nodiff,
219 is->files[i].no_op_intdiff,
220 is->files[i].no_op_extdiff);
222 logf (LOG_LOG, " build append merge remrg");
/* These four counters are read from category 0 only. */
223 logf (LOG_LOG, "=%8d%8d%8d%8d",
224 is->files[0].no_fbuilds,
225 is->files[0].no_appds,
226 is->files[0].no_merges,
227 is->files[0].no_remerges);
/*
 * isamd_read_block: read block pos of category cat into dst.
 * Increments the no_reads counter of that category, logs the access when
 * method->debug is above 6, and returns whatever bf_read returns.
 * cat is used as an index into is->files without a range check here.
 */
235 int isamd_read_block (ISAMD is, int cat, int pos, char *dst)
237 ++(is->files[cat].no_reads);
238 if (is->method->debug > 6)
239 logf (LOG_LOG, "isamd: read_block %d:%d",cat, pos);
240 return bf_read (is->files[cat].bf, pos, 0, 0, dst);
/*
 * isamd_write_block: write the buffer src to block pos of category cat.
 * Increments the no_writes counter of that category, logs the access when
 * method->debug is above 6, and returns whatever bf_write returns.
 * cat is used as an index into is->files without a range check here.
 */
243 int isamd_write_block (ISAMD is, int cat, int pos, char *src)
245 ++(is->files[cat].no_writes);
246 if (is->method->debug > 6)
247 logf (LOG_LOG, "isamd: write_block %d:%d", cat, pos);
248 return bf_write (is->files[cat].bf, pos, 0, 0, src);
/*
 * isamd_write_dblock: prepend a block header to the data at src and write
 * the result to block pos of category cat.
 * The header is ISAMD_BLOCK_OFFSET_N bytes long and holds nextpos followed
 * by the block size, computed as offset + ISAMD_BLOCK_OFFSET_N.
 * The header is stored in the bytes immediately BEFORE src, so the caller
 * must pass a pointer that has at least ISAMD_BLOCK_OFFSET_N writable bytes
 * in front of it.
 * Returns the result of isamd_write_block.
 */
251 int isamd_write_dblock (ISAMD is, int cat, int pos, char *src,
252 int nextpos, int offset)
254 ISAMD_BLOCK_SIZE size = offset + ISAMD_BLOCK_OFFSET_N;
255 if (is->method->debug > 4)
256 logf (LOG_LOG, "isamd: write_dblock. size=%d nextpos=%d",
257 (int) size, nextpos);
/* Step back to where the header starts. */
258 src -= ISAMD_BLOCK_OFFSET_N;
259 assert( ISAMD_BLOCK_OFFSET_N == sizeof(int)+sizeof(int) );
260 memcpy (src, &nextpos, sizeof(int));
/* NOTE(review): copies sizeof(size) bytes; the assert above only matches this if */
/* ISAMD_BLOCK_SIZE has the size of an int - confirm against its declaration. */
261 memcpy (src + sizeof(int), &size, sizeof(size));
262 return isamd_write_block (is, cat, pos, src);
265 #if ISAMD_FREELIST_CHUNK
/*
 * flush_block (variant compiled when ISAMD_FREELIST_CHUNK is non-zero):
 * write the buffered free-list chunk of category cat back to disk.
 * Nothing is written unless head.freelist is non-zero and
 * alloc_entries_num is non-zero. Otherwise the entry count is stored in the
 * first int of alloc_buf, the buffer is written to the block numbered
 * head.freelist, and the in-memory count is reset to 0.
 * The result of bf_write is not checked.
 */
266 static void flush_block (ISAMD is, int cat)
268 char *abuf = is->files[cat].alloc_buf;
269 int block = is->files[cat].head.freelist;
270 if (block && is->files[cat].alloc_entries_num)
272 memcpy (abuf, &is->files[cat].alloc_entries_num, sizeof(int));
273 bf_write (is->files[cat].bf, block, 0, 0, abuf);
274 is->files[cat].alloc_entries_num = 0;
/*
 * alloc_block (variant compiled when ISAMD_FREELIST_CHUNK is non-zero):
 * obtain a free block number in category cat.
 * Visible behaviour: the no_allocated counter is incremented. With no free
 * list, the block number is taken from head.lastblock, which is then
 * incremented, and the header is marked dirty. Otherwise the free-list chunk
 * is read into alloc_buf if it has not been loaded yet (its first int is
 * the entry count) and the count is decremented. When the count reaches
 * zero, head.freelist is replaced by the int stored right after the count
 * and, if that is non-zero, the chunk it names is read in. The last visible
 * memcpy takes the block number from int slot 1 + alloc_entries_num of the
 * buffer.
 * NOTE(review): the if/else structure and the return statement are only
 * partly visible in this excerpt, and the results of bf_read are not checked.
 */
279 static int alloc_block (ISAMD is, int cat)
281 int block = is->files[cat].head.freelist;
282 char *abuf = is->files[cat].alloc_buf;
284 (is->files[cat].no_allocated)++;
288 block = (is->files[cat].head.lastblock)++; /* no free list */
289 is->files[cat].head_is_dirty = 1;
293 if (!is->files[cat].alloc_entries_num) /* read first time */
295 bf_read (is->files[cat].bf, block, 0, 0, abuf);
296 memcpy (&is->files[cat].alloc_entries_num, abuf,
297 sizeof(is->files[cat].alloc_entries_num));
298 assert (is->files[cat].alloc_entries_num > 0);
300 /* have some free blocks now */
301 assert (is->files[cat].alloc_entries_num > 0);
302 is->files[cat].alloc_entries_num--;
303 if (!is->files[cat].alloc_entries_num) /* last one in block? */
/* The second int of the chunk becomes the new free-list head. */
305 memcpy (&is->files[cat].head.freelist, abuf + sizeof(int),
307 is->files[cat].head_is_dirty = 1;
309 if (is->files[cat].head.freelist)
311 bf_read (is->files[cat].bf, is->files[cat].head.freelist,
313 memcpy (&is->files[cat].alloc_entries_num, abuf,
314 sizeof(is->files[cat].alloc_entries_num));
315 assert (is->files[cat].alloc_entries_num);
319 memcpy (&block, abuf + sizeof(int) + sizeof(int) *
320 is->files[cat].alloc_entries_num, sizeof(int));
/*
 * release_block (variant compiled when ISAMD_FREELIST_CHUNK is non-zero):
 * put block pos of category cat back on the free list.
 * Visible behaviour: the no_released counter is incremented. If a free list
 * exists but its chunk has not been loaded, the chunk is read into
 * alloc_buf. A chunk that already holds alloc_entries_max entries is
 * written out and the count reset to 0. With a count of zero, the previous
 * free-list head is stored in int slot 1 of the buffer, pos becomes the new
 * head.freelist, and the header is marked dirty. The other visible memcpy
 * targets int slot 1 + alloc_entries_num. Finally the count is incremented.
 * NOTE(review): the else keyword and the source argument of the last memcpy
 * (presumably pos) are on lines not visible in this excerpt. The results of
 * bf_read and bf_write are not checked.
 */
325 static void release_block (ISAMD is, int cat, int pos)
327 char *abuf = is->files[cat].alloc_buf;
328 int block = is->files[cat].head.freelist;
330 (is->files[cat].no_released)++;
332 if (block && !is->files[cat].alloc_entries_num) /* must read block */
334 bf_read (is->files[cat].bf, block, 0, 0, abuf);
335 memcpy (&is->files[cat].alloc_entries_num, abuf,
336 sizeof(is->files[cat].alloc_entries_num));
337 assert (is->files[cat].alloc_entries_num > 0);
339 assert (is->files[cat].alloc_entries_num <= is->files[cat].alloc_entries_max);
/* Chunk is full: store its count, write it out and start a new one. */
340 if (is->files[cat].alloc_entries_num == is->files[cat].alloc_entries_max)
343 memcpy (abuf, &is->files[cat].alloc_entries_num, sizeof(int));
344 bf_write (is->files[cat].bf, block, 0, 0, abuf);
345 is->files[cat].alloc_entries_num = 0;
347 if (!is->files[cat].alloc_entries_num) /* make new buffer? */
349 memcpy (abuf + sizeof(int), &block, sizeof(int));
350 is->files[cat].head.freelist = pos;
351 is->files[cat].head_is_dirty = 1;
355 memcpy (abuf + sizeof(int) +
356 is->files[cat].alloc_entries_num*sizeof(int),
359 is->files[cat].alloc_entries_num++;
/*
 * flush_block (second definition in this file, the counterpart of the one
 * guarded by #if ISAMD_FREELIST_CHUNK).
 * NOTE(review): only the signature and the abuf declaration are visible in
 * this excerpt; what, if anything, the body does cannot be told from here.
 */
362 static void flush_block (ISAMD is, int cat)
364 char *abuf = is->files[cat].alloc_buf;
/*
 * alloc_block (second definition in this file, the counterpart of the one
 * guarded by #if ISAMD_FREELIST_CHUNK): obtain a free block number in
 * category cat.
 * Visible behaviour: the header is marked dirty and no_allocated is
 * incremented. If head.freelist is non-zero, that block is taken and its
 * first sizeof(int) bytes are read to become the new head.freelist. The
 * remaining visible statement takes the block number from head.lastblock
 * and increments it.
 * NOTE(review): the else keyword and the return statement are on lines not
 * visible in this excerpt, and the result of bf_read is not checked.
 */
368 static int alloc_block (ISAMD is, int cat)
371 char buf[sizeof(int)];
373 is->files[cat].head_is_dirty = 1;
374 (is->files[cat].no_allocated)++;
375 if ((block = is->files[cat].head.freelist))
377 bf_read (is->files[cat].bf, block, 0, sizeof(int), buf);
378 memcpy (&is->files[cat].head.freelist, buf, sizeof(int));
381 block = (is->files[cat].head.lastblock)++;
/*
 * release_block (second definition in this file, the counterpart of the one
 * guarded by #if ISAMD_FREELIST_CHUNK): push block pos of category cat onto
 * the free list.
 * The previous head.freelist value is written into the first sizeof(int)
 * bytes of block pos, and pos becomes the new head.freelist. The
 * no_released counter is incremented and the header is marked dirty.
 * The result of bf_write is not checked.
 */
385 static void release_block (ISAMD is, int cat, int pos)
387 char buf[sizeof(int)];
389 (is->files[cat].no_released)++;
390 is->files[cat].head_is_dirty = 1;
391 memcpy (buf, &is->files[cat].head.freelist, sizeof(int));
392 is->files[cat].head.freelist = pos;
393 bf_write (is->files[cat].bf, pos, 0, sizeof(int), buf);
/*
 * isamd_alloc_block: obtain a free block number in category cat.
 * When the category has an fc_list, its fc_max slots are scanned for a
 * non-zero block number, and the slot that is taken is cleared. The visible
 * code also calls alloc_block for the category and logs the result when
 * method->debug is above 4.
 * NOTE(review): the condition under which alloc_block is called (presumably
 * only when the fc_list gave nothing), the assignment from nb to block and
 * the return statement are on lines not visible in this excerpt.
 */
397 int isamd_alloc_block (ISAMD is, int cat)
401 if (is->files[cat].fc_list)
404 for (j = 0; j < is->files[cat].fc_max; j++)
/* A zero entry means an empty slot; candidates must be non-zero. */
405 if ((nb = is->files[cat].fc_list[j]) && (!block || nb < block))
407 is->files[cat].fc_list[j] = 0;
413 block = alloc_block (is, cat);
414 if (is->method->debug > 4)
415 logf (LOG_LOG, "isamd: alloc_block in cat %d: %d", cat, block);
/*
 * isamd_release_block: give block pos of category cat back for reuse.
 * The call is logged when method->debug is above 4. When the category has
 * an fc_list, its slots are scanned for an empty (zero) one and pos is
 * stored there. The visible code also calls release_block.
 * NOTE(review): the early exit after a slot is found, and therefore the
 * exact condition under which release_block runs, are on lines not visible
 * in this excerpt.
 */
419 void isamd_release_block (ISAMD is, int cat, int pos)
421 if (is->method->debug > 4)
422 logf (LOG_LOG, "isamd: release_block in cat %d: %d", cat, pos);
425 if (is->files[cat].fc_list)
428 for (j = 0; j<is->files[cat].fc_max; j++)
429 if (!is->files[cat].fc_list[j])
431 is->files[cat].fc_list[j] = pos;
435 release_block (is, cat, pos);
/*
 * init_fc: allocate the fc_list array of category cat with j int entries,
 * record j as fc_max, and set entries to 0 (the empty-slot marker).
 * NOTE(review): the value given to j and the loop around the zeroing
 * statement are on lines not visible in this excerpt.
 */
438 static void init_fc (ISAMD is, int cat)
442 is->files[cat].fc_max = j;
443 is->files[cat].fc_list = (int *)
444 xmalloc (sizeof(*is->files[0].fc_list) * j);
446 is->files[cat].fc_list[j] = 0;
/*
 * release_fc: hand the blocks held in the fc_list of category cat over to
 * release_block and clear their slots.
 * j starts at fc_max; each non-zero entry visited is released and zeroed.
 * NOTE(review): the loop statement that steps j is on a line not visible in
 * this excerpt.
 */
449 static void release_fc (ISAMD is, int cat)
451 int b, j = is->files[cat].fc_max;
454 if ((b = is->files[cat].fc_list[j]))
456 release_block (is, cat, b);
457 is->files[cat].fc_list[j] = 0;
/*
 * isamd_pp_close: tear down a read position.
 * Visible steps: stop the decoder through the method code_stop callback,
 * release the diff state with isamd_free_diffs, and log the position fields
 * when method->debug is above 5.
 * NOTE(review): the declaration of is and the freeing of pp and its buffer
 * are on lines not visible in this excerpt.
 */
461 void isamd_pp_close (ISAMD_PP pp)
465 (*is->method->code_stop)(ISAMD_DECODE, pp->decodeClientData);
466 isamd_free_diffs(pp); /* see merge-d.h */
469 if (is->method->debug > 5)
470 logf (LOG_LOG, "isamd_pp_close %p %d=%d:%d sz=%d n=%d=%d:%d",
471 pp, isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, pp->size,
472 pp->next, isamd_type(pp->next), isamd_block(pp->next) );
/*
 * isamd_pp_open: create a read position (ISAMD_PP) for the list at ipos.
 * A buffer sized for the largest block category is allocated and zeroed, and
 * a decoder is started through the method code_start callback.
 * If ipos is a singleton, no block is read: the key is decoded from ipos
 * itself and stored in the buffer as a single insert diff, starting at
 * ISAMD_BLOCK_OFFSET_1. The remaining visible code (presumably the else
 * branch) takes category and block number from ipos, reads that block and
 * parses next, size and numKeys from its header.
 * NOTE(review): initialisation of the other pp fields, the trailing
 * arguments of some calls and the return statement are on lines not visible
 * in this excerpt.
 */
477 ISAMD_PP isamd_pp_open (ISAMD is, ISAMD_P ipos)
479 ISAMD_PP pp = (ISAMD_PP) xmalloc (sizeof(*pp));
481 int sz = is->method->filecat[is->max_cat].bsize;
482 /* always allocate for the largest blocks, saves trouble */
483 struct it_key singlekey;
484 char *c_ptr; /* for fake encoding the singlekey */
489 src = pp->buf = (char *) xmalloc (sz);
490 memset(src,'\0',sz); /* clear the buffer, for new blocks */
499 pp->decodeClientData = (*is->method->code_start)(ISAMD_DECODE);
501 if ( is_singleton(ipos) )
505 if (is->method->debug > 5)
506 logf (LOG_LOG, "isamd_pp_open %p %d=%d:%d sz=%d n=%d=%d:%d",
507 pp, isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, pp->size,
508 pp->next, isamd_type(pp->next), isamd_block(pp->next) );
509 singleton_decode(ipos, &singlekey );
510 pp->offset=ISAMD_BLOCK_OFFSET_1;
512 ofs=pp->offset+sizeof(int); /* reserve length of diffsegment */
513 singlekey.seqno = singlekey.seqno * 2 + 1; /* make an insert diff */
514 c_ptr=&(pp->buf[ofs]);
515 i_ptr=(char*)(&singlekey);
516 (*is->method->code_item)(ISAMD_ENCODE, pp->decodeClientData,
518 (*is->method->code_reset)(pp->decodeClientData);
/* Assuming code_item advanced c_ptr, ofs now marks the end of the encoded key; */
/* that value is stored in the int reserved at pp->offset. */
519 ofs += c_ptr-&(pp->buf[ofs]);
520 memcpy( &(pp->buf[pp->offset]), &ofs, sizeof(int) );
521 /* since we memset buf earlier, we already have a zero endmark! */
523 if (is->method->debug > 5)
524 logf (LOG_LOG, "isamd_pp_open single %d=%x: %d.%d sz=%d",
526 singlekey.sysno, singlekey.seqno/2,
/* Regular list: split ipos into category and block number, then load the block. */
531 pp->cat = isamd_type(ipos);
532 pp->pos = isamd_block(ipos);
537 isamd_read_block (is, pp->cat, pp->pos, src);
/* First-block header layout as parsed here: next, size, numKeys. */
538 memcpy (&pp->next, src, sizeof(pp->next));
539 src += sizeof(pp->next);
540 memcpy (&pp->size, src, sizeof(pp->size));
541 src += sizeof(pp->size);
542 memcpy (&pp->numKeys, src, sizeof(pp->numKeys));
543 src += sizeof(pp->numKeys);
544 assert (pp->next != pp->pos);
545 pp->offset = src - pp->buf;
546 assert (pp->offset == ISAMD_BLOCK_OFFSET_1);
547 assert(pp->size>=ISAMD_BLOCK_OFFSET_1); /*??*/
549 if (is->method->debug > 5)
550 logf (LOG_LOG, "isamd_pp_open %p %d=%d:%d sz=%d n=%d=%d:%d",
551 pp, isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, pp->size,
552 pp->next, isamd_type(pp->next), isamd_block(pp->next) );
/*
 * isamd_buildfirstblock: serialise the first-block header of pp through dst.
 * The fields next, size and numKeys are copied in that order, after which
 * dst must be exactly ISAMD_BLOCK_OFFSET_1 bytes past pp->buf (asserted).
 * The result is logged when method->debug is above 5.
 * NOTE(review): the declaration and initial value of dst are on a line not
 * visible in this excerpt. The first assert compares pp->next with the bare
 * pp->pos, whereas isamd_buildlaterblock compares it with
 * isamd_addr(pp->pos,pp->cat) - confirm which form is intended.
 */
559 void isamd_buildfirstblock(ISAMD_PP pp){
562 assert(pp->next != pp->pos);
563 memcpy(dst, &pp->next, sizeof(pp->next) );
564 dst += sizeof(pp->next);
565 memcpy(dst, &pp->size,sizeof(pp->size));
566 dst += sizeof(pp->size);
567 memcpy(dst, &pp->numKeys, sizeof(pp->numKeys));
568 dst += sizeof(pp->numKeys);
569 // memcpy(dst, &pp->diffs, sizeof(pp->diffs));
570 // dst += sizeof(pp->diffs);
571 assert (dst - pp->buf == ISAMD_BLOCK_OFFSET_1);
572 if (pp->is->method->debug > 5)
573 logf (LOG_LOG, "isamd: bldfirst: p=%d=%d:%d n=%d:%d:%d sz=%d nk=%d ",
574 isamd_addr(pp->pos,pp->cat),pp->cat, pp->pos,
575 pp->next, isamd_type(pp->next), isamd_block(pp->next),
576 pp->size, pp->numKeys);
/*
 * isamd_buildlaterblock: serialise the header of a non-first block of pp
 * through dst.
 * Only next and size are copied, in that order, after which dst must be
 * exactly ISAMD_BLOCK_OFFSET_N bytes past pp->buf (asserted). The first
 * assert rejects a block whose next pointer is its own address.
 * The result is logged when method->debug is above 5.
 * NOTE(review): the declaration and initial value of dst and part of the
 * logf argument list are on lines not visible in this excerpt.
 */
579 void isamd_buildlaterblock(ISAMD_PP pp){
582 assert(pp->next != isamd_addr(pp->pos,pp->cat));
583 memcpy(dst, &pp->next, sizeof(pp->next) );
584 dst += sizeof(pp->next);
585 memcpy(dst, &pp->size,sizeof(pp->size));
586 dst += sizeof(pp->size);
587 assert (dst - pp->buf == ISAMD_BLOCK_OFFSET_N);
588 if (pp->is->method->debug > 5)
589 logf (LOG_LOG, "isamd: l8r: sz=%d p=%d/%d>%d/%d",
592 isamd_block(pp->next), isamd_type(pp->next) );
/*
 * isamd_pp_read: read the next item of pp into the caller buffer buf.
 * Thin wrapper: passes the address of the local copy of buf to
 * isamd_read_item and returns its result unchanged.
 */
597 /* returns non-zero if item could be read; 0 otherwise */
598 int isamd_pp_read (ISAMD_PP pp, void *buf)
601 return isamd_read_item (pp, (char **) &buf);
602 /* note: isamd_read_item is in merge-d.c, because it is so */
603 /* convoluted with the merge process */
/*
 * isamd_read_main_item: decode the next main-list item of pp into *dst,
 * ignoring diffs.
 * While pp->offset is below pp->size the item is decoded from the current
 * block. Once the block is exhausted the visible code either returns 0 (end
 * of list) or follows pp->next. In the latter case it updates the next/prev
 * and forward/backward traversal counters of the category, switches pp->cat
 * and pp->pos to the block named by pp->next, reads that block, re-parses
 * its next and size header fields, resets the decoder and decodes the first
 * item of the new block. pp->offset is advanced past each decoded item.
 * NOTE(review): the condition guarding the return 0, the else keywords, the
 * reset of src before the block read and the final return statements are on
 * lines not visible in this excerpt.
 */
606 /* read one main item from file - decode and store it in *dst.
607 Does not worry about diffs
610 1 if item could be read ok
612 int isamd_read_main_item (ISAMD_PP pp, char **dst)
615 char *src = pp->buf + pp->offset;
619 if (pp->offset >= pp->size)
624 return 0; /* end of file */
626 if (pp->next > pp->pos)
628 if (pp->next == pp->pos + 1)
629 is->files[pp->cat].no_next++;
632 is->files[pp->cat].no_forward++;
633 is->files[pp->cat].sum_forward += pp->next - pp->pos;
638 if (pp->next + 1 == pp->pos)
639 is->files[pp->cat].no_prev++;
642 is->files[pp->cat].no_backward++;
643 is->files[pp->cat].sum_backward += pp->pos - pp->next;
646 /* out new block position */
647 newcat = isamd_type(pp->next);
648 pp->pos = isamd_block(pp->next);
649 pp->cat = isamd_type(pp->next);
652 /* read block and save 'next' and 'size' entry */
653 isamd_read_block (is, pp->cat, pp->pos, src);
654 memcpy (&pp->next, src, sizeof(pp->next));
655 src += sizeof(pp->next);
656 memcpy (&pp->size, src, sizeof(pp->size));
657 src += sizeof(pp->size);
658 /* assume block is non-empty */
659 pp->offset = oldoffs = src - pp->buf;
660 assert (pp->offset == ISAMD_BLOCK_OFFSET_N);
661 assert (pp->next != isamd_addr(pp->pos,pp->cat));
662 (*is->method->code_reset)(pp->decodeClientData);
663 /* finally, read the item */
664 (*is->method->code_item)(ISAMD_DECODE, pp->decodeClientData, dst, &src);
665 pp->offset = src - pp->buf;
666 if (is->method->debug > 8)
667 logf (LOG_LOG, "isamd: read_m: block %d:%d sz=%d ofs=%d-%d next=%d",
668 pp->cat, pp->pos, pp->size, oldoffs, pp->offset, pp->next);
672 (*is->method->code_item)(ISAMD_DECODE, pp->decodeClientData, dst, &src);
673 pp->offset = src - pp->buf;
674 if (is->method->debug > 8)
675 logf (LOG_LOG, "isamd: read_m: got %d:%d sz=%d ofs=%d-%d next=%d",
676 pp->cat, pp->pos, pp->size, oldoffs, pp->offset, pp->next);
/*
 * isamd_pp_num: takes a read position and returns an int.
 * NOTE(review): only the signature is visible in this excerpt; the body and
 * the meaning of the returned value must be confirmed against the full file.
 */
680 int isamd_pp_num (ISAMD_PP pp)
/*
 * hexdump: format bytes starting at p as two-digit lowercase hex values,
 * appended to buff with strcat and separated by single spaces.
 * When buff is NULL the function-static localbuff (128 bytes) is used, so
 * all calls made in that mode share one buffer and the function is not
 * reentrant.
 * NOTE(review): no bounds check on buff is visible here - confirm callers
 * never pass a len whose output exceeds the buffer. The loop statement, the
 * initial clearing of buff and the return statement are on lines not
 * visible in this excerpt.
 */
685 static char *hexdump(unsigned char *p, int len, char *buff) {
686 static char localbuff[128];
688 if (!buff) buff=localbuff;
691 sprintf(bytebuff,"%02x",*p);
693 strcat(buff,bytebuff);
694 if (len) strcat(buff," ");
/*
 * isamd_pp_dump: log a human-readable dump of the list at ipos.
 * method->debug is forced to 0 for the duration of the dump and restored at
 * the end, so the read routines stay quiet. The list is opened with
 * isamd_pp_open; the header values are logged; diff sets are walked from
 * pp->offset while the index stays inside the block and diffmax is
 * positive; then every key returned by isamd_pp_read is logged together
 * with a hex dump of the bytes it was decoded from. Whenever the block
 * address changes, a block header line and a hex dump of the block are
 * logged.
 * NOTE(review): the local declarations (including the initial value of
 * diffmax), the update of diffidx inside the loop and the closing of pp are
 * on lines not visible in this excerpt.
 */
700 void isamd_pp_dump (ISAMD is, ISAMD_P ipos)
711 int olddebug= is->method->debug;
712 is->method->debug=0; /* no debug logs while reading for dump */
714 logf(LOG_LOG,"dumping isamd block %d (%d:%d)",
715 (int)ipos, isamd_type(ipos), isamd_block(ipos) );
716 pp=isamd_pp_open(is,ipos);
717 logf(LOG_LOG,"numKeys=%d, ofs=%d sz=%d",
718 pp->numKeys, pp->offset, pp->size );
719 diffidx=oldoffs= pp->offset;
720 while ((diffidx < is->method->filecat[pp->cat].bsize) && (diffmax>0))
/* Each diff set starts with an int that is read into diffmax. */
722 memcpy(&diffmax,&(pp->buf[diffidx]),sizeof(int));
723 logf (LOG_LOG,"diff set at %d-%d: %s", diffidx, diffmax,
724 hexdump(pp->buf+diffidx,8,0));
725 /*! todo: dump the actual diffs as well !!! */
729 while(isamd_pp_read(pp, &key))
/* A change of address means the reader moved on to another block. */
731 if (oldaddr != isamd_addr(pp->pos,pp->cat) )
733 oldaddr = isamd_addr(pp->pos,pp->cat);
734 logf(LOG_LOG,"block %d=%d:%d sz=%d nx=%d=%d:%d ofs=%d",
735 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
737 pp->next, isamd_type(pp->next), isamd_block(pp->next),
743 logf(LOG_LOG," %05x: %s",i,hexdump(pp->buf+i,n,hexbuff));
746 if (oldoffs > ISAMD_BLOCK_OFFSET_N)
747 oldoffs=ISAMD_BLOCK_OFFSET_N;
750 logf (LOG_LOG," got %d:%d=%x:%x from %s at %d=%x",
751 key.sysno, key.seqno,
752 key.sysno, key.seqno,
753 hexdump(pp->buf+oldoffs, pp->offset-oldoffs, hexbuff),
755 oldoffs = pp->offset;
757 /*!*/ /*TODO: dump diffs too!!! */
759 is->method->debug=olddebug;
764 * Revision 1.15 1999-09-27 14:36:36 heikki
767 * Revision 1.14 1999/09/23 18:01:18 heikki
768 * singleton optimising
770 * Revision 1.13 1999/09/20 15:48:06 heikki
773 * Revision 1.12 1999/09/13 13:28:28 heikki
774 * isam-d optimizing: merging input data in the same go
776 * Revision 1.11 1999/08/25 18:09:24 heikki
777 * Starting to optimize
779 * Revision 1.10 1999/08/24 13:17:42 heikki
780 * Block sizes, comments
782 * Revision 1.9 1999/08/20 12:25:58 heikki
783 * Statistics in isamd
785 * Revision 1.8 1999/08/18 13:28:16 heikki
786 * Set log levels to decent values
788 * Revision 1.6 1999/08/17 19:44:25 heikki
791 * Revision 1.4 1999/08/04 14:21:18 heikki
792 * isam-d seems to be working.
794 * Revision 1.3 1999/07/21 14:24:50 heikki
795 * isamd write and read functions ok, except when diff block full.
796 * (merge not yet done)
798 * Revision 1.1 1999/07/14 12:34:43 heikki
799 * Copied from isamh, starting to change things...