1 /* $Id: isamb.c,v 1.66 2005-01-13 11:55:02 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 #include <yaz/xmalloc.h>
26 #include <idzebra/isamb.h>
33 #define ISAMB_MAJOR_VERSION 2
34 #define ISAMB_MINOR_VERSION 0
46 /* maximum size of encoded buffer */
47 #define DST_ITEM_MAX 256
49 #define ISAMB_MAX_LEVEL 10
50 /* approx 2*max page + max size of item */
51 #define DST_BUF_SIZE 16840
53 #define ISAMB_CACHE_ENTRY_SIZE 4096
55 /* CAT_MAX: _must_ be power of 2 */
57 #define CAT_MASK (CAT_MAX-1)
58 /* CAT_NO: <= CAT_MAX */
61 /* ISAMB_PTR_CODEC = 1 var, =0 fixed */
62 #define ISAMB_PTR_CODEC 1
64 struct ISAMB_cache_entry {
69 struct ISAMB_cache_entry *next;
75 struct ISAMB_head head;
76 struct ISAMB_cache_entry *cache_entries;
83 struct ISAMB_file *file;
85 int cache; /* 0 = no cache, 1 = use cache, -1 = dummy isam (for testing only) */
86 int log_io; /* log level for bf_read/bf_write calls */
87 int log_freelist; /* log level for freelist handling */
88 zint skipped_numbers; /* on a leaf node */
89 zint returned_numbers;
90 zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1 = higher etc */
91 zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */
102 zint no_items; /* number of nodes in this + children */
106 void *decodeClientData;
114 int maxlevel; /* total depth */
117 zint skipped_numbers; /* on a leaf node */
118 zint returned_numbers;
119 zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1 = higher etc */
120 zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */
121 struct ISAMB_block **block;
122 int scope; /* on what level we forward */
127 static void encode_ptr (char **dst, zint pos)
129 unsigned char *bp = (unsigned char*) *dst;
133 *bp++ = 128 | (pos & 127);
140 static void encode_ptr (char **dst, zint pos)
142 memcpy(*dst, &pos, sizeof(pos));
143 (*dst) += sizeof(pos);
148 static void decode_ptr (const char **src1, zint *pos)
150 const unsigned char **src = (const unsigned char **) src1;
155 while (((c = *(*src)++) & 128))
157 d += ((zint) (c & 127) << r);
160 d += ((zint) c << r);
164 static void decode_ptr (const char **src, zint *pos)
166 memcpy (pos, *src, sizeof(*pos));
167 (*src) += sizeof(*pos);
171 ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method,
174 ISAMB isamb = xmalloc (sizeof(*isamb));
178 isamb->method = (ISAMC_M *) xmalloc (sizeof(*method));
179 memcpy (isamb->method, method, sizeof(*method));
180 isamb->no_cat = CAT_NO;
182 isamb->log_freelist = 0;
183 isamb->cache = cache;
184 isamb->skipped_numbers = 0;
185 isamb->returned_numbers = 0;
186 for (i = 0;i<ISAMB_MAX_LEVEL;i++)
187 isamb->skipped_nodes[i]= isamb->accessed_nodes[i]=0;
190 isamb->file = xmalloc (sizeof(*isamb->file) * isamb->no_cat);
191 for (i = 0; i < isamb->no_cat; i++)
193 char fname[DST_BUF_SIZE];
194 char hbuf[DST_BUF_SIZE];
195 isamb->file[i].cache_entries = 0;
196 isamb->file[i].head_dirty = 0;
197 sprintf (fname, "%s%c", name, i+'A');
199 isamb->file[i].bf = bf_open (bfs, fname, ISAMB_CACHE_ENTRY_SIZE,
202 isamb->file[i].bf = bf_open (bfs, fname, b_size, writeflag);
204 /* fill-in default values (for empty isamb) */
205 isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1;
206 isamb->file[i].head.last_block = isamb->file[i].head.first_block;
207 isamb->file[i].head.block_size = b_size;
209 if (i == isamb->no_cat-1 || b_size > 128)
210 isamb->file[i].head.block_offset = 8;
212 isamb->file[i].head.block_offset = 4;
214 isamb->file[i].head.block_offset = 11;
216 isamb->file[i].head.block_max =
217 b_size - isamb->file[i].head.block_offset;
218 isamb->file[i].head.free_list = 0;
219 if (bf_read (isamb->file[i].bf, 0, 0, 0, hbuf))
221 /* got header assume "isamb"major minor len can fit in 16 bytes */
223 int major, minor, len, pos = 0;
226 if (memcmp(hbuf, "isamb", 5))
228 yaz_log(YLOG_WARN, "bad isamb header for file %s", fname);
231 if (sscanf(hbuf+5, "%d %d %d", &major, &minor, &len) != 3)
233 yaz_log(YLOG_WARN, "bad isamb header for file %s", fname);
236 if (major != ISAMB_MAJOR_VERSION)
238 yaz_log(YLOG_WARN, "bad major version for file %s %d, must be %d",
239 fname, major, ISAMB_MAJOR_VERSION);
242 for (left = len - b_size; left > 0; left = left - b_size)
245 if (!bf_read (isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size))
247 yaz_log(YLOG_WARN, "truncated isamb header for "
248 "file=%s len=%d pos=%d",
254 decode_ptr(&src, &isamb->file[i].head.first_block);
255 decode_ptr(&src, &isamb->file[i].head.last_block);
256 decode_ptr(&src, &zint_tmp);
257 isamb->file[i].head.block_size = zint_tmp;
258 decode_ptr(&src, &zint_tmp);
259 isamb->file[i].head.block_max = zint_tmp;
260 decode_ptr(&src, &isamb->file[i].head.free_list);
262 assert (isamb->file[i].head.block_size >= isamb->file[i].head.block_offset);
263 isamb->file[i].head_dirty = 0;
264 assert(isamb->file[i].head.block_size == b_size);
268 yaz_log(YLOG_WARN, "isamb debug enabled. Things will be slower than usual");
273 static void flush_blocks (ISAMB b, int cat)
275 while (b->file[cat].cache_entries)
277 struct ISAMB_cache_entry *ce_this = b->file[cat].cache_entries;
278 b->file[cat].cache_entries = ce_this->next;
282 yaz_log (b->log_io, "bf_write: flush_blocks");
283 bf_write (b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf);
285 xfree (ce_this->buf);
290 static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr)
292 int cat = (int) (pos&CAT_MASK);
293 int off = (int) (((pos/CAT_MAX) &
294 (ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size - 1))
295 * b->file[cat].head.block_size);
296 zint norm = pos / (CAT_MASK*ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size);
298 struct ISAMB_cache_entry **ce, *ce_this = 0, **ce_last = 0;
303 assert (ISAMB_CACHE_ENTRY_SIZE >= b->file[cat].head.block_size);
304 for (ce = &b->file[cat].cache_entries; *ce; ce = &(*ce)->next, no++)
307 if ((*ce)->pos == norm)
310 *ce = (*ce)->next; /* remove from list */
312 ce_this->next = b->file[cat].cache_entries; /* move to front */
313 b->file[cat].cache_entries = ce_this;
317 memcpy (ce_this->buf + off, userbuf,
318 b->file[cat].head.block_size);
322 memcpy (userbuf, ce_this->buf + off,
323 b->file[cat].head.block_size);
330 assert (ce_last && *ce_last);
332 *ce_last = 0; /* remove the last entry from list */
335 yaz_log (b->log_io, "bf_write: get_block");
336 bf_write (b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf);
338 xfree (ce_this->buf);
341 ce_this = xmalloc (sizeof(*ce_this));
342 ce_this->next = b->file[cat].cache_entries;
343 b->file[cat].cache_entries = ce_this;
344 ce_this->buf = xmalloc (ISAMB_CACHE_ENTRY_SIZE);
346 yaz_log (b->log_io, "bf_read: get_block");
347 if (!bf_read (b->file[cat].bf, norm, 0, 0, ce_this->buf))
348 memset (ce_this->buf, 0, ISAMB_CACHE_ENTRY_SIZE);
351 memcpy (ce_this->buf + off, userbuf, b->file[cat].head.block_size);
357 memcpy (userbuf, ce_this->buf + off, b->file[cat].head.block_size);
363 void isamb_close (ISAMB isamb)
366 for (i = 0;isamb->accessed_nodes[i];i++)
367 yaz_log(YLOG_DEBUG, "isamb_close level leaf-%d: "ZINT_FORMAT" read, "
368 ZINT_FORMAT" skipped",
369 i, isamb->accessed_nodes[i], isamb->skipped_nodes[i]);
370 yaz_log(YLOG_DEBUG, "isamb_close returned "ZINT_FORMAT" values, "
371 "skipped "ZINT_FORMAT,
372 isamb->skipped_numbers, isamb->returned_numbers);
373 for (i = 0; i<isamb->no_cat; i++)
375 flush_blocks (isamb, i);
376 if (isamb->file[i].head_dirty)
378 char hbuf[DST_BUF_SIZE];
379 int major = ISAMB_MAJOR_VERSION;
380 int minor = ISAMB_MINOR_VERSION;
382 char *dst = hbuf + 16;
384 int b_size = isamb->file[i].head.block_size;
386 encode_ptr(&dst, isamb->file[i].head.first_block);
387 encode_ptr(&dst, isamb->file[i].head.last_block);
388 encode_ptr(&dst, isamb->file[i].head.block_size);
389 encode_ptr(&dst, isamb->file[i].head.block_max);
390 encode_ptr(&dst, isamb->file[i].head.free_list);
391 memset(dst, '\0', b_size); /* ensure no random bytes are written */
395 /* print exactly 16 bytes (including trailing 0) */
396 sprintf(hbuf, "isamb%02d %02d %02d\r\n", major, minor, len);
398 bf_write (isamb->file[i].bf, pos, 0, 0, hbuf);
400 for (left = len - b_size; left > 0; left = left - b_size)
403 bf_write (isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size);
406 bf_close (isamb->file[i].bf);
409 xfree (isamb->method);
413 /* open_block: read one block at pos.
414 Decode leading sys bytes .. consisting of
416 0: leader byte, != 0 leaf, == 0, non-leaf
417 1-2: used size of block
418 3-7*: number of items and all children
420 * Reserve 5 bytes for large block sizes. 1 for small ones .. Number
421 of items. We can thus have at most 2^40 nodes.
423 static struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos)
425 int cat = (int) (pos&CAT_MASK);
427 int offset = b->file[cat].head.block_offset;
428 struct ISAMB_block *p;
431 p = xmalloc (sizeof(*p));
433 p->cat = (int) (pos & CAT_MASK);
434 p->buf = xmalloc (b->file[cat].head.block_size);
437 if (!get_block (b, pos, p->buf, 0))
439 yaz_log (b->log_io, "bf_read: open_block");
440 if (!bf_read (b->file[cat].bf, pos/CAT_MAX, 0, 0, p->buf))
442 yaz_log (YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld",
443 (long) pos, (long) pos/CAT_MAX);
447 p->bytes = p->buf + offset;
449 p->size = (p->buf[1] + 256 * p->buf[2]) - offset;
452 yaz_log (YLOG_FATAL, "Bad block size %d in pos=" ZINT_FORMAT "\n",
455 assert (p->size >= 0);
457 decode_ptr(&src, &p->no_items);
462 p->decodeClientData = (*b->method->codec.start)();
466 struct ISAMB_block *new_block (ISAMB b, int leaf, int cat)
468 struct ISAMB_block *p;
470 p = xmalloc (sizeof(*p));
471 p->buf = xmalloc (b->file[cat].head.block_size);
473 if (!b->file[cat].head.free_list)
476 block_no = b->file[cat].head.last_block++;
477 p->pos = block_no * CAT_MAX + cat;
481 p->pos = b->file[cat].head.free_list;
482 assert((p->pos & CAT_MASK) == cat);
483 if (!get_block (b, p->pos, p->buf, 0))
485 yaz_log (b->log_io, "bf_read: new_block");
486 if (!bf_read (b->file[cat].bf, p->pos/CAT_MAX, 0, 0, p->buf))
488 yaz_log (YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld",
489 (long) p->pos/CAT_MAX, (long) p->pos/CAT_MAX);
493 yaz_log (b->log_freelist, "got block " ZINT_FORMAT " from freelist %d:" ZINT_FORMAT, p->pos,
494 cat, p->pos/CAT_MAX);
495 memcpy (&b->file[cat].head.free_list, p->buf, sizeof(zint));
498 b->file[cat].head_dirty = 1;
499 memset (p->buf, 0, b->file[cat].head.block_size);
500 p->bytes = p->buf + b->file[cat].head.block_offset;
507 p->decodeClientData = (*b->method->codec.start)();
511 struct ISAMB_block *new_leaf (ISAMB b, int cat)
513 return new_block (b, 1, cat);
517 struct ISAMB_block *new_int (ISAMB b, int cat)
519 return new_block (b, 0, cat);
522 static void check_block (ISAMB b, struct ISAMB_block *p)
524 assert(b); /* mostly to make the compiler shut up about unused b */
532 char *startp = p->bytes;
533 const char *src = startp;
534 char *endp = p->bytes + p->size;
537 decode_ptr (&src, &pos);
538 assert ((pos&CAT_MASK) == p->cat);
542 decode_ptr (&src, &item_len);
543 assert (item_len > 0 && item_len < 80);
545 decode_ptr (&src, &pos);
546 if ((pos&CAT_MASK) != p->cat)
548 assert ((pos&CAT_MASK) == p->cat);
554 void close_block (ISAMB b, struct ISAMB_block *p)
560 yaz_log (b->log_freelist, "release block " ZINT_FORMAT " from freelist %d:" ZINT_FORMAT,
561 p->pos, p->cat, p->pos/CAT_MAX);
562 memcpy (p->buf, &b->file[p->cat].head.free_list, sizeof(zint));
563 b->file[p->cat].head.free_list = p->pos;
564 if (!get_block (b, p->pos, p->buf, 1))
566 yaz_log (b->log_io, "bf_write: close_block (deleted)");
567 bf_write (b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf);
572 int offset = b->file[p->cat].head.block_offset;
573 int size = p->size + offset;
574 char *dst = p->buf + 3;
575 assert (p->size >= 0);
577 /* memset becuase encode_ptr usually does not write all bytes */
578 memset(p->buf, 0, b->file[p->cat].head.block_offset);
580 p->buf[1] = size & 255;
581 p->buf[2] = size >> 8;
582 encode_ptr(&dst, p->no_items);
584 if (!get_block (b, p->pos, p->buf, 1))
586 yaz_log (b->log_io, "bf_write: close_block");
587 bf_write (b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf);
590 (*b->method->codec.stop)(p->decodeClientData);
595 int insert_sub (ISAMB b, struct ISAMB_block **p,
596 void *new_item, int *mode,
598 struct ISAMB_block **sp,
599 void *sub_item, int *sub_size,
600 const void *max_item);
602 int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item,
604 ISAMC_I *stream, struct ISAMB_block **sp,
605 void *split_item, int *split_size, const void *last_max_item)
607 char *startp = p->bytes;
608 const char *src = startp;
609 char *endp = p->bytes + p->size;
611 struct ISAMB_block *sub_p1 = 0, *sub_p2 = 0;
612 char sub_item[DST_ITEM_MAX];
619 assert(p->size >= 0);
620 decode_ptr (&src, &pos);
625 const char *src0 = src;
626 decode_ptr (&src, &item_len);
627 d = (*b->method->compare_item)(src, lookahead_item);
630 sub_p1 = open_block (b, pos);
632 diff_terms -= sub_p1->no_items;
633 more = insert_sub (b, &sub_p1, lookahead_item, mode,
635 sub_item, &sub_size, src);
636 diff_terms += sub_p1->no_items;
641 decode_ptr (&src, &pos);
645 sub_p1 = open_block (b, pos);
647 diff_terms -= sub_p1->no_items;
648 more = insert_sub (b, &sub_p1, lookahead_item, mode, stream, &sub_p2,
649 sub_item, &sub_size, last_max_item);
650 diff_terms += sub_p1->no_items;
653 diff_terms += sub_p2->no_items;
657 p->no_items += diff_terms;
661 /* there was a split - must insert pointer in this one */
662 char dst_buf[DST_BUF_SIZE];
665 assert (sub_size < 80 && sub_size > 1);
667 memcpy (dst, startp, src - startp);
671 encode_ptr (&dst, sub_size); /* sub length and item */
672 memcpy (dst, sub_item, sub_size);
675 encode_ptr (&dst, sub_p2->pos); /* pos */
677 if (endp - src) /* remaining data */
679 memcpy (dst, src, endp - src);
682 p->size = dst - dst_buf;
683 assert (p->size >= 0);
686 if (p->size <= b->file[p->cat].head.block_max)
688 memcpy (startp, dst_buf, dst - dst_buf);
692 struct ISAMB_block *sub_p3;
694 zint no_items_first_half = 0;
700 half = src + b->file[p->cat].head.block_size/2;
701 decode_ptr (&src, &pos);
703 /* read sub block so we can get no_items for it */
704 sub_p3 = open_block(b, pos);
705 no_items_first_half += sub_p3->no_items;
706 close_block(b, sub_p3);
710 decode_ptr (&src, &split_size_tmp);
711 *split_size = (int) split_size_tmp;
714 decode_ptr (&src, &pos);
716 /* read sub block so we can get no_items for it */
717 sub_p3 = open_block(b, pos);
718 no_items_first_half += sub_p3->no_items;
719 close_block(b, sub_p3);
721 /* p is first half */
722 p_new_size = src - dst_buf;
723 memcpy (p->bytes, dst_buf, p_new_size);
725 decode_ptr (&src, &split_size_tmp);
726 *split_size = (int) split_size_tmp;
727 memcpy (split_item, src, *split_size);
730 /* *sp is second half */
731 *sp = new_int (b, p->cat);
732 (*sp)->size = endp - src;
733 memcpy ((*sp)->bytes, src, (*sp)->size);
735 p->size = p_new_size;
737 /* adjust no_items in first&second half */
738 (*sp)->no_items = p->no_items - no_items_first_half;
739 p->no_items = no_items_first_half;
742 close_block (b, sub_p2);
744 close_block (b, sub_p1);
748 int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item,
749 int *lookahead_mode, ISAMC_I *stream,
750 struct ISAMB_block **sp2,
751 void *sub_item, int *sub_size,
752 const void *max_item)
754 struct ISAMB_block *p = *sp1;
757 char dst_buf[DST_BUF_SIZE], *dst = dst_buf;
759 void *c1 = (*b->method->codec.start)();
760 void *c2 = (*b->method->codec.start)();
762 int quater = b->file[b->no_cat-1].head.block_max / 4;
763 char *mid_cut = dst_buf + quater * 2;
764 char *tail_cut = dst_buf + quater * 3;
765 char *maxp = dst_buf + b->file[b->no_cat-1].head.block_max;
768 char cut_item_buf[DST_ITEM_MAX];
769 int cut_item_size = 0;
770 int no_items = 0; /* number of items (total) */
771 int no_items_1 = 0; /* number of items (first half) */
775 char file_item_buf[DST_ITEM_MAX];
776 char *file_item = file_item_buf;
779 endp = p->bytes + p->size;
780 (*b->method->codec.decode)(c1, &file_item, &src);
783 const char *dst_item = 0; /* resulting item to be inserted */
784 char *lookahead_next;
788 d = (*b->method->compare_item)(file_item_buf, lookahead_item);
790 /* d now holds comparison between existing file item and
793 d > 0: lookahead before file
794 d < 0: lookahead after file
798 /* lookahead must be inserted */
799 dst_item = lookahead_item;
800 /* if this is not an insertion, it's really bad .. */
801 if (!*lookahead_mode)
803 yaz_log (YLOG_WARN, "isamb: Inconsistent register (1)");
804 assert (*lookahead_mode);
808 dst_item = file_item_buf;
811 if (!*lookahead_mode && d == 0)
813 /* it's a deletion and they match so there is nothing to be
814 inserted anyway .. But mark the thing bad (file item
815 was part of input.. The item will not be part of output */
818 else if (!half1 && dst > mid_cut)
820 /* we have reached the splitting point for the first time */
821 const char *dst_item_0 = dst_item;
822 half1 = dst; /* candidate for splitting */
824 /* encode the resulting item */
825 (*b->method->codec.encode)(c2, &dst, &dst_item);
827 cut_item_size = dst_item - dst_item_0;
828 assert(cut_item_size > 0);
829 memcpy (cut_item_buf, dst_item_0, cut_item_size);
832 no_items_1 = no_items;
837 /* encode the resulting item */
838 (*b->method->codec.encode)(c2, &dst, &dst_item);
842 /* now move "pointers" .. result has been encoded .. */
845 /* we must move the lookahead pointer */
848 /* no more room. Mark lookahead as "gone".. */
852 /* move it really.. */
853 lookahead_next = lookahead_item;
854 if (!(*stream->read_item)(stream->clientData,
858 /* end of stream reached: no "more" and no lookahead */
862 if (lookahead_item && max_item &&
863 (*b->method->compare_item)(max_item, lookahead_item) <= 0)
865 /* the lookahead goes beyond what we allow in this
866 leaf. Mark it as "gone" */
875 /* exact match .. move both pointers */
877 lookahead_next = lookahead_item;
878 if (!(*stream->read_item)(stream->clientData,
879 &lookahead_next, lookahead_mode))
885 break; /* end of file stream reached .. */
886 file_item = file_item_buf; /* move file pointer */
887 (*b->method->codec.decode)(c1, &file_item, &src);
891 /* file pointer must be moved */
894 file_item = file_item_buf;
895 (*b->method->codec.decode)(c1, &file_item, &src);
899 maxp = dst_buf + b->file[b->no_cat-1].head.block_max + quater;
900 /* this loop runs when we are "appending" to a leaf page. That is
901 either it's empty (new) or all file items have been read in
903 while (lookahead_item)
906 const char *src = lookahead_item;
909 /* compare lookahead with max item */
911 (*b->method->compare_item)(max_item, lookahead_item) <= 0)
913 /* stop if we have reached the value of max item */
916 if (!*lookahead_mode)
918 /* this is append. So a delete is bad */
919 yaz_log (YLOG_WARN, "isamb: Inconsistent register (2)");
922 else if (!half1 && dst > tail_cut)
924 const char *src_0 = src;
925 half1 = dst; /* candidate for splitting */
927 (*b->method->codec.encode)(c2, &dst, &src);
929 cut_item_size = src - src_0;
930 assert(cut_item_size > 0);
931 memcpy (cut_item_buf, src_0, cut_item_size);
933 no_items_1 = no_items;
937 (*b->method->codec.encode)(c2, &dst, &src);
947 dst_item = lookahead_item;
948 if (!(*stream->read_item)(stream->clientData, &dst_item,
955 new_size = dst - dst_buf;
956 if (p && p->cat != b->no_cat-1 &&
957 new_size > b->file[p->cat].head.block_max)
959 /* non-btree block will be removed */
962 /* delete it too!! */
963 p = 0; /* make a new one anyway */
966 { /* must create a new one */
968 for (i = 0; i < b->no_cat; i++)
969 if (new_size <= b->file[i].head.block_max)
975 if (new_size > b->file[p->cat].head.block_max)
978 const char *cut_item = cut_item_buf;
983 assert(cut_item_size > 0);
986 p->size = half1 - dst_buf;
987 memcpy (p->bytes, dst_buf, half1 - dst_buf);
988 p->no_items = no_items_1;
991 *sp2 = new_leaf (b, p->cat);
993 (*b->method->codec.reset)(c2);
995 first_dst = (*sp2)->bytes;
997 (*b->method->codec.encode)(c2, &first_dst, &cut_item);
999 memcpy (first_dst, half2, dst - half2);
1001 (*sp2)->size = (first_dst - (*sp2)->bytes) + (dst - half2);
1002 (*sp2)->no_items = no_items - no_items_1;
1005 memcpy (sub_item, cut_item_buf, cut_item_size);
1006 *sub_size = cut_item_size;
1010 memcpy (p->bytes, dst_buf, dst - dst_buf);
1012 p->no_items = no_items;
1014 (*b->method->codec.stop)(c1);
1015 (*b->method->codec.stop)(c2);
1020 int insert_sub (ISAMB b, struct ISAMB_block **p, void *new_item,
1023 struct ISAMB_block **sp,
1024 void *sub_item, int *sub_size,
1025 const void *max_item)
1027 if (!*p || (*p)->leaf)
1028 return insert_leaf (b, p, new_item, mode, stream, sp, sub_item,
1029 sub_size, max_item);
1031 return insert_int (b, *p, new_item, mode, stream, sp, sub_item,
1032 sub_size, max_item);
1035 int isamb_unlink (ISAMB b, ISAMC_P pos)
1037 struct ISAMB_block *p1;
1041 p1 = open_block(b, pos);
1047 const char *src = p1->bytes + p1->offset;
1049 decode_ptr(&src, &sub_p);
1050 isamb_unlink(b, sub_p);
1052 while (src != p1->bytes + p1->size)
1054 decode_ptr(&src, &item_len);
1056 decode_ptr(&src, &sub_p);
1057 isamb_unlink(b, sub_p);
1064 ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream)
1066 char item_buf[DST_ITEM_MAX];
1070 int must_delete = 0;
1077 item_ptr = item_buf;
1079 (*stream->read_item)(stream->clientData, &item_ptr, &i_mode);
1083 item_ptr = item_buf;
1084 more = (*stream->read_item)(stream->clientData, &item_ptr, &i_mode);
1087 struct ISAMB_block *p = 0, *sp = 0;
1088 char sub_item[DST_ITEM_MAX];
1092 p = open_block (b, pos);
1093 more = insert_sub (b, &p, item_buf, &i_mode, stream, &sp,
1094 sub_item, &sub_size, 0);
1096 { /* increase level of tree by one */
1097 struct ISAMB_block *p2 = new_int (b, p->cat);
1098 char *dst = p2->bytes + p2->size;
1100 encode_ptr (&dst, p->pos);
1101 assert (sub_size < 80 && sub_size > 1);
1102 encode_ptr (&dst, sub_size);
1103 memcpy (dst, sub_item, sub_size);
1105 encode_ptr (&dst, sp->pos);
1107 p2->size = dst - p2->bytes;
1108 p2->no_items = p->no_items + sp->no_items;
1109 pos = p2->pos; /* return new super page */
1110 close_block (b, sp);
1111 close_block (b, p2);
1115 pos = p->pos; /* return current one (again) */
1117 if (p->no_items == 0)
1125 isamb_unlink(b, pos);
1131 ISAMB_PP isamb_pp_open_x (ISAMB isamb, ISAMB_P pos, int *level, int scope)
1133 ISAMB_PP pp = xmalloc (sizeof(*pp));
1139 pp->block = xmalloc (ISAMB_MAX_LEVEL * sizeof(*pp->block));
1146 pp->skipped_numbers = 0;
1147 pp->returned_numbers = 0;
1149 for (i = 0;i<ISAMB_MAX_LEVEL;i++)
1150 pp->skipped_nodes[i] = pp->accessed_nodes[i]=0;
1153 struct ISAMB_block *p = open_block (isamb, pos);
1154 const char *src = p->bytes + p->offset;
1155 pp->block[pp->level] = p;
1157 pp->total_size += p->size;
1161 decode_ptr (&src, &pos);
1162 p->offset = src - p->bytes;
1164 pp->accessed_nodes[pp->level]++;
1166 pp->block[pp->level+1] = 0;
1167 pp->maxlevel = pp->level;
1173 ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos, int scope)
1175 return isamb_pp_open_x (isamb, pos, 0, scope);
1178 void isamb_pp_close_x (ISAMB_PP pp, int *size, int *blocks)
1183 yaz_log(YLOG_DEBUG, "isamb_pp_close lev=%d returned "ZINT_FORMAT" values, "
1184 "skipped "ZINT_FORMAT,
1185 pp->maxlevel, pp->skipped_numbers, pp->returned_numbers);
1186 for (i = pp->maxlevel;i>=0;i--)
1187 if (pp->skipped_nodes[i] || pp->accessed_nodes[i])
1188 yaz_log(YLOG_DEBUG, "isamb_pp_close level leaf-%d: "
1189 ZINT_FORMAT" read, "ZINT_FORMAT" skipped", i,
1190 pp->accessed_nodes[i], pp->skipped_nodes[i]);
1191 pp->isamb->skipped_numbers += pp->skipped_numbers;
1192 pp->isamb->returned_numbers += pp->returned_numbers;
1193 for (i = pp->maxlevel;i>=0;i--)
1195 pp->isamb->accessed_nodes[i] += pp->accessed_nodes[i];
1196 pp->isamb->skipped_nodes[i] += pp->skipped_nodes[i];
1199 *size = pp->total_size;
1201 *blocks = pp->no_blocks;
1202 for (i = 0; i <= pp->level; i++)
1203 close_block (pp->isamb, pp->block[i]);
1208 int isamb_block_info (ISAMB isamb, int cat)
1210 if (cat >= 0 && cat < isamb->no_cat)
1211 return isamb->file[cat].head.block_size;
1215 void isamb_pp_close (ISAMB_PP pp)
1217 isamb_pp_close_x (pp, 0, 0);
1220 /* simple recursive dumper .. */
1221 static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str),
1225 char prefix_str[1024];
1228 struct ISAMB_block *p = open_block (b, pos);
1229 sprintf(prefix_str, "%*s " ZINT_FORMAT " cat=%d size=%d max=%d items="
1230 ZINT_FORMAT, level*2, "",
1231 pos, p->cat, p->size, b->file[p->cat].head.block_max,
1234 sprintf(prefix_str, "%*s " ZINT_FORMAT, level*2, "", pos);
1237 while (p->offset < p->size)
1239 const char *src = p->bytes + p->offset;
1241 (*b->method->codec.decode)(p->decodeClientData, &dst, &src);
1242 (*b->method->log_item)(YLOG_DEBUG, buf, prefix_str);
1243 p->offset = src - (char*) p->bytes;
1245 assert(p->offset == p->size);
1249 const char *src = p->bytes + p->offset;
1253 decode_ptr (&src, &sub);
1254 p->offset = src - (char*) p->bytes;
1256 isamb_dump_r(b, sub, pr, level+1);
1258 while (p->offset < p->size)
1260 decode_ptr (&src, &item_len);
1261 (*b->method->log_item)(YLOG_DEBUG, src, prefix_str);
1263 decode_ptr (&src, &sub);
1265 p->offset = src - (char*) p->bytes;
1267 isamb_dump_r(b, sub, pr, level+1);
1274 void isamb_dump (ISAMB b, ISAMB_P pos, void (*pr)(const char *str))
1276 isamb_dump_r(b, pos, pr, 0);
1279 int isamb_pp_read (ISAMB_PP pp, void *buf)
1281 return isamb_pp_forward(pp, buf, 0);
1285 static int isamb_pp_on_right_node(ISAMB_PP pp, int level, const void *untilbuf)
1286 { /* looks one node higher to see if we should be on this node at all */
1287 /* useful in backing off quickly, and in avoiding tail descends */
1288 /* call with pp->level to begin with */
1289 struct ISAMB_block *p;
1297 yaz_log(YLOG_DEBUG, "isamb_pp_on_right returning true for root");
1299 return 1; /* we can never skip the root node */
1302 p = pp->block[level];
1303 assert(p->offset <= p->size);
1304 if (p->offset < p->size)
1306 assert(p->offset>0);
1307 src = p->bytes + p->offset;
1308 decode_ptr(&src, &item_len);
1310 (*pp->isamb->method->codec.log_item)(YLOG_DEBUG, untilbuf, "on_leaf: until");
1311 (*pp->isamb->method->codec.log_item)(YLOG_DEBUG, src, "on_leaf: value");
1313 cmp=(*pp->isamb->method->compare_item)(untilbuf, src);
1317 yaz_log(YLOG_DEBUG, "isamb_pp_on_right returning true "
1318 "cmp=%d lev=%d ofs=%d", cmp, level, p->offset);
1324 yaz_log(YLOG_DEBUG, "isamb_pp_on_right returning false "
1325 "cmp=%d lev=%d ofs=%d", cmp, level, p->offset);
1332 yaz_log(YLOG_DEBUG, "isamb_pp_on_right at tail, looking higher "
1335 return isamb_pp_on_right_node(pp, level, untilbuf);
1337 } /* isamb_pp_on_right_node */
1339 static int isamb_pp_read_on_leaf(ISAMB_PP pp, void *buf)
1340 { /* reads the next item on the current leaf, returns 0 if end of leaf*/
1341 struct ISAMB_block *p = pp->block[pp->level];
1346 if (p->offset == p->size)
1349 yaz_log(YLOG_DEBUG, "isamb_pp_read_on_leaf returning 0 on node %d", p->pos);
1351 return 0; /* at end of leaf */
1353 src = p->bytes + p->offset;
1355 (*pp->isamb->method->codec.decode)(p->decodeClientData, &dst, &src);
1356 p->offset = src - (char*) p->bytes;
1358 (*pp->isamb->method->codec.log_item)(YLOG_DEBUG, buf, "read_on_leaf returning 1");
1360 pp->returned_numbers++;
1362 } /* read_on_leaf */
1364 static int isamb_pp_forward_on_leaf(ISAMB_PP pp, void *buf, const void *untilbuf)
1365 { /* forwards on the current leaf, returns 0 if not found */
1370 if (!isamb_pp_read_on_leaf(pp, buf))
1372 /* FIXME - this is an extra function call, inline the read? */
1373 cmp=(*pp->isamb->method->compare_item)(untilbuf, buf);
1375 { /* cmp<2 found a good one */
1378 yaz_log(YLOG_DEBUG, "isam_pp_fwd_on_leaf skipped %d items", skips);
1380 pp->returned_numbers++;
1384 if (!isamb_pp_on_right_node(pp, pp->level, untilbuf))
1385 return 0; /* never mind the rest of this leaf */
1386 pp->skipped_numbers++;
1389 } /* forward_on_leaf */
1391 static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos)
1392 { /* climbs higher in the tree, until finds a level with data left */
1393 /* returns the node to (consider to) descend to in *pos) */
1394 struct ISAMB_block *p = pp->block[pp->level];
1398 yaz_log(YLOG_DEBUG, "isamb_pp_climb_level starting "
1399 "at level %d node %d ofs=%d sz=%d",
1400 pp->level, p->pos, p->offset, p->size);
1402 assert(pp->level >= 0);
1403 assert(p->offset <= p->size);
1407 yaz_log(YLOG_DEBUG, "isamb_pp_climb_level returning 0 at root");
1411 assert(pp->level>0);
1412 close_block(pp->isamb, pp->block[pp->level]);
1413 pp->block[pp->level]=0;
1415 p = pp->block[pp->level];
1417 yaz_log(YLOG_DEBUG, "isamb_pp_climb_level climbed to level %d node %d ofs=%d",
1418 pp->level, p->pos, p->offset);
1421 assert(p->offset <= p->size);
1422 if (p->offset == p->size)
1424 /* we came from the last pointer, climb on */
1425 if (!isamb_pp_climb_level(pp, pos))
1427 p = pp->block[pp->level];
1431 /* skip the child we just came from */
1433 yaz_log(YLOG_DEBUG, "isam_pp_climb_level: skipping lev=%d ofs=%d sz=%d",
1434 pp->level, p->offset, p->size);
1436 assert (p->offset < p->size);
1437 src = p->bytes + p->offset;
1438 decode_ptr(&src, &item_len);
1440 decode_ptr(&src, pos);
1441 p->offset = src - (char *)p->bytes;
1448 static zint isamb_pp_forward_unode(ISAMB_PP pp, zint pos, const void *untilbuf)
1449 { /* scans a upper node until it finds a child <= untilbuf */
1450 /* pp points to the key value, as always. pos is the child read from */
1452 /* if all values are too small, returns the last child in the node */
1453 /* FIXME - this can be detected, and avoided by looking at the */
1454 /* parent node, but that gets messy. Presumably the cost is */
1455 /* pretty low anyway */
1456 struct ISAMB_block *p = pp->block[pp->level];
1457 const char *src = p->bytes + p->offset;
1463 yaz_log(YLOG_DEBUG, "isamb_pp_forward_unode starting "
1464 "at level %d node %d ofs=%di sz=%d",
1465 pp->level, p->pos, p->offset, p->size);
1468 assert(p->offset <= p->size);
1469 if (p->offset == p->size)
1472 yaz_log(YLOG_DEBUG, "isamb_pp_forward_unode returning at end "
1473 "at level %d node %d ofs=%di sz=%d",
1474 pp->level, p->pos, p->offset, p->size);
1476 return pos; /* already at the end of it */
1478 while(p->offset < p->size)
1480 decode_ptr(&src, &item_len);
1481 cmp=(*pp->isamb->method->compare_item)(untilbuf, src);
1483 decode_ptr(&src, &nxtpos);
1484 if (cmp<pp->scope) /* cmp<2 */
1487 yaz_log(YLOG_DEBUG, "isamb_pp_forward_unode returning a hit "
1488 "at level %d node %d ofs=%d sz=%d",
1489 pp->level, p->pos, p->offset, p->size);
1494 p->offset = src-(char*)p->bytes;
1495 (pp->skipped_nodes[pp->maxlevel - pp->level -1])++;
1501 yaz_log(YLOG_DEBUG, "isamb_pp_forward_unode returning at tail "
1502 "at level %d node %d ofs=%d sz=%d skips=%d",
1503 pp->level, p->pos, p->offset, p->size, skips);
1505 return pos; /* that's the last one in the line */
1507 } /* forward_unode */
1509 static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAMB_P pos, const void *untilbuf)
1510 { /* climbs down the tree, from pos, to the leftmost leaf */
1511 struct ISAMB_block *p = pp->block[pp->level];
1515 yaz_log(YLOG_DEBUG, "isamb_pp_descend_to_leaf "
1516 "starting at lev %d node %d ofs=%d lf=%d u=%p",
1517 pp->level, p->pos, p->offset, p->leaf, untilbuf);
1520 pos = isamb_pp_forward_unode(pp, pos, untilbuf);
1523 p = open_block(pp->isamb, pos);
1524 pp->block[pp->level]=p;
1525 ++(pp->accessed_nodes[pp->maxlevel-pp->level]);
1528 yaz_log(YLOG_DEBUG, "isamb_pp_descend_to_leaf "
1529 "got lev %d node %d lf=%d",
1530 pp->level, p->pos, p->leaf);
1534 assert (p->offset==0);
1535 src = p->bytes + p->offset;
1536 decode_ptr(&src, &pos);
1537 p->offset = src-(char*)p->bytes;
1538 isamb_pp_descend_to_leaf(pp, pos, untilbuf);
1540 yaz_log(YLOG_DEBUG, "isamb_pp_descend_to_leaf "
1541 "returning at lev %d node %d ofs=%d lf=%d",
1542 pp->level, p->pos, p->offset, p->leaf);
1544 } /* descend_to_leaf */
1546 static int isamb_pp_find_next_leaf(ISAMB_PP pp)
1547 { /* finds the next leaf by climbing up and down */
1549 if (!isamb_pp_climb_level(pp, &pos))
1551 isamb_pp_descend_to_leaf(pp, pos, 0);
1555 static int isamb_pp_climb_desc(ISAMB_PP pp, const void *untilbuf)
1556 { /* climbs up and descends to a leaf where values >= *untilbuf are found */
1559 struct ISAMB_block *p = pp->block[pp->level];
1560 yaz_log(YLOG_DEBUG, "isamb_pp_climb_desc starting "
1561 "at level %d node %d ofs=%d sz=%d",
1562 pp->level, p->pos, p->offset, p->size);
1564 if (!isamb_pp_climb_level(pp, &pos))
1566 /* see if it would pay to climb one higher */
1567 if (!isamb_pp_on_right_node(pp, pp->level, untilbuf))
1568 if (!isamb_pp_climb_level(pp, &pos))
1570 isamb_pp_descend_to_leaf(pp, pos, untilbuf);
1572 p = pp->block[pp->level];
1573 yaz_log(YLOG_DEBUG, "isamb_pp_climb_desc done "
1574 "at level %d node %d ofs=%d sz=%d",
1575 pp->level, p->pos, p->offset, p->size);
1580 int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf)
1583 struct ISAMB_block *p = pp->block[pp->level];
1585 yaz_log(YLOG_DEBUG, "isamb_pp_forward starting "
1586 "at level %d node %d ofs=%d sz=%d u=%p sc=%d",
1587 pp->level, p->pos, p->offset, p->size, untilbuf, scope);
1591 if (isamb_pp_forward_on_leaf(pp, buf, untilbuf))
1594 yaz_log(YLOG_DEBUG, "isamb_pp_forward (f) returning (A) "
1595 "at level %d node %d ofs=%d sz=%d",
1596 pp->level, p->pos, p->offset, p->size);
1600 if (! isamb_pp_climb_desc(pp, untilbuf))
1603 yaz_log(YLOG_DEBUG, "isamb_pp_forward (f) returning notfound (B) "
1604 "at level %d node %d ofs=%d sz=%d",
1605 pp->level, p->pos, p->offset, p->size);
1607 return 0; /* could not find a leaf */
1610 if (isamb_pp_forward_on_leaf(pp, buf, untilbuf))
1613 yaz_log(YLOG_DEBUG, "isamb_pp_forward (f) returning (C) "
1614 "at level %d node %d ofs=%d sz=%d",
1615 pp->level, p->pos, p->offset, p->size);
1619 } while (isamb_pp_find_next_leaf(pp));
1620 return 0; /* could not find at all */
1622 else { /* no untilbuf, a straight read */
1623 /* FIXME - this should be moved
1624 * directly into the pp_read */
1625 /* keeping here now, to keep same
1626 * interface as the old fwd */
1627 if (isamb_pp_read_on_leaf(pp, buf))
1630 yaz_log(YLOG_DEBUG, "isamb_pp_forward (read) returning (D) "
1631 "at level %d node %d ofs=%d sz=%d",
1632 pp->level, p->pos, p->offset, p->size);
1636 if (isamb_pp_find_next_leaf(pp))
1639 yaz_log(YLOG_DEBUG, "isamb_pp_forward (read) returning (E) "
1640 "at level %d node %d ofs=%d sz=%d",
1641 pp->level, p->pos, p->offset, p->size);
1643 return isamb_pp_read_on_leaf(pp, buf);
1648 } /* isam_pp_forward (new version) */
1650 void isamb_pp_pos(ISAMB_PP pp, double *current, double *total)
1651 { /* return an estimate of the current position and of the total number of */
1652 /* occureences in the isam tree, based on the current leaf */
1653 struct ISAMB_block *p = pp->block[pp->level];
1658 *total = pp->block[0]->no_items;
1659 *current = (double) pp->returned_numbers;
1661 yaz_log(YLOG_LOG, "isamb_pp_pos returning: cur= %0.1f tot=%0.1f rn="
1662 ZINT_FORMAT, *current, *total, pp->returned_numbers);