1 /* $Id: isamb.c,v 1.60 2004-11-29 21:53:00 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 #include <yaz/xmalloc.h>
33 #define ISAMB_MAJOR_VERSION 2
34 #define ISAMB_MINOR_VERSION 0
46 /* maximum size of encoded buffer */
47 #define DST_ITEM_MAX 256
49 #define ISAMB_MAX_LEVEL 10
50 /* approx 2*max page + max size of item */
51 #define DST_BUF_SIZE 16840
53 #define ISAMB_CACHE_ENTRY_SIZE 4096
55 /* CAT_MAX: _must_ be power of 2 */
57 #define CAT_MASK (CAT_MAX-1)
58 /* CAT_NO: <= CAT_MAX */
61 /* ISAMB_PTR_CODEC=1 var, =0 fixed */
62 #define ISAMB_PTR_CODEC 1
64 struct ISAMB_cache_entry {
69 struct ISAMB_cache_entry *next;
75 struct ISAMB_head head;
76 struct ISAMB_cache_entry *cache_entries;
83 struct ISAMB_file *file;
85 int cache; /* 0=no cache, 1=use cache, -1=dummy isam (for testing only) */
86 int log_io; /* log level for bf_read/bf_write calls */
87 int log_freelist; /* log level for freelist handling */
88 zint skipped_numbers; /* on a leaf node */
89 zint returned_numbers;
90 zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */
91 zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */
102 zint no_items; /* number of nodes in this + children */
106 void *decodeClientData;
114 int maxlevel; /* total depth */
117 zint skipped_numbers; /* on a leaf node */
118 zint returned_numbers;
119 zint skipped_nodes[ISAMB_MAX_LEVEL]; /* [0]=skipped leaves, 1=higher etc */
120 zint accessed_nodes[ISAMB_MAX_LEVEL]; /* nodes we did not skip */
121 struct ISAMB_block **block;
122 int scope; /* on what level we forward */
127 static void encode_ptr (char **dst, zint pos)
129 unsigned char *bp = (unsigned char*) *dst;
133 *bp++ = 128 | (pos & 127);
140 static void encode_ptr (char **dst, zint pos)
142 memcpy(*dst, &pos, sizeof(pos));
143 (*dst) += sizeof(pos);
148 static void decode_ptr (const char **src1, zint *pos)
150 const unsigned char **src = (const unsigned char **) src1;
155 while (((c = *(*src)++) & 128))
157 d += ((zint) (c & 127) << r);
160 d += ((zint) c << r);
164 static void decode_ptr (const char **src, zint *pos)
166 memcpy (pos, *src, sizeof(*pos));
167 (*src) += sizeof(*pos);
171 ISAMB isamb_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method,
174 ISAMB isamb = xmalloc (sizeof(*isamb));
178 isamb->method = (ISAMC_M *) xmalloc (sizeof(*method));
179 memcpy (isamb->method, method, sizeof(*method));
180 isamb->no_cat = CAT_NO;
182 isamb->log_freelist = 0;
183 isamb->cache = cache;
184 isamb->skipped_numbers=0;
185 isamb->returned_numbers=0;
186 for (i=0;i<ISAMB_MAX_LEVEL;i++)
187 isamb->skipped_nodes[i]= isamb->accessed_nodes[i]=0;
190 isamb->file = xmalloc (sizeof(*isamb->file) * isamb->no_cat);
191 for (i = 0; i < isamb->no_cat; i++)
193 char fname[DST_BUF_SIZE];
194 char hbuf[DST_BUF_SIZE];
195 isamb->file[i].cache_entries = 0;
196 isamb->file[i].head_dirty = 0;
197 sprintf (fname, "%s%c", name, i+'A');
199 isamb->file[i].bf = bf_open (bfs, fname, ISAMB_CACHE_ENTRY_SIZE,
202 isamb->file[i].bf = bf_open (bfs, fname, b_size, writeflag);
204 /* fill-in default values (for empty isamb) */
205 isamb->file[i].head.first_block = ISAMB_CACHE_ENTRY_SIZE/b_size+1;
206 isamb->file[i].head.last_block = isamb->file[i].head.first_block;
207 isamb->file[i].head.block_size = b_size;
208 if (i == isamb->no_cat-1 || b_size > 128)
209 isamb->file[i].head.block_offset = 8;
211 isamb->file[i].head.block_offset = 4;
212 isamb->file[i].head.block_max =
213 b_size - isamb->file[i].head.block_offset;
214 isamb->file[i].head.free_list = 0;
215 if (bf_read (isamb->file[i].bf, 0, 0, 0, hbuf))
217 /* got header assume "isamb"major minor len can fit in 16 bytes */
219 int major, minor, len, pos = 0;
222 if (memcmp(hbuf, "isamb", 5))
224 yaz_log(YLOG_WARN, "bad isamb header for file %s", fname);
227 if (sscanf(hbuf+5, "%d %d %d", &major, &minor, &len) != 3)
229 yaz_log(YLOG_WARN, "bad isamb header for file %s", fname);
232 if (major != ISAMB_MAJOR_VERSION)
234 yaz_log(YLOG_WARN, "bad major version for file %s %d, must be %d",
235 fname, major, ISAMB_MAJOR_VERSION);
238 for (left = len - b_size; left > 0; left = left - b_size)
241 if (!bf_read (isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size))
243 yaz_log(YLOG_WARN, "truncated isamb header for "
244 "file=%s len=%d pos=%d",
250 decode_ptr(&src, &isamb->file[i].head.first_block);
251 decode_ptr(&src, &isamb->file[i].head.last_block);
252 decode_ptr(&src, &zint_tmp);
253 isamb->file[i].head.block_size = zint_tmp;
254 decode_ptr(&src, &zint_tmp);
255 isamb->file[i].head.block_max = zint_tmp;
256 decode_ptr(&src, &isamb->file[i].head.free_list);
258 assert (isamb->file[i].head.block_size >= isamb->file[i].head.block_offset);
259 isamb->file[i].head_dirty = 0;
260 assert(isamb->file[i].head.block_size == b_size);
264 yaz_log(YLOG_WARN, "isamb debug enabled. Things will be slower than usual");
269 static void flush_blocks (ISAMB b, int cat)
271 while (b->file[cat].cache_entries)
273 struct ISAMB_cache_entry *ce_this = b->file[cat].cache_entries;
274 b->file[cat].cache_entries = ce_this->next;
278 yaz_log (b->log_io, "bf_write: flush_blocks");
279 bf_write (b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf);
281 xfree (ce_this->buf);
286 static int get_block (ISAMB b, ISAMC_P pos, char *userbuf, int wr)
288 int cat = (int) (pos&CAT_MASK);
289 int off = (int) (((pos/CAT_MAX) &
290 (ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size - 1))
291 * b->file[cat].head.block_size);
292 zint norm = pos / (CAT_MASK*ISAMB_CACHE_ENTRY_SIZE / b->file[cat].head.block_size);
294 struct ISAMB_cache_entry **ce, *ce_this = 0, **ce_last = 0;
299 assert (ISAMB_CACHE_ENTRY_SIZE >= b->file[cat].head.block_size);
300 for (ce = &b->file[cat].cache_entries; *ce; ce = &(*ce)->next, no++)
303 if ((*ce)->pos == norm)
306 *ce = (*ce)->next; /* remove from list */
308 ce_this->next = b->file[cat].cache_entries; /* move to front */
309 b->file[cat].cache_entries = ce_this;
313 memcpy (ce_this->buf + off, userbuf,
314 b->file[cat].head.block_size);
318 memcpy (userbuf, ce_this->buf + off,
319 b->file[cat].head.block_size);
326 assert (ce_last && *ce_last);
328 *ce_last = 0; /* remove the last entry from list */
331 yaz_log (b->log_io, "bf_write: get_block");
332 bf_write (b->file[cat].bf, ce_this->pos, 0, 0, ce_this->buf);
334 xfree (ce_this->buf);
337 ce_this = xmalloc (sizeof(*ce_this));
338 ce_this->next = b->file[cat].cache_entries;
339 b->file[cat].cache_entries = ce_this;
340 ce_this->buf = xmalloc (ISAMB_CACHE_ENTRY_SIZE);
342 yaz_log (b->log_io, "bf_read: get_block");
343 if (!bf_read (b->file[cat].bf, norm, 0, 0, ce_this->buf))
344 memset (ce_this->buf, 0, ISAMB_CACHE_ENTRY_SIZE);
347 memcpy (ce_this->buf + off, userbuf, b->file[cat].head.block_size);
353 memcpy (userbuf, ce_this->buf + off, b->file[cat].head.block_size);
359 void isamb_close (ISAMB isamb)
362 for (i=0;isamb->accessed_nodes[i];i++)
363 yaz_log(YLOG_DEBUG,"isamb_close level leaf-%d: "ZINT_FORMAT" read, "
364 ZINT_FORMAT" skipped",
365 i, isamb->accessed_nodes[i], isamb->skipped_nodes[i]);
366 yaz_log(YLOG_DEBUG,"isamb_close returned "ZINT_FORMAT" values, "
367 "skipped "ZINT_FORMAT,
368 isamb->skipped_numbers, isamb->returned_numbers);
369 for (i = 0; i<isamb->no_cat; i++)
371 flush_blocks (isamb, i);
372 if (isamb->file[i].head_dirty)
374 char hbuf[DST_BUF_SIZE];
375 int major = ISAMB_MAJOR_VERSION;
376 int minor = ISAMB_MINOR_VERSION;
378 char *dst = hbuf + 16;
380 int b_size = isamb->file[i].head.block_size;
382 encode_ptr(&dst, isamb->file[i].head.first_block);
383 encode_ptr(&dst, isamb->file[i].head.last_block);
384 encode_ptr(&dst, isamb->file[i].head.block_size);
385 encode_ptr(&dst, isamb->file[i].head.block_max);
386 encode_ptr(&dst, isamb->file[i].head.free_list);
387 memset(dst, '\0', b_size); /* ensure no random bytes are written */
391 /* print exactly 16 bytes (including trailing 0) */
392 sprintf(hbuf, "isamb%02d %02d %02d\r\n", major, minor, len);
394 bf_write (isamb->file[i].bf, pos, 0, 0, hbuf);
396 for (left = len - b_size; left > 0; left = left - b_size)
399 bf_write (isamb->file[i].bf, pos, 0, 0, hbuf + pos*b_size);
402 bf_close (isamb->file[i].bf);
405 xfree (isamb->method);
409 /* open_block: read one block at pos.
410 Decode leading sys bytes .. consisting of
412 0: leader byte, != 0 leaf, == 0, non-leaf
413 1-2: used size of block
414 3-7*: number of items and all children
416 * Reserve 5 bytes for large block sizes. 1 for small ones .. Number
417 of items. We can thus have at most 2^40 nodes.
419 static struct ISAMB_block *open_block (ISAMB b, ISAMC_P pos)
421 int cat = (int) (pos&CAT_MASK);
423 int offset = b->file[cat].head.block_offset;
424 struct ISAMB_block *p;
427 p = xmalloc (sizeof(*p));
429 p->cat = (int) (pos & CAT_MASK);
430 p->buf = xmalloc (b->file[cat].head.block_size);
433 if (!get_block (b, pos, p->buf, 0))
435 yaz_log (b->log_io, "bf_read: open_block");
436 if (!bf_read (b->file[cat].bf, pos/CAT_MAX, 0, 0, p->buf))
438 yaz_log (YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld",
439 (long) pos, (long) pos/CAT_MAX);
443 p->bytes = p->buf + offset;
445 p->size = (p->buf[1] + 256 * p->buf[2]) - offset;
448 yaz_log (YLOG_FATAL, "Bad block size %d in pos=" ZINT_FORMAT "\n",
451 assert (p->size >= 0);
453 decode_ptr(&src, &p->no_items);
458 p->decodeClientData = (*b->method->codec.start)();
462 struct ISAMB_block *new_block (ISAMB b, int leaf, int cat)
464 struct ISAMB_block *p;
466 p = xmalloc (sizeof(*p));
467 p->buf = xmalloc (b->file[cat].head.block_size);
469 if (!b->file[cat].head.free_list)
472 block_no = b->file[cat].head.last_block++;
473 p->pos = block_no * CAT_MAX + cat;
477 p->pos = b->file[cat].head.free_list;
478 assert((p->pos & CAT_MASK) == cat);
479 if (!get_block (b, p->pos, p->buf, 0))
481 yaz_log (b->log_io, "bf_read: new_block");
482 if (!bf_read (b->file[cat].bf, p->pos/CAT_MAX, 0, 0, p->buf))
484 yaz_log (YLOG_FATAL, "isamb: read fail for pos=%ld block=%ld",
485 (long) p->pos/CAT_MAX, (long) p->pos/CAT_MAX);
489 yaz_log (b->log_freelist, "got block " ZINT_FORMAT " from freelist %d:" ZINT_FORMAT, p->pos,
490 cat, p->pos/CAT_MAX);
491 memcpy (&b->file[cat].head.free_list, p->buf, sizeof(int));
494 b->file[cat].head_dirty = 1;
495 memset (p->buf, 0, b->file[cat].head.block_size);
496 p->bytes = p->buf + b->file[cat].head.block_offset;
503 p->decodeClientData = (*b->method->codec.start)();
507 struct ISAMB_block *new_leaf (ISAMB b, int cat)
509 return new_block (b, 1, cat);
513 struct ISAMB_block *new_int (ISAMB b, int cat)
515 return new_block (b, 0, cat);
518 static void check_block (ISAMB b, struct ISAMB_block *p)
520 assert(b); /* mostly to make the compiler shut up about unused b */
528 char *startp = p->bytes;
529 const char *src = startp;
530 char *endp = p->bytes + p->size;
533 decode_ptr (&src, &pos);
534 assert ((pos&CAT_MASK) == p->cat);
538 decode_ptr (&src, &item_len);
539 assert (item_len > 0 && item_len < 80);
541 decode_ptr (&src, &pos);
542 if ((pos&CAT_MASK) != p->cat)
544 assert ((pos&CAT_MASK) == p->cat);
550 void close_block (ISAMB b, struct ISAMB_block *p)
556 yaz_log (b->log_freelist, "release block " ZINT_FORMAT " from freelist %d:" ZINT_FORMAT,
557 p->pos, p->cat, p->pos/CAT_MAX);
558 memcpy (p->buf, &b->file[p->cat].head.free_list, sizeof(int));
559 b->file[p->cat].head.free_list = p->pos;
560 if (!get_block (b, p->pos, p->buf, 1))
562 yaz_log (b->log_io, "bf_write: close_block (deleted)");
563 bf_write (b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf);
568 int offset = b->file[p->cat].head.block_offset;
569 int size = p->size + offset;
570 char *dst = p->buf + 3;
571 assert (p->size >= 0);
573 /* memset becuase encode_ptr usually does not write all bytes */
574 memset(p->buf, 0, b->file[p->cat].head.block_offset);
576 p->buf[1] = size & 255;
577 p->buf[2] = size >> 8;
578 encode_ptr(&dst, p->no_items);
580 if (!get_block (b, p->pos, p->buf, 1))
582 yaz_log (b->log_io, "bf_write: close_block");
583 bf_write (b->file[p->cat].bf, p->pos/CAT_MAX, 0, 0, p->buf);
586 (*b->method->codec.stop)(p->decodeClientData);
591 int insert_sub (ISAMB b, struct ISAMB_block **p,
592 void *new_item, int *mode,
594 struct ISAMB_block **sp,
595 void *sub_item, int *sub_size,
596 const void *max_item);
598 int insert_int (ISAMB b, struct ISAMB_block *p, void *lookahead_item,
600 ISAMC_I *stream, struct ISAMB_block **sp,
601 void *split_item, int *split_size, const void *last_max_item)
603 char *startp = p->bytes;
604 const char *src = startp;
605 char *endp = p->bytes + p->size;
607 struct ISAMB_block *sub_p1 = 0, *sub_p2 = 0;
608 char sub_item[DST_ITEM_MAX];
615 assert(p->size >= 0);
616 decode_ptr (&src, &pos);
621 const char *src0 = src;
622 decode_ptr (&src, &item_len);
623 d = (*b->method->compare_item)(src, lookahead_item);
626 sub_p1 = open_block (b, pos);
628 diff_terms -= sub_p1->no_items;
629 more = insert_sub (b, &sub_p1, lookahead_item, mode,
631 sub_item, &sub_size, src);
632 diff_terms += sub_p1->no_items;
637 decode_ptr (&src, &pos);
641 sub_p1 = open_block (b, pos);
643 diff_terms -= sub_p1->no_items;
644 more = insert_sub (b, &sub_p1, lookahead_item, mode, stream, &sub_p2,
645 sub_item, &sub_size, last_max_item);
646 diff_terms += sub_p1->no_items;
649 diff_terms += sub_p2->no_items;
653 p->no_items += diff_terms;
657 /* there was a split - must insert pointer in this one */
658 char dst_buf[DST_BUF_SIZE];
661 assert (sub_size < 80 && sub_size > 1);
663 memcpy (dst, startp, src - startp);
667 encode_ptr (&dst, sub_size); /* sub length and item */
668 memcpy (dst, sub_item, sub_size);
671 encode_ptr (&dst, sub_p2->pos); /* pos */
673 if (endp - src) /* remaining data */
675 memcpy (dst, src, endp - src);
678 p->size = dst - dst_buf;
679 assert (p->size >= 0);
682 if (p->size <= b->file[p->cat].head.block_max)
684 memcpy (startp, dst_buf, dst - dst_buf);
688 struct ISAMB_block *sub_p3;
690 zint no_items_first_half = 0;
696 half = src + b->file[p->cat].head.block_size/2;
697 decode_ptr (&src, &pos);
699 /* read sub block so we can get no_items for it */
700 sub_p3 = open_block(b, pos);
701 no_items_first_half += sub_p3->no_items;
702 close_block(b, sub_p3);
706 decode_ptr (&src, &split_size_tmp);
707 *split_size = (int) split_size_tmp;
710 decode_ptr (&src, &pos);
712 /* read sub block so we can get no_items for it */
713 sub_p3 = open_block(b, pos);
714 no_items_first_half += sub_p3->no_items;
715 close_block(b, sub_p3);
717 /* p is first half */
718 p_new_size = src - dst_buf;
719 memcpy (p->bytes, dst_buf, p_new_size);
721 decode_ptr (&src, &split_size_tmp);
722 *split_size = (int) split_size_tmp;
723 memcpy (split_item, src, *split_size);
726 /* *sp is second half */
727 *sp = new_int (b, p->cat);
728 (*sp)->size = endp - src;
729 memcpy ((*sp)->bytes, src, (*sp)->size);
731 p->size = p_new_size;
733 /* adjust no_items in first&second half */
734 (*sp)->no_items = p->no_items - no_items_first_half;
735 p->no_items = no_items_first_half;
738 close_block (b, sub_p2);
740 close_block (b, sub_p1);
744 int insert_leaf (ISAMB b, struct ISAMB_block **sp1, void *lookahead_item,
745 int *lookahead_mode, ISAMC_I *stream,
746 struct ISAMB_block **sp2,
747 void *sub_item, int *sub_size,
748 const void *max_item)
750 struct ISAMB_block *p = *sp1;
753 char dst_buf[DST_BUF_SIZE], *dst = dst_buf;
755 void *c1 = (*b->method->codec.start)();
756 void *c2 = (*b->method->codec.start)();
758 int quater = b->file[b->no_cat-1].head.block_max / 4;
759 char *mid_cut = dst_buf + quater * 2;
760 char *tail_cut = dst_buf + quater * 3;
761 char *maxp = dst_buf + b->file[b->no_cat-1].head.block_max;
764 char cut_item_buf[DST_ITEM_MAX];
765 int cut_item_size = 0;
766 int no_items = 0; /* number of items (total) */
767 int no_items_1 = 0; /* number of items (first half) */
771 char file_item_buf[DST_ITEM_MAX];
772 char *file_item = file_item_buf;
775 endp = p->bytes + p->size;
776 (*b->method->codec.decode)(c1, &file_item, &src);
779 const char *dst_item = 0; /* resulting item to be inserted */
780 char *lookahead_next;
784 d = (*b->method->compare_item)(file_item_buf, lookahead_item);
786 /* d now holds comparison between existing file item and
789 d > 0: lookahead before file
790 d < 0: lookahead after file
794 /* lookahead must be inserted */
795 dst_item = lookahead_item;
796 /* if this is not an insertion, it's really bad .. */
797 if (!*lookahead_mode)
799 yaz_log (YLOG_WARN, "isamb: Inconsistent register (1)");
800 assert (*lookahead_mode);
804 dst_item = file_item_buf;
807 if (!*lookahead_mode && d == 0)
809 /* it's a deletion and they match so there is nothing to be
810 inserted anyway .. But mark the thing bad (file item
811 was part of input.. The item will not be part of output */
814 else if (!half1 && dst > mid_cut)
816 /* we have reached the splitting point for the first time */
817 const char *dst_item_0 = dst_item;
818 half1 = dst; /* candidate for splitting */
820 /* encode the resulting item */
821 (*b->method->codec.encode)(c2, &dst, &dst_item);
823 cut_item_size = dst_item - dst_item_0;
824 assert(cut_item_size > 0);
825 memcpy (cut_item_buf, dst_item_0, cut_item_size);
828 no_items_1 = no_items;
833 /* encode the resulting item */
834 (*b->method->codec.encode)(c2, &dst, &dst_item);
838 /* now move "pointers" .. result has been encoded .. */
841 /* we must move the lookahead pointer */
844 /* no more room. Mark lookahead as "gone".. */
848 /* move it really.. */
849 lookahead_next = lookahead_item;
850 if (!(*stream->read_item)(stream->clientData,
854 /* end of stream reached: no "more" and no lookahead */
858 if (lookahead_item && max_item &&
859 (*b->method->compare_item)(max_item, lookahead_item) <= 0)
861 /* the lookahead goes beyond what we allow in this
862 leaf. Mark it as "gone" */
871 /* exact match .. move both pointers */
873 lookahead_next = lookahead_item;
874 if (!(*stream->read_item)(stream->clientData,
875 &lookahead_next, lookahead_mode))
881 break; /* end of file stream reached .. */
882 file_item = file_item_buf; /* move file pointer */
883 (*b->method->codec.decode)(c1, &file_item, &src);
887 /* file pointer must be moved */
890 file_item = file_item_buf;
891 (*b->method->codec.decode)(c1, &file_item, &src);
895 maxp = dst_buf + b->file[b->no_cat-1].head.block_max + quater;
896 /* this loop runs when we are "appending" to a leaf page. That is
897 either it's empty (new) or all file items have been read in
899 while (lookahead_item)
902 const char *src = lookahead_item;
905 /* compare lookahead with max item */
907 (*b->method->compare_item)(max_item, lookahead_item) <= 0)
909 /* stop if we have reached the value of max item */
912 if (!*lookahead_mode)
914 /* this is append. So a delete is bad */
915 yaz_log (YLOG_WARN, "isamb: Inconsistent register (2)");
918 else if (!half1 && dst > tail_cut)
920 const char *src_0 = src;
921 half1 = dst; /* candidate for splitting */
923 (*b->method->codec.encode)(c2, &dst, &src);
925 cut_item_size = src - src_0;
926 assert(cut_item_size > 0);
927 memcpy (cut_item_buf, src_0, cut_item_size);
929 no_items_1 = no_items;
933 (*b->method->codec.encode)(c2, &dst, &src);
943 dst_item = lookahead_item;
944 if (!(*stream->read_item)(stream->clientData, &dst_item,
951 new_size = dst - dst_buf;
952 if (p && p->cat != b->no_cat-1 &&
953 new_size > b->file[p->cat].head.block_max)
955 /* non-btree block will be removed */
958 /* delete it too!! */
959 p = 0; /* make a new one anyway */
962 { /* must create a new one */
964 for (i = 0; i < b->no_cat; i++)
965 if (new_size <= b->file[i].head.block_max)
971 if (new_size > b->file[p->cat].head.block_max)
974 const char *cut_item = cut_item_buf;
979 assert(cut_item_size > 0);
982 p->size = half1 - dst_buf;
983 memcpy (p->bytes, dst_buf, half1 - dst_buf);
984 p->no_items = no_items_1;
987 *sp2 = new_leaf (b, p->cat);
989 (*b->method->codec.reset)(c2);
991 first_dst = (*sp2)->bytes;
993 (*b->method->codec.encode)(c2, &first_dst, &cut_item);
995 memcpy (first_dst, half2, dst - half2);
997 (*sp2)->size = (first_dst - (*sp2)->bytes) + (dst - half2);
998 (*sp2)->no_items = no_items - no_items_1;
1001 memcpy (sub_item, cut_item_buf, cut_item_size);
1002 *sub_size = cut_item_size;
1006 memcpy (p->bytes, dst_buf, dst - dst_buf);
1008 p->no_items = no_items;
1010 (*b->method->codec.stop)(c1);
1011 (*b->method->codec.stop)(c2);
1016 int insert_sub (ISAMB b, struct ISAMB_block **p, void *new_item,
1019 struct ISAMB_block **sp,
1020 void *sub_item, int *sub_size,
1021 const void *max_item)
1023 if (!*p || (*p)->leaf)
1024 return insert_leaf (b, p, new_item, mode, stream, sp, sub_item,
1025 sub_size, max_item);
1027 return insert_int (b, *p, new_item, mode, stream, sp, sub_item,
1028 sub_size, max_item);
1031 int isamb_unlink (ISAMB b, ISAMC_P pos)
1033 struct ISAMB_block *p1;
1037 p1 = open_block(b, pos);
1043 const char *src = p1->bytes + p1->offset;
1045 decode_ptr(&src, &sub_p);
1046 isamb_unlink(b, sub_p);
1048 while (src != p1->bytes + p1->size)
1050 decode_ptr(&src, &item_len);
1052 decode_ptr(&src, &sub_p);
1053 isamb_unlink(b, sub_p);
1060 ISAMB_P isamb_merge (ISAMB b, ISAMC_P pos, ISAMC_I *stream)
1062 char item_buf[DST_ITEM_MAX];
1066 int must_delete = 0;
1073 item_ptr = item_buf;
1075 (*stream->read_item)(stream->clientData, &item_ptr, &i_mode);
1079 item_ptr = item_buf;
1080 more = (*stream->read_item)(stream->clientData, &item_ptr, &i_mode);
1083 struct ISAMB_block *p = 0, *sp = 0;
1084 char sub_item[DST_ITEM_MAX];
1088 p = open_block (b, pos);
1089 more = insert_sub (b, &p, item_buf, &i_mode, stream, &sp,
1090 sub_item, &sub_size, 0);
1092 { /* increase level of tree by one */
1093 struct ISAMB_block *p2 = new_int (b, p->cat);
1094 char *dst = p2->bytes + p2->size;
1096 encode_ptr (&dst, p->pos);
1097 assert (sub_size < 40);
1098 encode_ptr (&dst, sub_size);
1099 memcpy (dst, sub_item, sub_size);
1101 encode_ptr (&dst, sp->pos);
1103 p2->size = dst - p2->bytes;
1104 p2->no_items = p->no_items + sp->no_items;
1105 pos = p2->pos; /* return new super page */
1106 close_block (b, sp);
1107 close_block (b, p2);
1111 pos = p->pos; /* return current one (again) */
1113 if (p->no_items == 0)
1121 isamb_unlink(b, pos);
1127 ISAMB_PP isamb_pp_open_x (ISAMB isamb, ISAMB_P pos, int *level, int scope)
1129 ISAMB_PP pp = xmalloc (sizeof(*pp));
1133 pp->block = xmalloc (ISAMB_MAX_LEVEL * sizeof(*pp->block));
1140 pp->skipped_numbers=0;
1141 pp->returned_numbers=0;
1143 for (i=0;i<ISAMB_MAX_LEVEL;i++)
1144 pp->skipped_nodes[i] = pp->accessed_nodes[i]=0;
1147 struct ISAMB_block *p = open_block (isamb, pos);
1148 const char *src = p->bytes + p->offset;
1149 pp->block[pp->level] = p;
1151 pp->total_size += p->size;
1157 decode_ptr (&src, &pos);
1158 p->offset = src - p->bytes;
1160 pp->accessed_nodes[pp->level]++;
1162 pp->block[pp->level+1] = 0;
1163 pp->maxlevel=pp->level;
1169 ISAMB_PP isamb_pp_open (ISAMB isamb, ISAMB_P pos, int scope)
1171 return isamb_pp_open_x (isamb, pos, 0, scope);
1174 void isamb_pp_close_x (ISAMB_PP pp, int *size, int *blocks)
1179 yaz_log(YLOG_DEBUG,"isamb_pp_close lev=%d returned "ZINT_FORMAT" values,"
1180 "skipped "ZINT_FORMAT,
1181 pp->maxlevel, pp->skipped_numbers, pp->returned_numbers);
1182 for (i=pp->maxlevel;i>=0;i--)
1183 if ( pp->skipped_nodes[i] || pp->accessed_nodes[i])
1184 yaz_log(YLOG_DEBUG,"isamb_pp_close level leaf-%d: "
1185 ZINT_FORMAT" read, "ZINT_FORMAT" skipped", i,
1186 pp->accessed_nodes[i], pp->skipped_nodes[i]);
1187 pp->isamb->skipped_numbers += pp->skipped_numbers;
1188 pp->isamb->returned_numbers += pp->returned_numbers;
1189 for (i=pp->maxlevel;i>=0;i--)
1191 pp->isamb->accessed_nodes[i] += pp->accessed_nodes[i];
1192 pp->isamb->skipped_nodes[i] += pp->skipped_nodes[i];
1195 *size = pp->total_size;
1197 *blocks = pp->no_blocks;
1198 for (i = 0; i <= pp->level; i++)
1199 close_block (pp->isamb, pp->block[i]);
1204 int isamb_block_info (ISAMB isamb, int cat)
1206 if (cat >= 0 && cat < isamb->no_cat)
1207 return isamb->file[cat].head.block_size;
1211 void isamb_pp_close (ISAMB_PP pp)
1213 isamb_pp_close_x (pp, 0, 0);
1216 /* simple recursive dumper .. */
1217 static void isamb_dump_r (ISAMB b, ISAMB_P pos, void (*pr)(const char *str),
1221 char prefix_str[1024];
1224 struct ISAMB_block *p = open_block (b, pos);
1225 sprintf(prefix_str, "%*s " ZINT_FORMAT " cat=%d size=%d max=%d items="
1226 ZINT_FORMAT, level*2, "",
1227 pos, p->cat, p->size, b->file[p->cat].head.block_max,
1230 sprintf(prefix_str, "%*s " ZINT_FORMAT, level*2, "", pos);
1233 while (p->offset < p->size)
1235 const char *src = p->bytes + p->offset;
1237 (*b->method->codec.decode)(p->decodeClientData, &dst, &src);
1238 (*b->method->log_item)(YLOG_DEBUG, buf, prefix_str);
1239 p->offset = src - (char*) p->bytes;
1241 assert(p->offset == p->size);
1245 const char *src = p->bytes + p->offset;
1249 decode_ptr (&src, &sub);
1250 p->offset = src - (char*) p->bytes;
1252 isamb_dump_r(b, sub, pr, level+1);
1254 while (p->offset < p->size)
1256 decode_ptr (&src, &item_len);
1257 (*b->method->log_item)(YLOG_DEBUG, src, prefix_str);
1259 decode_ptr (&src, &sub);
1261 p->offset = src - (char*) p->bytes;
1263 isamb_dump_r(b, sub, pr, level+1);
1270 void isamb_dump (ISAMB b, ISAMB_P pos, void (*pr)(const char *str))
1272 isamb_dump_r(b, pos, pr, 0);
1275 int isamb_pp_read (ISAMB_PP pp, void *buf)
1277 return isamb_pp_forward(pp, buf, 0);
1281 static int isamb_pp_on_right_node(ISAMB_PP pp, int level, const void *untilbuf)
1282 { /* looks one node higher to see if we should be on this node at all */
1283 /* useful in backing off quickly, and in avoiding tail descends */
1284 /* call with pp->level to begin with */
1285 struct ISAMB_block *p;
1292 yaz_log(YLOG_DEBUG,"isamb_pp_on_right returning true for root");
1294 return 1; /* we can never skip the root node */
1298 assert(p->offset <= p->size);
1299 if (p->offset < p->size )
1301 assert(p->offset>0);
1302 src=p->bytes + p->offset;
1303 decode_ptr(&src, &item_len);
1305 (*pp->isamb->method->codec.log_item)(YLOG_DEBUG,untilbuf,"on_leaf: until");
1306 (*pp->isamb->method->codec.log_item)(YLOG_DEBUG,src,"on_leaf: value");
1308 cmp=(*pp->isamb->method->compare_item)(untilbuf,src);
1309 if (cmp<pp->scope) { /* cmp<2 */
1311 yaz_log(YLOG_DEBUG,"isamb_pp_on_right returning true "
1312 "cmp=%d lev=%d ofs=%d",cmp,level,p->offset);
1318 yaz_log(YLOG_DEBUG,"isamb_pp_on_right returning false "
1319 "cmp=%d lev=%d ofs=%d",cmp,level,p->offset);
1326 yaz_log(YLOG_DEBUG,"isamb_pp_on_right at tail, looking higher "
1329 return isamb_pp_on_right_node(pp, level, untilbuf);
1331 } /* isamb_pp_on_right_node */
1333 static int isamb_pp_read_on_leaf(ISAMB_PP pp, void *buf)
1334 { /* reads the next item on the current leaf, returns 0 if end of leaf*/
1335 struct ISAMB_block *p = pp->block[pp->level];
1340 if (p->offset == p->size) {
1342 yaz_log(YLOG_DEBUG,"isamb_pp_read_on_leaf returning 0 on node %d",p->pos);
1344 return 0; /* at end of leaf */
1346 src=p->bytes + p->offset;
1348 (*pp->isamb->method->codec.decode)(p->decodeClientData,&dst, &src);
1349 p->offset = src - (char*) p->bytes;
1351 (*pp->isamb->method->codec.log_item)(YLOG_DEBUG, buf, "read_on_leaf returning 1");
1353 pp->returned_numbers++;
1355 } /* read_on_leaf */
1357 static int isamb_pp_forward_on_leaf(ISAMB_PP pp, void *buf, const void *untilbuf)
1358 { /* forwards on the current leaf, returns 0 if not found */
1362 if (!isamb_pp_read_on_leaf(pp,buf))
1364 /* FIXME - this is an extra function call, inline the read? */
1365 cmp=(*pp->isamb->method->compare_item)(untilbuf,buf);
1366 if (cmp <pp->scope){ /* cmp<2 found a good one */
1369 yaz_log(YLOG_DEBUG, "isam_pp_fwd_on_leaf skipped %d items",skips);
1371 pp->returned_numbers++;
1375 if (!isamb_pp_on_right_node(pp, pp->level, untilbuf))
1376 return 0; /* never mind the rest of this leaf */
1377 pp->skipped_numbers++;
1380 } /* forward_on_leaf */
1382 static int isamb_pp_climb_level(ISAMB_PP pp, ISAMB_P *pos)
1383 { /* climbs higher in the tree, until finds a level with data left */
1384 /* returns the node to (consider to) descend to in *pos) */
1385 struct ISAMB_block *p = pp->block[pp->level];
1389 yaz_log(YLOG_DEBUG,"isamb_pp_climb_level starting "
1390 "at level %d node %d ofs=%d sz=%d",
1391 pp->level, p->pos, p->offset, p->size);
1393 assert(pp->level >= 0);
1394 assert(p->offset <= p->size);
1398 yaz_log(YLOG_DEBUG,"isamb_pp_climb_level returning 0 at root");
1402 assert(pp->level>0);
1403 close_block(pp->isamb, pp->block[pp->level]);
1404 pp->block[pp->level]=0;
1406 p=pp->block[pp->level];
1408 yaz_log(YLOG_DEBUG,"isamb_pp_climb_level climbed to level %d node %d ofs=%d",
1409 pp->level, p->pos, p->offset);
1412 assert(p->offset <= p->size);
1413 if (p->offset == p->size ) {
1414 /* we came from the last pointer, climb on */
1415 if (!isamb_pp_climb_level(pp,pos))
1417 p=pp->block[pp->level];
1421 /* skip the child we just came from */
1423 yaz_log(YLOG_DEBUG,"isam_pp_climb_level: skipping lev=%d ofs=%d sz=%d",
1424 pp->level, p->offset, p->size);
1426 assert (p->offset < p->size );
1427 src=p->bytes + p->offset;
1428 decode_ptr(&src, &item_len);
1430 decode_ptr(&src, pos);
1431 p->offset=src - (char *)p->bytes;
1438 static zint isamb_pp_forward_unode(ISAMB_PP pp, zint pos, const void *untilbuf)
1439 { /* scans a upper node until it finds a child <= untilbuf */
1440 /* pp points to the key value, as always. pos is the child read from */
1442 /* if all values are too small, returns the last child in the node */
1443 /* FIXME - this can be detected, and avoided by looking at the */
1444 /* parent node, but that gets messy. Presumably the cost is */
1445 /* pretty low anyway */
1446 struct ISAMB_block *p = pp->block[pp->level];
1447 const char *src=p->bytes + p->offset;
1453 yaz_log(YLOG_DEBUG,"isamb_pp_forward_unode starting "
1454 "at level %d node %d ofs=%di sz=%d",
1455 pp->level, p->pos, p->offset, p->size);
1458 assert(p->offset <= p->size);
1459 if (p->offset == p->size) {
1461 yaz_log(YLOG_DEBUG,"isamb_pp_forward_unode returning at end "
1462 "at level %d node %d ofs=%di sz=%d",
1463 pp->level, p->pos, p->offset, p->size);
1465 return pos; /* already at the end of it */
1467 while(p->offset < p->size) {
1468 decode_ptr(&src,&item_len);
1469 cmp=(*pp->isamb->method->compare_item)(untilbuf,src);
1471 decode_ptr(&src,&nxtpos);
1472 if (cmp<pp->scope) /* cmp<2 */
1475 yaz_log(YLOG_DEBUG,"isamb_pp_forward_unode returning a hit "
1476 "at level %d node %d ofs=%d sz=%d",
1477 pp->level, p->pos, p->offset, p->size);
1482 p->offset=src-(char*)p->bytes;
1483 (pp->skipped_nodes[pp->maxlevel - pp->level -1])++;
1489 yaz_log(YLOG_DEBUG,"isamb_pp_forward_unode returning at tail "
1490 "at level %d node %d ofs=%d sz=%d skips=%d",
1491 pp->level, p->pos, p->offset, p->size, skips);
1493 return pos; /* that's the last one in the line */
1495 } /* forward_unode */
1497 static void isamb_pp_descend_to_leaf(ISAMB_PP pp, ISAMB_P pos, const void *untilbuf)
1498 { /* climbs down the tree, from pos, to the leftmost leaf */
1499 struct ISAMB_block *p = pp->block[pp->level];
1503 yaz_log(YLOG_DEBUG,"isamb_pp_descend_to_leaf "
1504 "starting at lev %d node %d ofs=%d lf=%d u=%p",
1505 pp->level, p->pos, p->offset, p->leaf, untilbuf);
1508 pos=isamb_pp_forward_unode(pp,pos,untilbuf);
1511 p=open_block(pp->isamb, pos);
1512 pp->block[pp->level]=p;
1513 ++(pp->accessed_nodes[pp->maxlevel-pp->level]);
1516 yaz_log(YLOG_DEBUG,"isamb_pp_descend_to_leaf "
1517 "got lev %d node %d lf=%d",
1518 pp->level, p->pos, p->leaf);
1522 assert (p->offset==0 );
1523 src=p->bytes + p->offset;
1524 decode_ptr(&src, &pos);
1525 p->offset=src-(char*)p->bytes;
1526 isamb_pp_descend_to_leaf(pp,pos,untilbuf);
1528 yaz_log(YLOG_DEBUG,"isamb_pp_descend_to_leaf "
1529 "returning at lev %d node %d ofs=%d lf=%d",
1530 pp->level, p->pos, p->offset, p->leaf);
1532 } /* descend_to_leaf */
1534 static int isamb_pp_find_next_leaf(ISAMB_PP pp)
1535 { /* finds the next leaf by climbing up and down */
1537 if (!isamb_pp_climb_level(pp,&pos))
1539 isamb_pp_descend_to_leaf(pp, pos,0);
1543 static int isamb_pp_climb_desc(ISAMB_PP pp, const void *untilbuf)
1544 { /* climbs up and descends to a leaf where values >= *untilbuf are found */
1547 struct ISAMB_block *p = pp->block[pp->level];
1548 yaz_log(YLOG_DEBUG,"isamb_pp_climb_desc starting "
1549 "at level %d node %d ofs=%d sz=%d",
1550 pp->level, p->pos, p->offset, p->size);
1552 if (!isamb_pp_climb_level(pp,&pos))
1554 /* see if it would pay to climb one higher */
1555 if (!isamb_pp_on_right_node(pp, pp->level, untilbuf))
1556 if (!isamb_pp_climb_level(pp,&pos))
1558 isamb_pp_descend_to_leaf(pp, pos,untilbuf);
1560 p = pp->block[pp->level];
1561 yaz_log(YLOG_DEBUG,"isamb_pp_climb_desc done "
1562 "at level %d node %d ofs=%d sz=%d",
1563 pp->level, p->pos, p->offset, p->size);
1568 int isamb_pp_forward (ISAMB_PP pp, void *buf, const void *untilbuf)
1571 struct ISAMB_block *p = pp->block[pp->level];
1573 yaz_log(YLOG_DEBUG,"isamb_pp_forward starting "
1574 "at level %d node %d ofs=%d sz=%d u=%p sc=%d",
1575 pp->level, p->pos, p->offset, p->size,untilbuf, scope);
1578 if (isamb_pp_forward_on_leaf( pp, buf, untilbuf)) {
1580 yaz_log(YLOG_DEBUG,"isamb_pp_forward (f) returning (A) "
1581 "at level %d node %d ofs=%d sz=%d",
1582 pp->level, p->pos, p->offset, p->size);
1586 if (! isamb_pp_climb_desc( pp, untilbuf)) {
1588 yaz_log(YLOG_DEBUG,"isamb_pp_forward (f) returning notfound (B) "
1589 "at level %d node %d ofs=%d sz=%d",
1590 pp->level, p->pos, p->offset, p->size);
1592 return 0; /* could not find a leaf */
1595 if (isamb_pp_forward_on_leaf( pp, buf, untilbuf)) {
1597 yaz_log(YLOG_DEBUG,"isamb_pp_forward (f) returning (C) "
1598 "at level %d node %d ofs=%d sz=%d",
1599 pp->level, p->pos, p->offset, p->size);
1603 }while ( isamb_pp_find_next_leaf(pp));
1604 return 0; /* could not find at all */
1606 else { /* no untilbuf, a straight read */
1607 /* FIXME - this should be moved
1608 * directly into the pp_read */
1609 /* keeping here now, to keep same
1610 * interface as the old fwd */
1611 if (isamb_pp_read_on_leaf( pp, buf)) {
1613 yaz_log(YLOG_DEBUG,"isamb_pp_forward (read) returning (D) "
1614 "at level %d node %d ofs=%d sz=%d",
1615 pp->level, p->pos, p->offset, p->size);
1619 if (isamb_pp_find_next_leaf(pp)) {
1621 yaz_log(YLOG_DEBUG,"isamb_pp_forward (read) returning (E) "
1622 "at level %d node %d ofs=%d sz=%d",
1623 pp->level, p->pos, p->offset, p->size);
1625 return isamb_pp_read_on_leaf(pp, buf);
1630 } /* isam_pp_forward (new version) */
1632 void isamb_pp_pos( ISAMB_PP pp, double *current, double *total )
1633 { /* return an estimate of the current position and of the total number of */
1634 /* occureences in the isam tree, based on the current leaf */
1635 struct ISAMB_block *p = pp->block[pp->level];
1640 *total = pp->block[0]->no_items;
1641 *current = (double) pp->returned_numbers;
1643 yaz_log(YLOG_LOG, "isamb_pp_pos returning: cur= %0.1f tot=%0.1f rn="
1644 ZINT_FORMAT, *current, *total, pp->returned_numbers);