1 /* $Id: rsmultiandor.c,v 1.24 2006-08-14 10:40:21 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 * \file rsmultiandor.c
26 * \brief This module implements the rsmulti_or and rsmulti_and result sets
28 * rsmultior is based on a heap, from which we find the next hit.
30 * rsmultiand is based on a simple array of rsets, and a linear
31 * search to find the record that exists in all of those rsets.
32 * To speed things up, the array is sorted so that the smallest
33 * rsets come first, they are most likely to have the hits furthest
34 * away, and thus forwarding to them makes the most sense.
43 #include <idzebra/util.h>
44 #include <idzebra/isamc.h>
47 static RSFD r_open_and (RSET ct, int flag);
48 static RSFD r_open_or (RSET ct, int flag);
49 static void r_close (RSFD rfd);
50 static void r_delete (RSET ct);
51 static int r_read_and (RSFD rfd, void *buf, TERMID *term);
52 static int r_read_or (RSFD rfd, void *buf, TERMID *term);
53 static int r_write (RSFD rfd, const void *buf);
54 static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
55 const void *untilbuf);
56 static int r_forward_or(RSFD rfd, void *buf, TERMID *term,
57 const void *untilbuf);
58 static void r_pos (RSFD rfd, double *current, double *total);
59 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
61 static const struct rset_control control_or =
74 static const struct rset_control control_and =
87 /* The heap structure:
88 * The rset contains a list or rsets we are ORing together
89 * The rfd contains a heap of heap-items, which contain
90 * a rfd opened to those rsets, and a buffer for one key.
91 * They also contain a ptr to the rset list in the rset
92 * itself, for practical reasons.
105 const struct rset_key_control *kctrl;
106 struct heap_item **heap; /* ptrs to the rfd */
108 typedef struct heap *HEAP;
111 struct rset_private {
118 struct heap_item *items; /* we alloc and free them here */
119 HEAP h; /* and move around here */
120 zint hits; /* returned so far */
121 int eof; /* seen the end of it */
122 int tailcount; /* how many items are tailing */
128 static int log_level = 0;
129 static int log_level_initialized = 0;
132 /* Heap functions ***********************/
135 static void heap_dump_item( HEAP h, int i, int level)
140 (void)rset_pos(h->heap[i]->rset,h->heap[i]->fd, &cur, &tot);
141 yaz_log(log_level," %d %*s i=%p buf=%p %0.1f/%0.1f",i, level, "",
142 &(h->heap[i]), h->heap[i]->buf, cur,tot );
143 heap_dump_item(h, 2*i, level+1);
144 heap_dump_item(h, 2*i+1, level+1);
146 static void heap_dump( HEAP h,char *msg) {
147 yaz_log(log_level, "heap dump: %s num=%d max=%d",msg, h->heapnum, h->heapmax);
148 heap_dump_item(h,1,1);
152 static void heap_swap (HEAP h, int x, int y)
154 struct heap_item *swap;
156 h->heap[x] = h->heap[y];
160 static int heap_cmp(HEAP h, int x, int y)
162 return (*h->kctrl->cmp)(h->heap[x]->buf,h->heap[y]->buf);
165 static int heap_empty(HEAP h)
167 return ( 0==h->heapnum );
170 /** \brief deletes the first item in the heap, and balances the rest
172 static void heap_delete (HEAP h)
174 int cur = 1, child = 2;
175 h->heap[1] = 0; /* been deleted */
176 heap_swap (h, 1, h->heapnum--);
177 while (child <= h->heapnum) {
178 if (child < h->heapnum && heap_cmp(h,child,1+child)>0 )
180 if (heap_cmp(h,cur,child) > 0)
182 heap_swap (h, cur, child);
191 /** \brief puts item into heap.
192 The heap root element has changed value (to bigger)
193 Swap downwards until the heap is ordered again
195 static void heap_balance (HEAP h)
197 int cur = 1, child = 2;
198 while (child <= h->heapnum) {
199 if (child < h->heapnum && heap_cmp(h,child,1+child)>0 )
201 if (heap_cmp(h,cur,child) > 0)
203 heap_swap (h, cur, child);
213 static void heap_insert (HEAP h, struct heap_item *hi)
217 cur = ++(h->heapnum);
218 assert(cur <= h->heapmax);
221 while (parent && (heap_cmp(h,parent,cur) > 0))
224 heap_swap (h, cur, parent);
232 HEAP heap_create (NMEM nmem, int size, const struct rset_key_control *kctrl)
234 HEAP h = (HEAP) nmem_malloc (nmem, sizeof(*h));
236 ++size; /* heap array starts at 1 */
240 h->heap = (struct heap_item**) nmem_malloc(nmem,size*sizeof(*h->heap));
241 h->heap[0]=0; /* not used */
245 static void heap_clear( HEAP h)
251 static void heap_destroy (HEAP h)
253 /* nothing to delete, all is nmem'd, and will go away in due time */
256 /** \brief compare and items for quicksort
257 used in qsort to get the multi-and args in optimal order
258 that is, those with fewest occurrences first
260 int compare_ands(const void *x, const void *y)
261 { const struct heap_item *hx = x;
262 const struct heap_item *hy = y;
263 double cur, totx, toty;
264 rset_pos(hx->fd, &cur, &totx);
265 rset_pos(hy->fd, &cur, &toty);
266 if ( totx > toty +0.5 )
268 if ( totx < toty -0.5 )
270 return 0; /* return totx - toty, except for overflows and rounding */
273 static RSET rsmulti_andor_create(NMEM nmem,
274 struct rset_key_control *kcontrol,
275 int scope, TERMID termid,
276 int no_rsets, RSET* rsets,
277 const struct rset_control *ctrl)
279 RSET rnew = rset_create_base(ctrl, nmem, kcontrol, scope, termid,
281 struct rset_private *info;
282 if (!log_level_initialized)
284 log_level = yaz_log_module_level("rsmultiandor");
285 log_level_initialized = 1;
287 yaz_log(log_level, "rsmultiand_andor_create scope=%d", scope);
288 info = (struct rset_private *) nmem_malloc(rnew->nmem, sizeof(*info));
293 RSET rset_create_or(NMEM nmem, struct rset_key_control *kcontrol,
294 int scope, TERMID termid, int no_rsets, RSET* rsets)
296 return rsmulti_andor_create(nmem, kcontrol, scope, termid,
297 no_rsets, rsets, &control_or);
300 RSET rset_create_and(NMEM nmem, struct rset_key_control *kcontrol,
301 int scope, int no_rsets, RSET* rsets)
303 return rsmulti_andor_create(nmem, kcontrol, scope, 0,
304 no_rsets, rsets, &control_and);
307 static void r_delete (RSET ct)
311 static RSFD r_open_andor (RSET ct, int flag, int is_and)
314 struct rfd_private *p;
315 const struct rset_key_control *kctrl = ct->keycontrol;
318 if (flag & RSETF_WRITE)
320 yaz_log (YLOG_FATAL, "multiandor set type is read-only");
323 rfd = rfd_create_base(ct);
325 p = (struct rfd_private *)rfd->priv;
329 /* all other pointers shouls already be allocated, in right sizes! */
333 p = (struct rfd_private *) nmem_malloc (ct->nmem,sizeof(*p));
338 p->tailbits = nmem_malloc(ct->nmem, ct->no_children*sizeof(char) );
340 p->h = heap_create( ct->nmem, ct->no_children, kctrl);
341 p->items = (struct heap_item *)
342 nmem_malloc(ct->nmem, ct->no_children*sizeof(*p->items));
343 for (i = 0; i<ct->no_children; i++)
345 p->items[i].rset = ct->children[i];
346 p->items[i].buf = nmem_malloc(ct->nmem, kctrl->key_size);
354 { /* read the array and sort it */
355 for (i = 0; i<ct->no_children; i++){
356 p->items[i].fd = rset_open(ct->children[i], RSETF_READ);
357 if (!rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
361 qsort(p->items, ct->no_children, sizeof(p->items[0]), compare_ands);
364 { /* fill the heap for ORing */
365 for (i = 0; i<ct->no_children; i++){
366 p->items[i].fd = rset_open(ct->children[i],RSETF_READ);
367 if ( rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
368 heap_insert(p->h, &(p->items[i]));
374 static RSFD r_open_or (RSET ct, int flag)
376 return r_open_andor(ct, flag, 0);
379 static RSFD r_open_and (RSET ct, int flag)
381 return r_open_andor(ct, flag, 1);
385 static void r_close (RSFD rfd)
387 struct rfd_private *p=(struct rfd_private *)(rfd->priv);
392 for (i = 0; i<rfd->rset->no_children; i++)
394 rset_close(p->items[i].fd);
397 static int r_forward_or(RSFD rfd, void *buf,
398 TERMID *term, const void *untilbuf)
399 { /* while heap head behind untilbuf, forward it and rebalance heap */
400 struct rfd_private *p = rfd->priv;
401 const struct rset_key_control *kctrl = rfd->rset->keycontrol;
402 if (heap_empty(p->h))
404 while ( (*kctrl->cmp)(p->h->heap[1]->buf,untilbuf) < -rfd->rset->scope )
406 if (rset_forward(p->h->heap[1]->fd,p->h->heap[1]->buf,
407 &p->h->heap[1]->term, untilbuf))
412 if (heap_empty(p->h))
417 return r_read_or(rfd, buf, term);
421 /** \brief reads one item key from an 'or' set
422 \param rfd set handle
423 \param buf resulting item buffer
424 \param term resulting term
426 \retval 1 item could be read
428 static int r_read_or (RSFD rfd, void *buf, TERMID *term)
430 RSET rset = rfd->rset;
431 struct rfd_private *mrfd = rfd->priv;
432 const struct rset_key_control *kctrl = rset->keycontrol;
433 struct heap_item *it;
435 if (heap_empty(mrfd->h))
437 it = mrfd->h->heap[1];
438 memcpy(buf, it->buf, kctrl->key_size);
448 rdres = rset_read(it->fd, it->buf, &it->term);
450 heap_balance(mrfd->h);
452 heap_delete(mrfd->h);
457 /** \brief reads one item key from an 'and' set
458 \param rfd set handle
459 \param buf resulting item buffer
460 \param term resulting term
462 \retval 1 item could be read
464 Has to return all hits where each item points to the
465 same sysno (scope), in order. Keep an extra key (hitkey)
466 as long as all records do not point to hitkey, forward
467 them, and update hitkey to be the highest seen so far.
468 (if any item eof's, mark eof, and return 0 thereafter)
469 Once a hit has been found, scan all items for the smallest
470 value. Mark all as being in the tail. Read next from that
471 item, and if not in the same record, clear its tail bit
473 static int r_read_and (RSFD rfd, void *buf, TERMID *term)
474 { struct rfd_private *p = rfd->priv;
476 const struct rset_key_control *kctrl = ct->keycontrol;
481 { /* we are tailing, find lowest tail and return it */
485 for (i = 0; i<ct->no_children; i++)
491 (p->items[i].buf, p->items[mintail].buf);
497 if (kctrl->get_segment)
499 /* segments enabled */
501 zint segment = kctrl->get_segment(p->items[i].buf);
502 /* store segment if not stored already */
503 if (!p->segment && segment)
504 p->segment = segment;
506 /* skip rest entirely if segments don't match */
507 if (p->segment && segment && p->segment != segment)
512 /* return the lowest tail */
513 memcpy(buf, p->items[mintail].buf, kctrl->key_size);
515 *term = p->items[mintail].term;
516 if (!rset_read(p->items[mintail].fd, p->items[mintail].buf,
517 &p->items[mintail].term))
519 p->eof = 1; /* game over, once tails have been returned */
520 p->tailbits[mintail] = 0;
526 cmp = (*kctrl->cmp)(p->items[mintail].buf,buf);
527 if (cmp >= rfd->rset->scope)
529 p->tailbits[mintail] = 0;
534 continue; /* skip again.. eventually tailcount will be 0 */
538 /* not tailing, forward until all reocrds match, and set up */
539 /* as tails. the earlier 'if' will then return the hits */
541 return 0; /* nothing more to see */
542 i = 1; /* assume items[0] is highest up */
543 while (i < ct->no_children)
545 int cmp = (*kctrl->cmp)(p->items[0].buf, p->items[i].buf);
546 if (cmp <= -rfd->rset->scope) { /* [0] was behind, forward it */
547 if (!rset_forward(p->items[0].fd, p->items[0].buf,
548 &p->items[0].term, p->items[i].buf))
550 p->eof = 1; /* game over */
553 i = 0; /* start forwarding from scratch */
555 else if (cmp>=rfd->rset->scope)
556 { /* [0] was ahead, forward i */
557 if (!rset_forward(p->items[i].fd, p->items[i].buf,
558 &p->items[i].term, p->items[0].buf))
560 p->eof = 1; /* game over */
567 /* if we get this far, all rsets are now within +- scope of [0] */
568 /* ergo, we have a hit. Mark them all as tailing, and let the */
569 /* upper 'if' return the hits in right order */
570 for (i = 0; i < ct->no_children; i++)
572 p->tailcount = ct->no_children;
579 static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
580 const void *untilbuf)
582 struct rfd_private *p = rfd->priv;
584 const struct rset_key_control *kctrl = ct->keycontrol;
589 for (i = 0; i<ct->no_children; i++)
591 cmp = (*kctrl->cmp)(p->items[i].buf,untilbuf);
592 if (cmp <= -rfd->rset->scope)
594 killtail = 1; /* we are moving to a different hit */
595 if (!rset_forward(p->items[i].fd, p->items[i].buf,
596 &p->items[i].term, untilbuf))
598 p->eof = 1; /* game over */
606 for (i = 0; i<ct->no_children; i++)
610 return r_read_and(rfd,buf,term);
613 static void r_pos (RSFD rfd, double *current, double *total)
616 struct rfd_private *mrfd =
617 (struct rfd_private *)(rfd->priv);
619 double scur = 0.0, stot = 0.0;
621 for (i = 0; i<ct->no_children; i++){
622 rset_pos(mrfd->items[i].fd, &cur, &tot);
623 yaz_log(log_level, "r_pos: %d %0.1f %0.1f", i, cur,tot);
627 if (stot < 1.0) { /* nothing there */
630 yaz_log(log_level, "r_pos: NULL %0.1f %0.1f", *current, *total);
634 *current = (double) (mrfd->hits);
635 *total = *current*stot/scur;
636 yaz_log(log_level, "r_pos: = %0.1f %0.1f", *current, *total);
640 static int r_write (RSFD rfd, const void *buf)
642 yaz_log (YLOG_FATAL, "multior set type is read-only");
646 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
649 rset_get_one_term(ct, terms, maxterms, curterm);
652 /* Special case: Some multi-ors have all terms pointing to the same
653 term. We do not want to duplicate those. Other multiors (and ands)
654 have different terms under them. Those we want.
656 int firstterm= *curterm;
659 for (i = 0; i<ct->no_children; i++)
661 rset_getterms(ct->children[i], terms, maxterms, curterm);
662 if ( ( *curterm > firstterm+1 ) &&
663 ( *curterm <= maxterms ) &&
664 ( terms[(*curterm)-1] == terms[firstterm] )
666 (*curterm)--; /* forget the term, seen that before */
675 * indent-tabs-mode: nil
677 * vim: shiftwidth=4 tabstop=8 expandtab