1 /* $Id: rsmultiandor.c,v 1.18 2005-05-18 11:47:50 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 * This module implements the rsmulti_or and rsmulti_and result sets
27 * rsmultior is based on a heap, from which we find the next hit.
29 * rsmultiand is based on a simple array of rsets, and a linear
30 * search to find the record that exists in all of those rsets.
31 * To speed things up, the array is sorted so that the smallest
32 * rsets come first, they are most likely to have the hits furthest
33 * away, and thus forwarding to them makes the most sense.
42 #include <idzebra/util.h>
43 #include <idzebra/isamc.h>
/* Forward declarations of the file-local result-set callbacks.
 * open, read and forward are declared in separate AND and OR variants;
 * close, delete, write, pos and get_terms are declared once. */
46 static RSFD r_open_and (RSET ct, int flag);
47 static RSFD r_open_or (RSET ct, int flag);
48 static void r_close (RSFD rfd);
49 static void r_delete (RSET ct);
50 static int r_read_and (RSFD rfd, void *buf, TERMID *term);
51 static int r_read_or (RSFD rfd, void *buf, TERMID *term);
52 static int r_write (RSFD rfd, const void *buf);
53 static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
54 const void *untilbuf);
55 static int r_forward_or(RSFD rfd, void *buf, TERMID *term,
56 const void *untilbuf);
57 static void r_pos (RSFD rfd, double *current, double *total);
58 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
/* Control table for the OR flavour; rsmulti_or_create() passes its */
/* address to rsmulti_andor_create(). */
60 static const struct rset_control control_or =
/* Control table for the AND flavour; rsmulti_and_create() passes its */
/* address to rsmulti_andor_create(). */
73 static const struct rset_control control_and =
86 /* The heap structure:
87 * The rset contains a list of rsets we are ORing together
88 * The rfd contains a heap of heap-items, which contain
89 * a rfd opened to those rsets, and a buffer for one key.
90 * They also contain a ptr to the rset list in the rset
91 * itself, for practical reasons.
/* Key control; heap_cmp() orders heap items through its cmp callback. */
104 const struct rset_key_control *kctrl;
/* Indexed from 1: heap_create() allocates size+1 slots and zeroes slot 0. */
105 struct heap_item **heap; /* ptrs to the rfd */
/* HEAP is a pointer to struct heap. */
107 typedef struct heap *HEAP;
/* Private data of the rset; rsmulti_andor_create() fills in no_rsets */
/* and rsets (a copy of the caller's array of child rsets). */
110 struct rset_private {
/* NOTE(review): the fields below are accessed through */
/* struct rfd_private pointers elsewhere in this file (per-open state). */
118 struct heap_item *items; /* we alloc and free them here */
119 HEAP h; /* and move around here */
120 zint hits; /* returned so far */
121 int eof; /* seen the end of it */
122 int tailcount; /* how many items are tailing */
/* Log level for this module; rsmulti_andor_create() sets it once from */
/* yaz_log_module_level("rsmultiandor"). */
126 static int log_level = 0;
/* Guard so that the lookup above happens only on the first create call. */
127 static int log_level_initialized = 0;
130 /* Heap functions ***********************/
/* Debug helper: logs the item in heap slot i (slot address, key buffer, */
/* and cur/tot as reported by rset_pos), indented by `level`, then */
/* recurses into slots 2*i and 2*i+1. */
/* NOTE(review): rset_pos takes four arguments here but three in */
/* compare_ands() and r_pos() - confirm which signature is current. */
133 static void heap_dump_item( HEAP h, int i, int level)
138 (void)rset_pos(h->heap[i]->rset,h->heap[i]->fd, &cur, &tot);
139 yaz_log(log_level," %d %*s i=%p buf=%p %0.1f/%0.1f",i, level, "",
140 &(h->heap[i]), h->heap[i]->buf, cur,tot );
141 heap_dump_item(h, 2*i, level+1);
142 heap_dump_item(h, 2*i+1, level+1);
/* Debug helper: logs `msg` with the heap's heapnum and heapmax, then */
/* dumps the items starting from the root (slot 1) at level 1. */
144 static void heap_dump( HEAP h,char *msg) {
145 yaz_log(log_level, "heap dump: %s num=%d max=%d",msg, h->heapnum, h->heapmax);
146 heap_dump_item(h,1,1);
/* Exchanges the item pointers held in heap slots x and y. */
/* (Only the x <- y assignment is visible here; `swap` is presumably */
/* the temporary for the other half.) */
150 static void heap_swap (HEAP h, int x, int y)
152 struct heap_item *swap;
154 h->heap[x] = h->heap[y];
/* Compares the key buffers of the items in heap slots x and y with the */
/* heap's key-control cmp callback and returns that callback's result. */
158 static int heap_cmp(HEAP h, int x, int y)
160 return (*h->kctrl->cmp)(h->heap[x]->buf,h->heap[y]->buf);
/* Returns nonzero when the heap holds no items (heapnum is 0). */
163 static int heap_empty(HEAP h)
165 return ( 0==h->heapnum );
/* Removes the root item: the root slot is cleared and swapped with the */
/* last used slot, heapnum is decremented, and the new root is then */
/* moved downwards from slot 1. */
168 static void heap_delete (HEAP h)
169 { /* deletes the first item in the heap, and balances the rest */
170 int cur = 1, child = 2;
171 h->heap[1] = 0; /* been deleted */
172 heap_swap (h, 1, h->heapnum--);
173 while (child <= h->heapnum) {
/* when a right sibling exists, compare the two children */
174 if (child < h->heapnum && heap_cmp(h,child,1+child)>0 )
/* parent compares greater than the child: swap them */
176 if (heap_cmp(h,cur,child) > 0)
178 heap_swap (h, cur, child);
/* Restores heap order after the key of the root item has changed, */
/* walking down from slot 1 with the same child comparison and swap as */
/* heap_delete(). */
187 static void heap_balance (HEAP h)
188 { /* The heap root element has changed value (to bigger) */
189 /* swap downwards until the heap is ordered again */
190 int cur = 1, child = 2;
191 while (child <= h->heapnum) {
/* when a right sibling exists, compare the two children */
192 if (child < h->heapnum && heap_cmp(h,child,1+child)>0 )
/* parent compares greater than the child: swap them */
194 if (heap_cmp(h,cur,child) > 0)
196 heap_swap (h, cur, child);
/* Adds item `hi` to the heap: takes the next free slot (heapnum is */
/* incremented first, and asserted not to exceed heapmax), then swaps */
/* upwards while the parent compares greater than the new slot. */
206 static void heap_insert (HEAP h, struct heap_item *hi)
210 cur = ++(h->heapnum);
211 assert(cur <= h->heapmax);
/* the loop stops once parent reaches 0, i.e. above the root */
214 while (parent && (heap_cmp(h,parent,cur) > 0))
217 heap_swap (h, cur, parent);
/* Allocates a heap and its slot array from `nmem`. */
/*   nmem  - memory pool both allocations come from */
/*   size  - number of items the caller wants room for */
/*   kctrl - key control for the heap */
/* The slot array gets size+1 entries because slot 0 is not used. */
225 HEAP heap_create (NMEM nmem, int size, const struct rset_key_control *kctrl)
227 HEAP h = (HEAP) nmem_malloc (nmem, sizeof(*h));
229 ++size; /* heap array starts at 1 */
233 h->heap = (struct heap_item**) nmem_malloc(nmem,size*sizeof(*h->heap));
234 h->heap[0]=0; /* not used */
/* Takes the heap to clear and returns nothing. */
/* NOTE(review): the body is not visible in this listing. */
238 static void heap_clear( HEAP h)
/* Counterpart of heap_create(). Per the comment below there is nothing */
/* to free here, because the heap's memory was taken from an NMEM pool. */
244 static void heap_destroy (HEAP h)
246 /* nothing to delete, all is nmem'd, and will go away in due time */
/* qsort comparator over struct heap_item elements (r_open_andor() */
/* sorts p->items with it). Compares the totals that rset_pos reports */
/* for the two items' fds; totals within 0.5 of each other count as */
/* equal and give 0. */
249 int compare_ands(const void *x, const void *y)
250 { /* used in qsort to get the multi-and args in optimal order */
251 /* that is, those with fewest occurrences first */
252 const struct heap_item *hx = x;
253 const struct heap_item *hy = y;
254 double cur, totx, toty;
/* only the totals are compared; cur is written by both calls and unused */
255 rset_pos(hx->fd, &cur, &totx);
256 rset_pos(hy->fd, &cur, &toty);
257 if ( totx > toty +0.5 )
259 if ( totx < toty -0.5 )
261 return 0; /* return totx - toty, except for overflows and rounding */
264 /* Creating and deleting rsets ***********************/
/* Shared constructor behind rsmulti_or_create() and */
/* rsmulti_and_create(). */
/*   nmem, kcontrol, scope - handed on to rset_create_base() */
/*   no_rsets, rsets       - the child rsets; the array is copied */
/*   ctrl                  - control table selecting OR or AND behaviour */
266 static RSET rsmulti_andor_create(NMEM nmem,
267 struct rset_key_control *kcontrol,
268 int scope, int no_rsets, RSET* rsets,
269 const struct rset_control *ctrl)
271 RSET rnew = rset_create_base(ctrl, nmem, kcontrol, scope, 0);
272 struct rset_private *info;
/* look up this module's log level on the first call only */
273 if (!log_level_initialized)
275 log_level = yaz_log_module_level("rsmultiandor");
276 log_level_initialized = 1;
278 yaz_log(log_level, "rsmultiand_andor_create scope=%d", scope);
/* private data and the rsets copy are allocated from rnew->nmem */
279 info = (struct rset_private *) nmem_malloc(rnew->nmem,sizeof(*info));
280 info->no_rsets = no_rsets;
281 info->rsets = (RSET*)nmem_malloc(rnew->nmem, no_rsets*sizeof(*rsets));
282 memcpy(info->rsets,rsets,no_rsets*sizeof(*rsets));
/* Creates a multi-OR result set over the no_rsets rsets in `rsets`. */
/* Forwards all arguments to rsmulti_andor_create() with control_or */
/* and returns its result. */
287 RSET rsmulti_or_create(NMEM nmem, struct rset_key_control *kcontrol,
288 int scope, int no_rsets, RSET* rsets)
290 return rsmulti_andor_create(nmem, kcontrol, scope,
291 no_rsets, rsets, &control_or);
/* Creates a multi-AND result set over the no_rsets rsets in `rsets`. */
/* Forwards all arguments to rsmulti_andor_create() with control_and */
/* and returns its result. */
294 RSET rsmulti_and_create(NMEM nmem, struct rset_key_control *kcontrol,
295 int scope, int no_rsets, RSET* rsets)
297 return rsmulti_andor_create(nmem, kcontrol, scope,
298 no_rsets, rsets, &control_and);
/* Delete callback: calls rset_delete() on every child rset stored in */
/* the private data of `ct`. */
301 static void r_delete (RSET ct)
303 struct rset_private *info = (struct rset_private *) ct->priv;
305 for(i = 0; i<info->no_rsets; i++)
306 rset_delete(info->rsets[i]);
310 /* Opening and closing fd's on them *********************/
/* Shared open routine behind r_open_or() (is_and = 0) and r_open_and() */
/* (is_and = 1). */
/*   ct     - the multi-and/or rset to open */
/*   flag   - open flags; RSETF_WRITE is logged as a fatal error */
/*   is_and - selects between the AND set-up and the OR set-up below */
/* All allocations are taken from ct->nmem. */
312 static RSFD r_open_andor (RSET ct, int flag, int is_and)
315 struct rfd_private *p;
316 struct rset_private *info = (struct rset_private *) ct->priv;
317 const struct rset_key_control *kctrl = ct->keycontrol;
320 if (flag & RSETF_WRITE)
322 yaz_log (YLOG_FATAL, "multiandor set type is read-only");
325 rfd = rfd_create_base(ct);
/* private data already attached to the rfd is picked up here */
327 p = (struct rfd_private *)rfd->priv;
331 /* all other pointers should already be allocated, in right sizes! */
/* otherwise fresh private data is allocated */
334 p = (struct rfd_private *) nmem_malloc (ct->nmem,sizeof(*p));
/* one char per child rset */
339 p->tailbits = nmem_malloc(ct->nmem, info->no_rsets*sizeof(char) );
341 p->h = heap_create( ct->nmem, info->no_rsets, kctrl);
342 p->items=(struct heap_item *) nmem_malloc(ct->nmem,
343 info->no_rsets*sizeof(*p->items));
/* each item refers to its child rset and owns a buffer for one key */
344 for (i = 0; i<info->no_rsets; i++)
346 p->items[i].rset = info->rsets[i];
347 p->items[i].buf = nmem_malloc(ct->nmem, kctrl->key_size);
355 { /* read the array and sort it */
/* open every child, read its first key, then order items by compare_ands */
356 for (i = 0; i<info->no_rsets; i++){
357 p->items[i].fd = rset_open(info->rsets[i],RSETF_READ);
358 if (!rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
362 qsort(p->items, info->no_rsets, sizeof(p->items[0]), compare_ands);
364 { /* fill the heap for ORing */
/* only children whose first read succeeds are inserted into the heap */
365 for (i = 0; i<info->no_rsets; i++){
366 p->items[i].fd = rset_open(info->rsets[i],RSETF_READ);
367 if ( rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
368 heap_insert(p->h, &(p->items[i]));
/* Open callback for the OR flavour: r_open_andor() with is_and = 0. */
374 static RSFD r_open_or (RSET ct, int flag)
376 return r_open_andor(ct, flag, 0);
/* Open callback for the AND flavour: r_open_andor() with is_and = 1. */
379 static RSFD r_open_and (RSET ct, int flag)
381 return r_open_andor(ct, flag, 1);
/* Close callback: closes the fd of every item (one per child rset) and */
/* then releases the rfd with rfd_delete_base(). */
385 static void r_close (RSFD rfd)
387 struct rset_private *info=
388 (struct rset_private *)(rfd->rset->priv);
389 struct rfd_private *p=(struct rfd_private *)(rfd->priv);
394 for (i = 0; i<info->no_rsets; i++)
396 rset_close(p->items[i].fd);
397 rfd_delete_base(rfd);
/* Forward callback for the OR flavour. */
/*   buf, term - receive the hit, filled in by the final r_read_or() */
/*   untilbuf  - key to forward to */
/* Returns the result of r_read_or() once the heap head is no longer */
/* behind untilbuf. */
402 static int r_forward_or(RSFD rfd, void *buf,
403 TERMID *term,const void *untilbuf)
404 { /* while heap head behind untilbuf, forward it and rebalance heap */
405 struct rfd_private *p = rfd->priv;
406 const struct rset_key_control *kctrl = rfd->rset->keycontrol;
407 if (heap_empty(p->h))
/* "behind" means the head key compares below untilbuf by more than scope */
409 while ( (*kctrl->cmp)(p->h->heap[1]->buf,untilbuf) < -rfd->rset->scope )
/* forward the head item's own rset, refreshing its key and term in place */
411 if (rset_forward(p->h->heap[1]->fd,p->h->heap[1]->buf,
412 &p->h->heap[1]->term, untilbuf))
417 if (heap_empty(p->h))
422 return r_read_or(rfd,buf,term);
/* Read callback for the OR flavour: copies the key of the heap's root */
/* item into `buf`, then reads the next key from that item's rset into */
/* the item's own buffer. */
426 static int r_read_or (RSFD rfd, void *buf, TERMID *term)
428 struct rfd_private *mrfd = rfd->priv;
429 const struct rset_key_control *kctrl = rfd->rset->keycontrol;
430 struct heap_item *it;
432 if (heap_empty(mrfd->h))
434 it = mrfd->h->heap[1];
435 memcpy(buf,it->buf, kctrl->key_size);
439 rdres = rset_read(it->fd, it->buf, &it->term);
/* presumably chosen by rdres: re-balance when the item got a new key, */
/* otherwise drop the item from the heap - TODO confirm */
441 heap_balance(mrfd->h);
443 heap_delete(mrfd->h);
/* Read callback for the AND flavour. */
/*   buf  - receives the key of the returned hit */
/*   term - receives the term of the item the hit came from */
448 static int r_read_and (RSFD rfd, void *buf, TERMID *term)
449 { /* Has to return all hits where each item points to the */
450 /* same sysno (scope), in order. Keep an extra key (hitkey) */
451 /* as long as all records do not point to hitkey, forward */
452 /* them, and update hitkey to be the highest seen so far. */
453 /* (if any item eof's, mark eof, and return 0 thereafter) */
454 /* Once a hit has been found, scan all items for the smallest */
455 /* value. Mark all as being in the tail. Read next from that */
456 /* item, and if not in the same record, clear its tail bit */
457 struct rfd_private *p = rfd->priv;
458 const struct rset_key_control *kctrl = rfd->rset->keycontrol;
459 struct rset_private *info = rfd->rset->priv;
465 { /* we are tailing, find lowest tail and return it */
/* skip to the first item whose tail bit is set */
467 while ((mintail<info->no_rsets) && !p->tailbits[mintail])
468 mintail++; /* first tail */
/* compare the remaining items against the current minimum */
469 for (i = mintail+1; i<info->no_rsets; i++)
473 cmp=(*kctrl->cmp)(p->items[i].buf,p->items[mintail].buf);
478 /* return the lowest tail */
479 memcpy(buf, p->items[mintail].buf, kctrl->key_size);
481 *term = p->items[mintail].term;
/* refill the item just returned from its own rset */
482 if (!rset_read(p->items[mintail].fd, p->items[mintail].buf,
483 &p->items[mintail].term))
485 p->eof = 1; /* game over, once tails have been returned */
486 p->tailbits[mintail]=0;
/* the refilled key is at least `scope` past the hit just returned, */
/* so this item stops tailing */
492 cmp=(*kctrl->cmp)(p->items[mintail].buf,buf);
493 if (cmp >= rfd->rset->scope){
494 p->tailbits[mintail]=0;
500 /* not tailing, forward until all records match, and set up */
501 /* as tails. the earlier 'if' will then return the hits */
503 return 0; /* nothing more to see */
504 i = 1; /* assume items[0] is highest up */
505 while (i<info->no_rsets) {
506 cmp=(*kctrl->cmp)(p->items[0].buf,p->items[i].buf);
507 if (cmp<=-rfd->rset->scope) { /* [0] was behind, forward it */
508 if (!rset_forward(p->items[0].fd, p->items[0].buf,
509 &p->items[0].term, p->items[i].buf))
511 p->eof = 1; /* game over */
514 i = 0; /* start forwarding from scratch */
515 } else if (cmp>=rfd->rset->scope)
516 { /* [0] was ahead, forward i */
517 if (!rset_forward(p->items[i].fd, p->items[i].buf,
518 &p->items[i].term, p->items[0].buf))
520 p->eof = 1; /* game over */
526 /* if we get this far, all rsets are now within +- scope of [0] */
527 /* ergo, we have a hit. Mark them all as tailing, and let the */
528 /* upper 'if' return the hits in right order */
529 for (i = 0; i<info->no_rsets; i++)
531 p->tailcount = info->no_rsets;
/* Forward callback for the AND flavour. */
/*   buf, term - receive the hit, filled in by the final r_read_and() */
/*   untilbuf  - key to forward to */
/* Every item whose key is at least `scope` behind untilbuf is */
/* forwarded; the result is that of r_read_and(). */
536 static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
537 const void *untilbuf)
539 struct rfd_private *p = rfd->priv;
540 const struct rset_key_control *kctrl = rfd->rset->keycontrol;
541 struct rset_private *info = rfd->rset->priv;
546 for (i = 0; i<info->no_rsets; i++)
548 cmp = (*kctrl->cmp)(p->items[i].buf,untilbuf);
549 if (cmp <= -rfd->rset->scope)
551 killtail = 1; /* we are moving to a different hit */
552 if (!rset_forward(p->items[i].fd, p->items[i].buf,
553 &p->items[i].term, untilbuf))
555 p->eof = 1; /* game over */
/* second pass over all items; presumably clears the tail state when */
/* killtail was set - TODO confirm, loop body not visible */
563 for (i = 0; i<info->no_rsets; i++)
567 return r_read_and(rfd,buf,term);
/* Position callback. */
/*   current - set to the number of hits returned so far (mrfd->hits) */
/*   total   - set to an estimate of the total: current * stot / scur */
/* Each child item's position is queried with rset_pos and logged. */
570 static void r_pos (RSFD rfd, double *current, double *total)
572 struct rset_private *info =
573 (struct rset_private *)(rfd->rset->priv);
574 struct rfd_private *mrfd =
575 (struct rfd_private *)(rfd->priv);
/* presumably the sums of the children's cur and tot values - TODO confirm */
577 double scur = 0.0, stot = 0.0;
579 for (i = 0; i<info->no_rsets; i++){
580 rset_pos(mrfd->items[i].fd, &cur, &tot);
581 yaz_log(log_level, "r_pos: %d %0.1f %0.1f", i, cur,tot);
585 if (stot < 1.0) { /* nothing there */
588 yaz_log(log_level, "r_pos: NULL %0.1f %0.1f", *current, *total);
592 *current = (double) (mrfd->hits);
/* NOTE(review): divides by scur, while the visible guard only checks */
/* stot < 1.0 - confirm scur cannot be zero here. */
593 *total = *current*stot/scur;
594 yaz_log(log_level, "r_pos: = %0.1f %0.1f", *current, *total);
/* Write callback: logs a fatal error. */
/* NOTE(review): only one r_write is declared in this file, yet the */
/* message names just "multior" - confirm whether the AND control */
/* table also uses it. */
598 static int r_write (RSFD rfd, const void *buf)
600 yaz_log (YLOG_FATAL, "multior set type is read-only");
/* Get-terms callback: has every child rset add its terms through */
/* rset_getterms(). */
/*   terms    - array the terms are collected in */
/*   maxterms - capacity of `terms` */
/*   curterm  - in/out count of terms collected so far */
604 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
605 /* Special case: Some multi-ors have all terms pointing to the same */
606 /* term. We do not want to duplicate those. Other multiors (and ands) */
607 /* have different terms under them. Those we want. */
609 struct rset_private *info =
610 (struct rset_private *) ct->priv;
/* index of the first term this call will add */
611 int firstterm= *curterm;
613 for (i = 0; i<info->no_rsets; i++)
615 rset_getterms(info->rsets[i], terms, maxterms, curterm);
/* if more than one term has been added since firstterm, the count */
/* is within maxterms, and the newest term equals the first one */
/* added, drop the newest */
616 if ( ( *curterm > firstterm+1 ) &&
617 ( *curterm <= maxterms ) &&
618 ( terms[(*curterm)-1] == terms[firstterm] )
620 (*curterm)--; /* forget the term, seen that before */