1 /* This file is part of the Zebra server.
2 Copyright (C) Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 * \file rsmultiandor.c
23 * \brief This module implements the rsmulti_or and rsmulti_and result sets
25 * rsmultior is based on a heap, from which we find the next hit.
27 * rsmultiand is based on a simple array of rsets, and a linear
28 * search to find the record that exists in all of those rsets.
29 * To speed things up, the array is sorted so that the smallest
30 * rsets come first, they are most likely to have the hits furthest
31 * away, and thus forwarding to them makes the most sense.
43 #include <idzebra/util.h>
44 #include <idzebra/isamc.h>
47 static RSFD r_open_and(RSET ct, int flag);
48 static RSFD r_open_or(RSET ct, int flag);
49 static void r_close(RSFD rfd);
50 static void r_delete(RSET ct);
51 static int r_read_and(RSFD rfd, void *buf, TERMID *term);
52 static int r_read_or(RSFD rfd, void *buf, TERMID *term);
53 static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
54 const void *untilbuf);
55 static int r_forward_or(RSFD rfd, void *buf, TERMID *term,
56 const void *untilbuf);
57 static void r_pos_and(RSFD rfd, double *current, double *total);
58 static void r_pos_or(RSFD rfd, double *current, double *total);
59 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
61 static const struct rset_control control_or =
74 static const struct rset_control control_and =
87 /* The heap structure:
88 * The rset contains a list or rsets we are ORing together
89 * The rfd contains a heap of heap-items, which contain
90 * a rfd opened to those rsets, and a buffer for one key.
91 * They also contain a ptr to the rset list in the rset
92 * itself, for practical reasons.
105 const struct rset_key_control *kctrl;
106 struct heap_item **heap; /* ptrs to the rfd */
108 typedef struct heap *HEAP;
111 struct rset_private {
117 struct heap_item *items; /* we alloc and free them here */
118 HEAP h; /* and move around here */
119 zint hits; /* returned so far */
120 int eof; /* seen the end of it */
121 int tailcount; /* how many items are tailing */
127 static int log_level = 0;
128 static int log_level_initialized = 0;
130 /* Heap functions ***********************/
132 static void heap_swap(HEAP h, int x, int y)
134 struct heap_item *swap;
136 h->heap[x] = h->heap[y];
140 static int heap_cmp(HEAP h, int x, int y)
142 return (*h->kctrl->cmp)(h->heap[x]->buf, h->heap[y]->buf);
145 static int heap_empty(HEAP h)
147 return 0 == h->heapnum;
150 /** \brief deletes the first item in the heap, and balances the rest
152 static void heap_delete(HEAP h)
154 int cur = 1, child = 2;
155 h->heap[1] = 0; /* been deleted */
156 heap_swap(h, 1, h->heapnum--);
157 while (child <= h->heapnum)
159 if (child < h->heapnum && heap_cmp(h, child, 1 + child) > 0)
161 if (heap_cmp(h,cur,child) > 0)
163 heap_swap(h, cur, child);
172 /** \brief puts item into heap.
173 The heap root element has changed value (to bigger)
174 Swap downwards until the heap is ordered again
176 static void heap_balance(HEAP h)
178 int cur = 1, child = 2;
179 while (child <= h->heapnum)
181 if (child < h->heapnum && heap_cmp(h, child, 1 + child) > 0)
183 if (heap_cmp(h,cur,child) > 0)
185 heap_swap(h, cur, child);
194 static void heap_insert(HEAP h, struct heap_item *hi)
198 cur = ++(h->heapnum);
199 assert(cur <= h->heapmax);
202 while (parent && (heap_cmp(h,parent,cur) > 0))
205 heap_swap(h, cur, parent);
212 HEAP heap_create(NMEM nmem, int size, const struct rset_key_control *kctrl)
214 HEAP h = (HEAP) nmem_malloc(nmem, sizeof(*h));
216 ++size; /* heap array starts at 1 */
220 h->heap = (struct heap_item**) nmem_malloc(nmem, size * sizeof(*h->heap));
221 h->heap[0] = 0; /* not used */
225 static void heap_clear( HEAP h)
231 static void heap_destroy(HEAP h)
233 /* nothing to delete, all is nmem'd, and will go away in due time */
236 /** \brief compare and items for quicksort
237 used in qsort to get the multi-and args in optimal order
238 that is, those with fewest occurrences first
240 int compare_ands(const void *x, const void *y)
241 { const struct heap_item *hx = x;
242 const struct heap_item *hy = y;
243 double cur, totx, toty;
244 rset_pos(hx->fd, &cur, &totx);
245 rset_pos(hy->fd, &cur, &toty);
246 if (totx > toty + 0.5)
248 if (totx < toty - 0.5)
250 return 0; /* return totx - toty, except for overflows and rounding */
253 static RSET rsmulti_andor_create(NMEM nmem,
254 struct rset_key_control *kcontrol,
255 int scope, TERMID termid,
256 int no_rsets, RSET* rsets,
257 const struct rset_control *ctrl)
259 RSET rnew = rset_create_base(ctrl, nmem, kcontrol, scope, termid,
261 struct rset_private *info;
262 if (!log_level_initialized)
264 log_level = yaz_log_module_level("rsmultiandor");
265 log_level_initialized = 1;
267 yaz_log(log_level, "rsmultiand_andor_create scope=%d", scope);
268 info = (struct rset_private *) nmem_malloc(rnew->nmem, sizeof(*info));
273 RSET rset_create_or(NMEM nmem, struct rset_key_control *kcontrol,
274 int scope, TERMID termid, int no_rsets, RSET* rsets)
276 return rsmulti_andor_create(nmem, kcontrol, scope, termid,
277 no_rsets, rsets, &control_or);
280 RSET rset_create_and(NMEM nmem, struct rset_key_control *kcontrol,
281 int scope, int no_rsets, RSET* rsets)
283 return rsmulti_andor_create(nmem, kcontrol, scope, 0,
284 no_rsets, rsets, &control_and);
287 static void r_delete(RSET ct)
291 static RSFD r_open_andor(RSET ct, int flag, int is_and)
294 struct rfd_private *p;
295 const struct rset_key_control *kctrl = ct->keycontrol;
298 if (flag & RSETF_WRITE)
300 yaz_log(YLOG_FATAL, "multiandor set type is read-only");
303 rfd = rfd_create_base(ct);
306 p = (struct rfd_private *)rfd->priv;
310 /* all other pointers shouls already be allocated, in right sizes! */
314 p = (struct rfd_private *) nmem_malloc(ct->nmem,sizeof(*p));
319 p->tailbits = nmem_malloc(ct->nmem, ct->no_children*sizeof(char) );
321 p->h = heap_create(ct->nmem, ct->no_children, kctrl);
322 p->items = (struct heap_item *)
323 nmem_malloc(ct->nmem, ct->no_children*sizeof(*p->items));
324 for (i = 0; i < ct->no_children; i++)
326 p->items[i].rset = ct->children[i];
327 p->items[i].buf = nmem_malloc(ct->nmem, kctrl->key_size);
335 { /* read the array and sort it */
336 for (i = 0; i < ct->no_children; i++)
338 p->items[i].fd = rset_open(ct->children[i], RSETF_READ);
339 if (!rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
343 qsort(p->items, ct->no_children, sizeof(p->items[0]), compare_ands);
346 { /* fill the heap for ORing */
347 for (i = 0; i < ct->no_children; i++)
349 p->items[i].fd = rset_open(ct->children[i],RSETF_READ);
350 if ( rset_read(p->items[i].fd, p->items[i].buf, &p->items[i].term))
351 heap_insert(p->h, &(p->items[i]));
357 static RSFD r_open_or(RSET ct, int flag)
359 return r_open_andor(ct, flag, 0);
362 static RSFD r_open_and(RSET ct, int flag)
364 return r_open_andor(ct, flag, 1);
367 static void r_close(RSFD rfd)
369 struct rfd_private *p=(struct rfd_private *)(rfd->priv);
374 for (i = 0; i < rfd->rset->no_children; i++)
376 rset_close(p->items[i].fd);
379 static int r_forward_or(RSFD rfd, void *buf,
380 TERMID *term, const void *untilbuf)
381 { /* while heap head behind untilbuf, forward it and rebalance heap */
382 struct rfd_private *p = rfd->priv;
383 const struct rset_key_control *kctrl = rfd->rset->keycontrol;
384 if (heap_empty(p->h))
386 while ((*kctrl->cmp)(p->h->heap[1]->buf,untilbuf) < -rfd->rset->scope )
388 if (rset_forward(p->h->heap[1]->fd, p->h->heap[1]->buf,
389 &p->h->heap[1]->term, untilbuf))
394 if (heap_empty(p->h))
399 return r_read_or(rfd, buf, term);
402 /** \brief reads one item key from an 'or' set
403 \param rfd set handle
404 \param buf resulting item buffer
405 \param term resulting term
407 \retval 1 item could be read
409 static int r_read_or(RSFD rfd, void *buf, TERMID *term)
411 RSET rset = rfd->rset;
412 struct rfd_private *mrfd = rfd->priv;
413 const struct rset_key_control *kctrl = rset->keycontrol;
414 struct heap_item *it;
416 if (heap_empty(mrfd->h))
418 it = mrfd->h->heap[1];
419 memcpy(buf, it->buf, kctrl->key_size);
428 rdres = rset_read(it->fd, it->buf, &it->term);
430 heap_balance(mrfd->h);
432 heap_delete(mrfd->h);
436 /** \brief reads one item key from an 'and' set
437 \param rfd set handle
438 \param buf resulting item buffer
439 \param term resulting term
441 \retval 1 item could be read
443 Has to return all hits where each item points to the
444 same sysno (scope), in order. Keep an extra key (hitkey)
445 as long as all records do not point to hitkey, forward
446 them, and update hitkey to be the highest seen so far.
447 (if any item eof's, mark eof, and return 0 thereafter)
448 Once a hit has been found, scan all items for the smallest
449 value. Mark all as being in the tail. Read next from that
450 item, and if not in the same record, clear its tail bit
452 static int r_read_and(RSFD rfd, void *buf, TERMID *term)
454 struct rfd_private *p = rfd->priv;
456 const struct rset_key_control *kctrl = ct->keycontrol;
462 { /* we are tailing, find lowest tail and return it */
466 for (i = 0; i < ct->no_children; i++)
472 (p->items[i].buf, p->items[mintail].buf);
478 if (kctrl->get_segment)
479 { /* segments enabled */
480 zint segment = kctrl->get_segment(p->items[i].buf);
481 /* store segment if not stored already */
482 if (!p->segment && segment)
483 p->segment = segment;
485 /* skip rest entirely if segments don't match */
486 if (p->segment && segment && p->segment != segment)
491 /* return the lowest tail */
492 memcpy(buf, p->items[mintail].buf, kctrl->key_size);
494 *term = p->items[mintail].term;
495 if (!rset_read(p->items[mintail].fd, p->items[mintail].buf,
496 &p->items[mintail].term))
498 p->eof = 1; /* game over, once tails have been returned */
499 p->tailbits[mintail] = 0;
505 cmp = (*kctrl->cmp)(p->items[mintail].buf,buf);
506 if (cmp >= rfd->rset->scope)
508 p->tailbits[mintail] = 0;
513 continue; /* skip again.. eventually tailcount will be 0 */
514 if (p->tailcount == 0)
518 /* not tailing, forward until all records match, and set up */
519 /* as tails. the earlier 'if' will then return the hits */
521 return 0; /* nothing more to see */
522 i = 1; /* assume items[0] is highest up */
523 while (i < ct->no_children)
525 int cmp = (*kctrl->cmp)(p->items[0].buf, p->items[i].buf);
526 if (cmp <= -rfd->rset->scope) { /* [0] was behind, forward it */
527 if (!rset_forward(p->items[0].fd, p->items[0].buf,
528 &p->items[0].term, p->items[i].buf))
530 p->eof = 1; /* game over */
533 i = 0; /* start forwarding from scratch */
535 else if (cmp >= rfd->rset->scope)
536 { /* [0] was ahead, forward i */
537 if (!rset_forward(p->items[i].fd, p->items[i].buf,
538 &p->items[i].term, p->items[0].buf))
540 p->eof = 1; /* game over */
547 /* if we get this far, all rsets are now within +- scope of [0] */
548 /* ergo, we have a hit. Mark them all as tailing, and let the */
549 /* upper 'if' return the hits in right order */
550 for (i = 0; i < ct->no_children; i++)
552 p->tailcount = ct->no_children;
559 static int r_forward_and(RSFD rfd, void *buf, TERMID *term,
560 const void *untilbuf)
562 struct rfd_private *p = rfd->priv;
564 const struct rset_key_control *kctrl = ct->keycontrol;
569 for (i = 0; i < ct->no_children; i++)
571 cmp = (*kctrl->cmp)(p->items[i].buf,untilbuf);
572 if (cmp <= -rfd->rset->scope)
574 killtail = 1; /* we are moving to a different hit */
575 if (!rset_forward(p->items[i].fd, p->items[i].buf,
576 &p->items[i].term, untilbuf))
578 p->eof = 1; /* game over */
586 for (i = 0; i < ct->no_children; i++)
590 return r_read_and(rfd,buf,term);
593 static void r_pos_x(RSFD rfd, double *current, double *total, int and_op)
596 struct rfd_private *mrfd = (struct rfd_private *)(rfd->priv);
597 double ratio = and_op ? 0.0 : 1.0;
599 double sum_cur = 0.0;
600 double sum_tot = 0.0;
601 for (i = 0; i < ct->no_children; i++)
604 rset_pos(mrfd->items[i].fd, &cur, &tot);
606 yaz_log(log_level, "r_pos: %d %0.1f %0.1f", i, cur,tot);
611 double nratio = cur / tot;
619 sum_cur += (cur - 1);
623 if (!and_op && sum_tot > 0.0)
625 yaz_log(YLOG_LOG, "or op sum_cur=%0.1f sum_tot=%0.1f hits=%f", sum_cur, sum_tot, (double) mrfd->hits);
626 ratio = sum_cur / sum_tot;
628 if (ratio == 0.0 || ratio == 1.0)
629 { /* nothing there */
632 yaz_log(log_level, "r_pos: NULL %0.1f %0.1f", *current, *total);
636 *current = (double) (mrfd->hits);
637 *total = *current / ratio;
638 yaz_log(log_level, "r_pos: = %0.1f %0.1f", *current, *total);
642 static void r_pos_and(RSFD rfd, double *current, double *total)
644 r_pos_x(rfd, current, total, 1);
647 static void r_pos_or(RSFD rfd, double *current, double *total)
649 r_pos_x(rfd, current, total, 0);
652 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
655 rset_get_one_term(ct, terms, maxterms, curterm);
658 /* Special case: Some multi-ors have all terms pointing to the same
659 term. We do not want to duplicate those. Other multiors (and ands)
660 have different terms under them. Those we want.
662 int firstterm = *curterm;
664 for (i = 0; i < ct->no_children; i++)
666 rset_getterms(ct->children[i], terms, maxterms, curterm);
667 if (*curterm > firstterm + 1 && *curterm <= maxterms &&
668 terms[(*curterm) - 1] == terms[firstterm])
669 (*curterm)--; /* forget the term, seen that before */
678 * c-file-style: "Stroustrup"
679 * indent-tabs-mode: nil
681 * vim: shiftwidth=4 tabstop=8 expandtab