2 * Copyright (C) 1994-2002, Index Data
4 * Sebastian Hammer, Adam Dickmeiss
6 * $Id: zsets.c,v 1.37 2002-07-25 13:06:43 adam Exp $
/* Size of one sort-key buffer: second dimension of zset_sort_entry.buf and
   the number of bytes copied per key in resultSetInsertSort. */
19 #define SORT_IDX_ENTRYSIZE 64
/* Number of sort-key buffers kept per entry: first dimension of
   zset_sort_entry.buf. */
20 #define ZSET_SORT_MAX_LEVEL 3
/* Per-term record kept with a result set.  resultSetAddTerm fills the
   reg_type, db, set, use and term fields of such entries. */
22 struct zebra_set_term_entry {
/* NOTE(review): the members below are reached through ZebraSet pointers in
   this file (s->sort_info, s->term_entries, s->next), so they presumably
   belong to the result-set structure rather than to the term entry -
   confirm against the full declaration. */
/* Sorted/ranked entry list of the set; allocated in resultSetAdd. */
37 struct zset_sort_info *sort_info;
/* Table of term entries; created on demand by resultSetAddTerm. */
38 struct zebra_set_term_entry *term_entries;
/* Next result set in the list that starts at zh->sets. */
40 struct zebra_set *next;
/* One slot of a result set's sorted entry list. */
44 struct zset_sort_entry {
/* One fixed-size key buffer per sort level; resultSetInsertSort fills
   buf[i] with sortIdx_read for criterion i. */
47 char buf[ZSET_SORT_MAX_LEVEL][SORT_IDX_ENTRYSIZE];
/* Bookkeeping for the sorted/ranked entries of one result set. */
50 struct zset_sort_info {
/* Backing storage: resultSetAdd allocates max_entries slots as one array. */
53 struct zset_sort_entry *all_entries;
/* Pointers into all_entries.  The insert routines shift and reassign these
   pointers, so the order of this array is the sort order. */
54 struct zset_sort_entry **entries;
/*
 * resultSetAddRPN: build the result set called setname from an RPN query.
 * The set is obtained through resultSetAdd, given its own NMEM, handed a
 * private copy of the num_bases database names and then filled by
 * rpn_search; the set's hit count is finally copied to zh->hits.
 * NOTE(review): part of the parameter list, any error handling and the
 * return statement are not visible here - confirm before relying on them.
 */
57 ZebraSet resultSetAddRPN (ZebraHandle zh, ODR input, ODR output,
58 Z_RPNQuery *rpn, int num_bases,
/* the third argument is resultSetAdd's ov flag */
69 zebraSet = resultSetAdd (zh, setname, 1);
/* the allocations below are made from the set's own NMEM */
74 zebraSet->nmem = nmem_create ();
76 zebraSet->num_bases = num_bases;
78 nmem_malloc (zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
/* duplicate every database name into the set's NMEM */
79 for (i = 0; i<num_bases; i++)
80 zebraSet->basenames[i] = nmem_strdup (zebraSet->nmem, basenames[i]);
/* run the search; output->mem is passed as the memory handle */
83 zebraSet->rset = rpn_search (zh, output->mem, rpn,
85 zebraSet->basenames, zebraSet->name,
/* publish the set's hit count on the handle */
87 zh->hits = zebraSet->hits;
/*
 * resultSetAddTerm: store reg_type, db, set, use and term in the term
 * entry table of result set s, at index s->hits.  The table is allocated
 * from the set's NMEM with room for 1000 entries, and nothing is stored
 * once s->hits has reached that capacity.  db and term are copied with
 * nmem_strdup, so the caller keeps ownership of its strings.
 * NOTE(review): statements between the visible lines (for example any guard
 * around nmem_create, or an update of s->hits) are not shown here.
 */
94 void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type,
95 const char *db, int set,
96 int use, const char *term)
99 s->nmem = nmem_create ();
/* no table yet: create it and mark every slot as unused (term == 0) */
100 if (!s->term_entries)
103 s->term_entries_max = 1000;
105 nmem_malloc (s->nmem, s->term_entries_max *
106 sizeof(*s->term_entries));
107 for (i = 0; i < s->term_entries_max; i++)
108 s->term_entries[i].term = 0;
/* only record the term while there is still a free slot */
110 if (s->hits < s->term_entries_max)
112 s->term_entries[s->hits].reg_type = reg_type;
113 s->term_entries[s->hits].db = nmem_strdup (s->nmem, db);
114 s->term_entries[s->hits].set = set;
115 s->term_entries[s->hits].use = use;
116 s->term_entries[s->hits].term = nmem_strdup (s->nmem, term);
/*
 * zebra_resultSetTerms: report details of term number `no` of the result
 * set called setname: its count (*count), its type (*type) and its name
 * (written to out, with *len describing the buffer).
 * NOTE(review): NULL checks on s, count, type and out, the final update of
 * *len and the return value are not visible here.
 */
122 int zebra_resultSetTerms (ZebraHandle zh, const char *setname,
124 int *type, char *out, size_t *len)
126 ZebraSet s = resultSetGet (zh, setname);
/* number of terms attached to the set's rset; `no` is checked against
   the range [0, no_max) */
133 no_max = s->rset->no_rset_terms;
134 if (no < 0 || no >= no_max)
137 *count = s->rset->rset_terms[no]->count;
139 *type = s->rset->rset_terms[no]->type;
/* the term name is the input text; one byte of the *len-byte output is
   held back (presumably for a terminating NUL - confirm) */
143 char *inbuf = s->rset->rset_terms[no]->name;
144 size_t inleft = strlen(inbuf);
145 size_t outleft = *len - 1;
/* (iconv_t)(-1) is iconv's "no descriptor" value: convert only when
   zh->iconv_from_utf8 is usable */
148 if (zh->iconv_from_utf8 != (iconv_t)(-1))
/* iconv signals failure by returning (size_t)(-1) */
153 ret = iconv(zh->iconv_from_utf8, &inbuf, &inleft,
155 if (ret == (size_t)(-1))
/* name is longer than the space left in out */
164 if (inleft > outleft)
/* copy *len bytes of the name into out */
167 memcpy (out, inbuf, *len);
/*
 * resultSetAdd: find the result set called name in zh->sets, or create it.
 * For an existing set the rset is deleted and the NMEM destroyed; a new
 * set gets its own copy of the name and a sort_info with 1000 preallocated
 * entries.  ov is tested together with s->locked before an existing set is
 * touched.
 * NOTE(review): what happens when that test succeeds, how a new set is
 * linked into zh->sets, and the return value are not visible here.
 */
175 ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov)
/* look for a set with the same name */
180 for (s = zh->sets; s; s = s->next)
181 if (!strcmp (s->name, name))
/* a set with this name already exists */
185 logf (LOG_DEBUG, "updating result set %s", name);
186 if (!ov || s->locked)
/* drop the old contents of the set */
189 rset_delete (s->rset);
191 nmem_destroy (s->nmem);
/* a brand-new set */
195 logf (LOG_DEBUG, "adding result set %s", name);
196 s = (ZebraSet) xmalloc (sizeof(*s));
/* keep a private copy of the name (+1 for the terminating NUL) */
199 s->name = (char *) xmalloc (strlen(name)+1);
200 strcpy (s->name, name);
/* sort bookkeeping: pointer array plus backing storage, both max_entries
   long */
202 s->sort_info = (struct zset_sort_info *)
203 xmalloc (sizeof(*s->sort_info));
204 s->sort_info->max_entries = 1000;
205 s->sort_info->entries = (struct zset_sort_entry **)
206 xmalloc (sizeof(*s->sort_info->entries) *
207 s->sort_info->max_entries);
208 s->sort_info->all_entries = (struct zset_sort_entry *)
209 xmalloc (sizeof(*s->sort_info->all_entries) *
210 s->sort_info->max_entries);
/* initially entries[i] refers to the i-th backing slot */
211 for (i = 0; i < s->sort_info->max_entries; i++)
212 s->sort_info->entries[i] = s->sort_info->all_entries + i;
/*
 * resultSetGet: look up the result set called name in zh->sets.  A set
 * that has neither term entries nor an rset but still carries its RPN
 * query is searched again with rpn_search.
 * NOTE(review): the return statements, what is done with the rpn_search
 * result, and the fate of the temporary NMEM are not visible here.
 */
223 ZebraSet resultSetGet (ZebraHandle zh, const char *name)
227 for (s = zh->sets; s; s = s->next)
228 if (!strcmp (s->name, name))
/* no results held, but the query was kept: run it again */
230 if (!s->term_entries && !s->rset && s->rpn)
/* fresh memory handle passed to rpn_search for this re-run */
232 NMEM nmem = nmem_create ();
233 yaz_log (LOG_LOG, "research %s", name);
235 rpn_search (zh, nmem, s->rpn, s->num_bases,
236 s->basenames, s->name, s);
/*
 * resultSetInvalidate: walk every result set of the handle and delete its
 * rset.
 * NOTE(review): any guard around rset_delete and any reset of s->rset are
 * not visible here.
 */
244 void resultSetInvalidate (ZebraHandle zh)
246 ZebraSet s = zh->sets;
/* visit every set in the list */
248 for (; s; s = s->next)
251 rset_delete (s->rset);
/*
 * resultSetDestroy: delete result sets of the handle.  names holds num set
 * names and statuses receives one Z_DeleteStatus_* value per name: every
 * status starts as "did not exist" and is switched to success when a set
 * with that name is found.  A destroyed set has its sort_info arrays, its
 * NMEM and its rset released.
 * NOTE(review): the outer loop over the set list, the unlinking of a set
 * and the handling of num < 0 (if any) are not visible here.
 */
256 void resultSetDestroy (ZebraHandle zh, int num, char **names,int *statuses)
/* address of the list head (presumably used to unlink sets - confirm) */
258 ZebraSet * ss = &zh->sets;
/* pessimistic default for every requested name */
262 for (i = 0; i<num; i++)
263 statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
265 zh->errString = NULL;
/* compare the current set's name with each requested name */
272 for (i = 0; i<num; i++)
273 if (!strcmp (s->name, names[i]))
276 statuses[i] = Z_DeleteStatus_success;
/* release everything the set owns */
285 xfree (s->sort_info->all_entries);
286 xfree (s->sort_info->entries);
287 xfree (s->sort_info);
290 nmem_destroy (s->nmem);
292 rset_delete (s->rset);
/*
 * zebraPosSetCreate: allocate an array of num ZebraPosSet records and fill
 * record i for the 1-based position positions[i] of result set name.
 * Three sources are visible: the set's term entries (term/db), the sorted
 * entry list (sysno/score) and, for positions beyond the sorted list, a
 * sequential read of the rset (sysno, score -1).
 * NOTE(review): the branch structure tying these paths together, the
 * default initialisation of the records and the return statements are not
 * visible here.
 */
301 ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name,
302 int num, int *positions)
308 struct zset_sort_info *sort_info;
310 if (!(sset = resultSetGet (zh, name)))
312 if (!(rset = sset->rset))
314 if (!sset->term_entries)
/* term-entry path: one record per requested position */
316 sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num);
317 for (i = 0; i<num; i++)
/* positions are 1-based indexes into term_entries.
   NOTE(review): no lower-bound check on positions[i] is visible here;
   confirm callers never pass a value below 1. */
324 if (positions[i] <= sset->term_entries_max)
326 sr[i].term = sset->term_entries[positions[i]-1].term;
327 sr[i].db = sset->term_entries[positions[i]-1].db;
/* rset path: one record per requested position */
333 sr = (ZebraPosSet) xmalloc (sizeof(*sr) * num);
334 for (i = 0; i<num; i++)
341 sort_info = sset->sort_info;
/* first serve every position that falls inside the sorted list */
346 for (i = 0; i<num; i++)
348 position = positions[i];
349 if (position > 0 && position <= sort_info->num_entries)
351 logf (LOG_DEBUG, "got pos=%d (sorted)", position);
352 sr[i].sysno = sort_info->entries[position-1]->sysno;
353 sr[i].score = sort_info->entries[position-1]->score;
357 /* did the sorted list supply every requested entry? */
358 for (i = 0; i<num; i++)
363 if (i < num) /* no: fetch the remaining ones unsorted from the rset */
/* unsorted positions are counted onwards from the end of the sorted list */
373 position = sort_info->num_entries;
/* skip the leading requests whose position is below that count */
374 while (num_i < num && positions[num_i] < position)
376 rfd = rset_open (rset, RSETF_READ);
377 while (num_i < num && rset_read (rset, rfd, &key, &term_index))
/* a change of sysno marks the next record */
379 if (key.sysno != psysno)
384 /* determine whether we already have this record in the sorted list */
385 for (i = sort_info->num_entries; --i >= 0; )
386 if (psysno == sort_info->entries[i]->sysno)
392 assert (num_i < num);
393 if (position == positions[num_i])
395 sr[num_i].sysno = psysno;
396 logf (LOG_DEBUG, "got pos=%d (unsorted)", position);
/* records taken from the unsorted remainder get score -1 */
397 sr[num_i].score = -1;
402 rset_close (rset, rfd);
/*
 * zebraPosSetDestroy: counterpart of zebraPosSetCreate taking a record
 * array and its length.
 * NOTE(review): the body is not visible here; presumably it frees records -
 * confirm.
 */
408 void zebraPosSetDestroy (ZebraHandle zh, ZebraPosSet records, int num)
/*
 * resultSetInsertSort: insert record sysno into the sorted entry list of
 * sset.  The sort keys of the record are read from the sort index, one per
 * criterion, compared with the keys of existing entries (numerically via
 * atof, or bytewise via memcmp) and the record is placed at the resulting
 * slot with score -1.
 * NOTE(review): the scan loop header, the handling of the comparison
 * result per relation and the full-list case are only partly visible here.
 */
419 void resultSetInsertSort (ZebraHandle zh, ZebraSet sset,
420 struct sortKeyInfo *criteria, int num_criteria,
423 struct zset_sort_entry this_entry;
424 struct zset_sort_entry *new_entry = NULL;
425 struct zset_sort_info *sort_info = sset->sort_info;
/* select the record in the sort index, then fetch the key of every
   criterion into this_entry.buf[i] */
428 sortIdx_sysno (zh->reg->sortIdx, sysno);
429 for (i = 0; i<num_criteria; i++)
431 sortIdx_type (zh->reg->sortIdx, criteria[i].attrUse);
432 sortIdx_read (zh->reg->sortIdx, this_entry.buf[i]);
/* the search for the insert slot starts at the end of the current list */
434 i = sort_info->num_entries;
/* compare criterion by criterion against entry i */
438 for (j = 0; j<num_criteria; j++)
440 if (criteria[j].numerical)
/* numeric keys: compare the parsed values */
442 double diff = atof(this_entry.buf[j]) -
443 atof(sort_info->entries[i]->buf[j]);
/* other keys: bytewise comparison of the key buffers */
452 rel = memcmp (this_entry.buf[j], sort_info->entries[i]->buf[j],
/* 'A' and 'D' are the relation codes set up in resultSetSortSingle for
   ascending and descending order */
460 if (criteria[j].relation == 'A')
465 else if (criteria[j].relation == 'D')
/* j becomes the slot whose entry object is reused for the new record */
472 j = sort_info->max_entries;
476 if (sort_info->num_entries == j)
/* list grows by one; take the slot just past the old end */
479 j = (sort_info->num_entries)++;
480 new_entry = sort_info->entries[j];
/* shift entries up to open the insert slot */
483 sort_info->entries[j] = sort_info->entries[j-1];
486 sort_info->entries[i] = new_entry;
/* store the keys and identity of the new record */
488 for (i = 0; i<num_criteria; i++)
489 memcpy (new_entry->buf[i], this_entry.buf[i], SORT_IDX_ENTRYSIZE);
490 new_entry->sysno = sysno;
491 new_entry->score = -1;
/*
 * resultSetInsertRank: insert record sysno with the given score into
 * sort_info, ordering entries by score.  relation is compared against 'A'
 * to choose the direction.
 * NOTE(review): the scan loop header, the first relation branch and the
 * full-list case are not visible here.
 */
494 void resultSetInsertRank (ZebraHandle zh, struct zset_sort_info *sort_info,
495 int sysno, int score, int relation)
497 struct zset_sort_entry *new_entry = NULL;
/* the search for the insert slot starts at the end of the current list */
500 i = sort_info->num_entries;
/* sign of rel tells how the new score relates to entry i */
505 rel = score - sort_info->entries[i]->score;
512 else if (relation == 'A')
/* j becomes the slot whose entry object is reused for the new record */
519 j = sort_info->max_entries;
523 if (sort_info->num_entries == j)
/* list grows by one; take the slot just past the old end */
526 j = (sort_info->num_entries)++;
528 new_entry = sort_info->entries[j];
/* shift entries up to open the insert slot */
531 sort_info->entries[j] = sort_info->entries[j-1];
534 sort_info->entries[i] = new_entry;
536 new_entry->sysno = sysno;
537 new_entry->score = score;
/*
 * resultSetSort: sort the result set input_setnames[0] according to
 * sort_sequence and make the outcome available as output_setname.  When
 * the output name differs from the input name the rset is duplicated and a
 * set is added under the output name; the sorting itself is delegated to
 * resultSetSortSingle, which reports through *sort_status.
 * NOTE(review): the actions taken for zero or several input sets and for a
 * failed lookup (error codes, returns) are not visible here.
 */
540 void resultSetSort (ZebraHandle zh, NMEM nmem,
541 int num_input_setnames, const char **input_setnames,
542 const char *output_setname,
543 Z_SortKeySpecList *sort_sequence, int *sort_status)
/* zero and more than one input set are tested separately before the
   single-set path */
548 if (num_input_setnames == 0)
553 if (num_input_setnames > 1)
558 logf (LOG_DEBUG, "result set sort input=%s output=%s",
559 *input_setnames, output_setname);
560 sset = resultSetGet (zh, input_setnames[0]);
/* the set name is copied into nmem and stored in zh->errString
   (presumably when the lookup failed - the condition is not visible) */
564 zh->errString = nmem_strdup (nmem, input_setnames[0]);
567 if (!(rset = sset->rset))
570 zh->errString = nmem_strdup (nmem, input_setnames[0]);
/* different output name: sort a duplicate of the rset inside a set added
   under that name (ov = 1) */
573 if (strcmp (output_setname, input_setnames[0]))
575 rset = rset_dup (rset);
576 sset = resultSetAdd (zh, output_setname, 1);
579 resultSetSortSingle (zh, nmem, sset, rset, sort_sequence, sort_status);
/*
 * resultSetSortSingle: rebuild the sorted entry list of sset from rset.
 * The Z39.50 sort specification is translated into at most three
 * sortKeyInfo criteria (relation 'A'/'D', attrUse, numerical flag), then
 * rset is read sequentially and each new sysno is inserted through
 * resultSetInsertSort.  *sort_status is set to Z_SortStatus_success at the
 * end of the visible path.
 * NOTE(review): the error branches (unsupported element/key types, failed
 * attribute lookups) and their status codes are not visible here.
 */
582 void resultSetSortSingle (ZebraHandle zh, NMEM nmem,
583 ZebraSet sset, RSET rset,
584 Z_SortKeySpecList *sort_sequence, int *sort_status)
/* NOTE(review): the literal 3 here and in the test below equals the value
   of ZSET_SORT_MAX_LEVEL; presumably they are meant to agree - confirm */
588 struct sortKeyInfo sort_criteria[3];
593 logf (LOG_LOG, "resultSetSortSingle start");
/* start from an empty sorted list */
594 sset->sort_info->num_entries = 0;
597 num_criteria = sort_sequence->num_specs;
598 if (num_criteria > 3)
/* translate every sort key specification */
600 for (i = 0; i < num_criteria; i++)
602 Z_SortKeySpec *sks = sort_sequence->specs[i];
/* map the Z39.50 relation onto the internal 'A'/'D' codes */
605 if (*sks->sortRelation == Z_SortRelation_ascending)
606 sort_criteria[i].relation = 'A';
607 else if (*sks->sortRelation == Z_SortRelation_descending)
608 sort_criteria[i].relation = 'D';
/* databaseSpecific and other non-generic elements are tested separately;
   the generic key is used below */
614 if (sks->sortElement->which == Z_SortElement_databaseSpecific)
619 else if (sks->sortElement->which != Z_SortElement_generic)
624 sk = sks->sortElement->u.generic;
627 case Z_SortKey_sortField:
628 logf (LOG_DEBUG, "Sort: key %d is of type sortField", i+1);
631 case Z_SortKey_elementSpec:
632 logf (LOG_DEBUG, "Sort: key %d is of type elementSpec", i+1);
635 case Z_SortKey_sortAttributes:
636 logf (LOG_DEBUG, "Sort: key %d is of type sortAttributes", i+1);
/* zebra_maps_sort yields the use value and sets the numerical flag */
637 sort_criteria[i].attrUse =
638 zebra_maps_sort (zh->reg->zebra_maps,
639 sk->u.sortAttributes,
640 &sort_criteria[i].numerical);
641 logf (LOG_DEBUG, "use value = %d", sort_criteria[i].attrUse);
/* -1 is tested as a special result of zebra_maps_sort */
642 if (sort_criteria[i].attrUse == -1)
/* a non-zero result of sortIdx_type is tested for this use value */
647 if (sortIdx_type (zh->reg->sortIdx, sort_criteria[i].attrUse))
/* read the rset; every change of sysno triggers one insert */
655 rfd = rset_open (rset, RSETF_READ);
656 while (rset_read (rset, rfd, &key, &term_index))
658 if (key.sysno != psysno)
662 resultSetInsertSort (zh, sset,
663 sort_criteria, num_criteria, psysno);
666 rset_close (rset, rfd);
/* log the per-term statistics of the rset */
668 for (i = 0; i < rset->no_rset_terms; i++)
669 yaz_log (LOG_LOG, "term=\"%s\" nn=%d type=%s count=%d",
670 rset->rset_terms[i]->name,
671 rset->rset_terms[i]->nn,
672 rset->rset_terms[i]->flags,
673 rset->rset_terms[i]->count);
675 *sort_status = Z_SortStatus_success;
676 logf (LOG_LOG, "resultSetSortSingle end");
/*
 * resultSetRef: look up the result set identified by resultSetId with
 * resultSetGet.
 * NOTE(review): the statements that produce the RSET return value are not
 * visible here.
 */
679 RSET resultSetRef (ZebraHandle zh, Z_ResultSetId *resultSetId)
683 if ((s = resultSetGet (zh, resultSetId)))
/*
 * resultSetRank: rebuild the entry list of zebraSet as a ranked list.
 * rset is read sequentially; the callbacks of the "rank-1" rank class are
 * driven as begin -> add (per key) -> calc (per record) -> end, and every
 * record is inserted with its score through resultSetInsertRank.
 * NOTE(review): the loop header, the bookkeeping of kno / zebraSet->hits
 * and the empty-rset path are not visible here.
 */
688 void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset)
694 ZebraRankClass rank_class;
695 struct rank_control *rc;
696 struct zset_sort_info *sort_info;
/* start from an empty entry list */
698 sort_info = zebraSet->sort_info;
699 sort_info->num_entries = 0;
701 rfd = rset_open (rset, RSETF_READ);
703 yaz_log (LOG_LOG, "resultSetRank");
/* NOTE(review): zebraRankLookup can run off the end of its list; no NULL
   check on rank_class is visible before the dereference below - confirm */
705 rank_class = zebraRankLookup (zh, "rank-1");
706 rc = rank_class->control;
/* only rank when the rset yields at least one key */
708 if (rset_read (rset, rfd, &key, &term_index))
710 int psysno = key.sysno;
713 (*rc->begin) (zh->reg, rank_class->class_handle, rset);
/* sysno changed: score the record just completed and insert it */
718 if (key.sysno != psysno)
720 score = (*rc->calc) (handle, psysno);
722 resultSetInsertRank (zh, sort_info, psysno, score, 'A');
/* feed the current key occurrence to the ranking handler */
726 (*rc->add) (handle, key.seqno, term_index);
728 while (rset_read (rset, rfd, &key, &term_index));
/* score and insert the last record, then finish the ranking session */
729 score = (*rc->calc) (handle, psysno);
730 resultSetInsertRank (zh, sort_info, psysno, score, 'A');
731 (*rc->end) (zh->reg, handle);
733 rset_close (rset, rfd);
/* log the per-term statistics of the rset */
735 for (i = 0; i < rset->no_rset_terms; i++)
736 yaz_log (LOG_LOG, "term=\"%s\" nn=%d type=%s count=%d",
737 rset->rset_terms[i]->name,
738 rset->rset_terms[i]->nn,
739 rset->rset_terms[i]->flags,
740 rset->rset_terms[i]->count);
742 yaz_log (LOG_LOG, "%d keys, %d distinct sysnos", kno, zebraSet->hits);
/*
 * zebraRankLookup: search the register's rank class list for the class
 * whose control name equals name.  A class found with init_flag still
 * clear gets its class_handle from the control's create callback, when
 * one is provided.
 * NOTE(review): the list advance, the update of init_flag and the return
 * statement are not visible here.
 */
745 ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name)
747 ZebraRankClass p = zh->reg->rank_classes;
/* stop at the first matching name or at the end of the list */
748 while (p && strcmp (p->control->name, name))
/* first use of this class */
750 if (p && !p->init_flag)
752 if (p->control->create)
753 p->class_handle = (*p->control->create)(zh->reg);
/*
 * zebraRankInstall: register a rank class with reg.  The control structure
 * and its name are copied, so the caller's ctrl need not outlive the call,
 * and the new class is pushed onto the front of reg->rank_classes.
 * NOTE(review): initialisation of the remaining class fields is not
 * visible here.
 */
759 void zebraRankInstall (struct zebra_register *reg, struct rank_control *ctrl)
761 ZebraRankClass p = (ZebraRankClass) xmalloc (sizeof(*p));
/* copy the whole control struct, then replace the name pointer with a
   private duplicate */
762 p->control = (struct rank_control *) xmalloc (sizeof(*p->control));
763 memcpy (p->control, ctrl, sizeof(*p->control));
764 p->control->name = xstrdup (ctrl->name);
/* link in at the head of the list */
766 p->next = reg->rank_classes;
767 reg->rank_classes = p;
/*
 * zebraRankDestroy: tear down every rank class of reg.  The destroy
 * callback is invoked only for classes that were initialised (init_flag
 * set) and that provide one; the duplicated control name is freed and the
 * list head is reset to NULL.
 * NOTE(review): the loop header and the freeing of the control struct and
 * the class itself are not visible here.
 */
770 void zebraRankDestroy (struct zebra_register *reg)
772 ZebraRankClass p = reg->rank_classes;
/* remember the successor before this class is released */
775 ZebraRankClass p_next = p->next;
776 if (p->init_flag && p->control->destroy)
777 (*p->control->destroy)(reg, p->class_handle);
778 xfree (p->control->name);
783 reg->rank_classes = NULL;