1 /* This file is part of the Zebra server.
2 Copyright (C) 1995-2008 Index Data
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
31 #include <yaz/diagbib1.h>
34 #define ZSET_SORT_MAX_LEVEL 10
/* Result-set data structures.  The fields shown here cover: the memory
   pool used when creating rsets, the names of the databases searched,
   the sort specification and sort state, the per-term entries, the link
   to the next set in the list, a flag telling whether the hit count is
   an estimate, the cached read position (position, open rfd, sysno)
   and the limit above which hit counts are approximated. */
36 struct zebra_set_term_entry {
47 NMEM rset_nmem; /* for creating the rsets in */
50 const char **basenames;
52 Z_SortKeySpecList *sortSpec;
53 struct zset_sort_info *sort_info;
54 struct zebra_set_term_entry *term_entries;
56 struct zebra_set *next;
58 int estimated_hit_count;
60 zint cache_position; /* last position */
61 RSFD cache_rfd; /* rfd (NULL if not existing) */
62 zint cache_psysno; /* sysno for last position */
63 zint approx_limit; /* limit before we do approx */
/* Sort bookkeeping.  zset_sort_info owns a pool of entries (all_entries)
   and an array of pointers into that pool (entries); resultSetAdd()
   points entries[i] at all_entries + i, and the insert routines reorder
   the pointer array rather than the pool. */
66 struct zset_sort_entry {
71 struct zset_sort_info {
74 struct zset_sort_entry *all_entries;
75 struct zset_sort_entry **entries;
/* Log levels used by this module; all start at 0.  loglevels() assigns
   the sort, searchhits, searchterms and resultsets levels through
   yaz_log_module_level(). */
78 static int log_level_set=0;
79 static int log_level_sort=0;
80 static int log_level_searchhits=0;
81 static int log_level_searchterms=0;
82 static int log_level_resultsets=0;
/* Resolves the log level of the "sorting", "searchhits", "searchterms"
   and "resultsets" log modules and stores each one in the matching
   file-level variable. */
84 static void loglevels(void)
88 log_level_sort = yaz_log_module_level("sorting");
89 log_level_searchhits = yaz_log_module_level("searchhits");
90 log_level_searchterms = yaz_log_module_level("searchterms");
91 log_level_resultsets = yaz_log_module_level("resultsets");
/* Runs the RPN query `rpn` on behalf of result set `sset`.
   A sort sequence with a fixed number of slots (10) is allocated from
   `nmem` and every slot is cleared before the search.  After
   rpn_search_top() the filled slots are counted up to the first empty
   one and num_specs is set to that count.  The approximation limit is
   obtained from the query with rpn_get_top_approx_limit() and copied
   to the hits_limit of the rset.  The hits are then processed by
   resultSetRank() and/or resultSetSortSingle().
   NOTE(review): the counting loop stops only at an empty slot; if all
   10 slots were filled it would read past the end of the specs array.
   Consider bounding it (and tying the 10 to ZSET_SORT_MAX_LEVEL). */
96 static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
97 Z_RPNQuery *rpn, ZebraSet sset)
100 Z_SortKeySpecList *sort_sequence;
102 ZEBRA_RES res = ZEBRA_OK;
104 sort_sequence = (Z_SortKeySpecList *)
105 nmem_malloc(nmem, sizeof(*sort_sequence));
106 sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
107 sort_sequence->specs = (Z_SortKeySpec **)
108 nmem_malloc(nmem, sort_sequence->num_specs *
109 sizeof(*sort_sequence->specs));
110 for (i = 0; i<sort_sequence->num_specs; i++)
111 sort_sequence->specs[i] = 0;
113 rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit);
115 res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId,
118 sset->num_bases, sset->basenames,
125 for (i = 0; sort_sequence->specs[i]; i++)
127 sort_sequence->num_specs = i;
128 rset->hits_limit = sset->approx_limit;
131 res = resultSetRank(zh, sset, rset, rset_nmem);
135 res = resultSetSortSingle(zh, nmem, sset, rset,
136 sort_sequence, &sort_status);
/* Creates (or overwrites) the result set `setname`, runs `rpn` against
   it and reports the outcome.
   basenames           - num_bases database names; each string is
                         copied into the nmem of the set.
   hits                - receives zebraSet->hits.
   estimated_hit_count - cleared up front, set to 1 when the set flags
                         its hit count as an estimate.
   The set is marked locked before the search and unlocked again
   afterwards. */
143 ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
144 int num_bases, char **basenames,
146 zint *hits, int *estimated_hit_count)
153 *estimated_hit_count = 0;
155 zebraSet = resultSetAdd(zh, setname, 1);
158 zebraSet->locked = 1;
161 zebraSet->rset_nmem = nmem_create();
163 zebraSet->num_bases = num_bases;
164 zebraSet->basenames =
165 nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames));
166 for (i = 0; i<num_bases; i++)
167 zebraSet->basenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]);
169 res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
171 *hits = zebraSet->hits;
172 if (zebraSet->estimated_hit_count)
173 *estimated_hit_count = 1;
177 zebraSet->locked = 0;
/* Records a search term on result set `s`.
   When the set has no term table yet, one with 1000 entries is
   allocated from s->nmem and every `term` pointer in it is cleared.
   While s->hits is below the table size, the entry at index s->hits
   receives reg_type plus nmem copies of db, index_name and term.
   zh is only referenced by the assert. */
183 void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type,
184 const char *db, const char *index_name,
187 assert(zh); /* compiler shut up */
189 s->nmem = nmem_create();
190 if (!s->term_entries)
193 s->term_entries_max = 1000;
195 nmem_malloc(s->nmem, s->term_entries_max *
196 sizeof(*s->term_entries));
197 for (i = 0; i < s->term_entries_max; i++)
198 s->term_entries[i].term = 0;
200 if (s->hits < s->term_entries_max)
202 s->term_entries[s->hits].reg_type = reg_type;
203 s->term_entries[s->hits].db = nmem_strdup(s->nmem, db);
204 s->term_entries[s->hits].index_name = nmem_strdup(s->nmem, index_name);
205 s->term_entries[s->hits].term = nmem_strdup(s->nmem, term);
/* Finds or creates the result set called `name` on handle `zh`.
   Existing set ("updating result set"): its cached rfd, rset and both
   memory pools are released so that it can be reused.  The test on
   `ov` and s->locked guards this path (presumably the update is
   refused when ov is 0 or the set is locked - confirm).
   New set ("adding result set"): the set and its sort_info are
   allocated with xmalloc and the name is duplicated.  The sort buffer
   size comes from the "sortmax" resource (default "1000", raised to at
   least 2); entries[i] is pointed at all_entries + i.  cache_position
   and estimated_hit_count start at 0 and approx_limit is taken from
   the handle. */
210 ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov)
215 for (s = zh->sets; s; s = s->next)
216 if (!strcmp(s->name, name))
223 yaz_log(log_level_resultsets, "updating result set %s", name);
224 if (!ov || s->locked)
229 rset_close(s->cache_rfd);
230 rset_delete(s->rset);
233 nmem_destroy(s->rset_nmem);
235 nmem_destroy(s->nmem);
239 const char *sort_max_str = zebra_get_resource(zh, "sortmax", "1000");
241 yaz_log(log_level_resultsets, "adding result set %s", name);
242 s = (ZebraSet) xmalloc(sizeof(*s));
245 s->name = xstrdup(name);
247 s->sort_info = (struct zset_sort_info *)
248 xmalloc(sizeof(*s->sort_info));
249 s->sort_info->max_entries = atoi(sort_max_str);
250 if (s->sort_info->max_entries < 2)
251 s->sort_info->max_entries = 2;
253 s->sort_info->entries = (struct zset_sort_entry **)
254 xmalloc(sizeof(*s->sort_info->entries) *
255 s->sort_info->max_entries);
256 s->sort_info->all_entries = (struct zset_sort_entry *)
257 xmalloc(sizeof(*s->sort_info->all_entries) *
258 s->sort_info->max_entries);
259 for (i = 0; i < s->sort_info->max_entries; i++)
260 s->sort_info->entries[i] = s->sort_info->all_entries + i;
270 s->cache_position = 0;
272 s->approx_limit = zh->approx_limit;
273 s->estimated_hit_count = 0;
/* Looks up result set `name` in zh->sets by exact string comparison.
   When the matching set has neither term entries nor an rset but still
   carries its RPN query, the query is executed again ("research") with
   a fresh rset_nmem, and the result is sorted again ("resort") if the
   set has a sortSpec. */
277 ZebraSet resultSetGet(ZebraHandle zh, const char *name)
281 for (s = zh->sets; s; s = s->next)
282 if (!strcmp(s->name, name))
284 if (!s->term_entries && !s->rset && s->rpn)
286 NMEM nmem = nmem_create();
287 yaz_log(log_level_resultsets, "research %s", name);
289 s->rset_nmem = nmem_create();
290 resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
291 if (s->rset && s->sortSpec)
294 yaz_log(log_level_resultsets, "resort %s", name);
295 resultSetSortSingle(zh, nmem, s, s->rset, s->sortSpec,
/* Fetches result set `setname` with resultSetGet() and stores its
   database name array in *basenames and the array length in
   *num_bases.  The array is the one owned by the set, not a copy. */
305 ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname,
306 const char ***basenames, int *num_bases)
308 ZebraSet sset = resultSetGet(zh, setname);
311 *basenames = sset->basenames;
312 *num_bases = sset->num_bases;
/* Walks every set on zh->sets and drops its search state: the cached
   rfd is closed, the rset is deleted, cache_position is reset to 0 and
   rset_nmem is destroyed. */
317 void resultSetInvalidate(ZebraHandle zh)
319 ZebraSet s = zh->sets;
321 yaz_log(log_level_resultsets, "invalidating result sets");
322 for (; s; s = s->next)
327 rset_close(s->cache_rfd);
328 rset_delete(s->rset);
332 s->cache_position = 0;
334 nmem_destroy(s->rset_nmem);
/* Deletes result sets from the list at zh->sets.
   statuses[0..num-1] is first filled with
   Z_DeleteStatus_resultSetDidNotExist.  For a set whose name equals
   names[i], statuses[i] becomes Z_DeleteStatus_success and the sort
   buffers, nmem, cached rfd, rset and rset_nmem of the set are
   released. */
339 void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses)
341 ZebraSet * ss = &zh->sets;
345 for (i = 0; i<num; i++)
346 statuses[i] = Z_DeleteStatus_resultSetDidNotExist;
353 for (i = 0; i<num; i++)
354 if (!strcmp(s->name, names[i]))
357 statuses[i] = Z_DeleteStatus_success;
366 xfree(s->sort_info->all_entries);
367 xfree(s->sort_info->entries);
371 nmem_destroy(s->nmem);
375 rset_close(s->cache_rfd);
376 rset_delete(s->rset);
379 nmem_destroy(s->rset_nmem);
/* Builds an array of `num` positions and hands it to
   zebra_meta_records_create().  `pos` starts out pointing at the
   pos_small array; an xmalloc'ed array of num entries can replace it.
   Requests with num <= 0 or num > 10000 are caught by the range check
   (presumably rejected - confirm the return value with the caller). */
388 ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh,
393 zint *pos = pos_small;
397 if (num > 10000 || num <= 0)
401 pos = xmalloc(sizeof(*pos) * num);
403 for (i = 0; i<num; i++)
406 mr = zebra_meta_records_create(zh, name, num, pos);
/* Returns an xmalloc'ed array of `num` ZebraMetaRecord entries for the
   1-based `positions` of result set `name`.
   Term-entry path: term and db are copied from
   term_entries[position - 1].
   NOTE(review): only the upper bound (position <= term_entries_max) is
   checked before that index is used; no lower-bound check is visible.
   Rset path: positions within 1..sort_info->num_entries are answered
   from the sorted entries (sysno and score).  Any position not covered
   that way is answered by reading the rset sequentially, skipping
   sysnos that already appear among the sorted entries; such records
   get score -1.
   The last read position, its sysno and the open rfd are stored on the
   set (cache_position, cache_psysno, cache_rfd) so that a later call
   asking for a higher position can continue from there instead of
   reopening the rset.
   sysno_mem_index selects which key.mem slot holds the sysno; it
   depends on zh->m_staticrank. */
413 ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name,
414 int num, zint *positions)
417 ZebraMetaRecord *sr = 0;
420 struct zset_sort_info *sort_info;
421 size_t sysno_mem_index = 0;
423 if (zh->m_staticrank)
428 if (!(sset = resultSetGet(zh, name)))
430 if (!(rset = sset->rset))
432 if (!sset->term_entries)
434 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
435 for (i = 0; i<num; i++)
442 if (positions[i] <= sset->term_entries_max)
444 sr[i].term = sset->term_entries[positions[i]-1].term;
445 sr[i].db = sset->term_entries[positions[i]-1].db;
451 sr = (ZebraMetaRecord *) xmalloc(sizeof(*sr) * num);
452 for (i = 0; i<num; i++)
459 sort_info = sset->sort_info;
464 for (i = 0; i<num; i++)
466 position = positions[i];
467 if (position > 0 && position <= sort_info->num_entries)
469 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT
470 " (sorted)", position);
471 sr[i].sysno = sort_info->entries[position-1]->sysno;
472 sr[i].score = sort_info->entries[position-1]->score;
476 /* did we really get all entries using sort ? */
477 for (i = 0; i<num; i++)
482 if (i < num) /* nope, get the rest, unsorted - sorry */
491 position = sort_info->num_entries;
492 while (num_i < num && positions[num_i] <= position)
495 if (sset->cache_rfd &&
496 num_i < num && positions[num_i] > sset->cache_position)
498 position = sset->cache_position;
499 rfd = sset->cache_rfd;
500 psysno = sset->cache_psysno;
505 rset_close(sset->cache_rfd);
506 rfd = rset_open(rset, RSETF_READ);
508 while (num_i < num && rset_read(rfd, &key, 0))
510 zint this_sys = key.mem[sysno_mem_index];
511 if (this_sys != psysno)
516 /* determine whether we already have this in our set */
517 for (i = sort_info->num_entries; --i >= 0; )
518 if (psysno == sort_info->entries[i]->sysno)
525 if (position == positions[num_i])
527 sr[num_i].sysno = psysno;
528 yaz_log(log_level_sort, "got pos=" ZINT_FORMAT " (unsorted)", position);
529 sr[num_i].score = -1;
534 sset->cache_position = position;
535 sset->cache_psysno = psysno;
536 sset->cache_rfd = rfd;
/* Counterpart of zebra_meta_records_create() for a `records` array.
   zh is only referenced by the assert. */
542 void zebra_meta_records_destroy(ZebraHandle zh, ZebraMetaRecord *records,
545 assert(zh); /* compiler shut up about unused arg */
551 int *ord; /* array of ord for each database searched */
552 int *numerical; /* per-database flag: non-zero selects numeric comparison */
553 const char *index_type;
/* Inserts record `sysno` into the sorted entries of `sset`.
   criteria / num_criteria - sort criteria to apply.
   cmp_buf[k]        - SORT_IDX_ENTRYSIZE-byte sort keys of the entries
                       already inserted, one row per entry.
   tmp_cmp_buf[k]    - scratch buffer for the sort key of this record.
   cached_success_db - in/out: index of the database in which the last
                       sort-index read succeeded; it is tried first.
   Step 1: for every criterion the scratch buffer is zero-filled and,
   unless criteria[i].ord[0] is -1, the sort index is read for the
   record, moving on to the other databases when the read fails and
   giving up after all of them were tried.
   Step 2: the insert position is searched from the end of the current
   entries.  A criterion is compared numerically (atof on the
   untranslated terms) when its numerical flag for the cached database
   is set, otherwise with memcmp; relation 'A' or 'D' decides the
   direction.
   Step 3: entries above the insert position are moved up one slot,
   both the entry pointers and the cmp_buf rows, and the new entry is
   stored with the given sysno and score -1.  When max_entries is
   reached the sort is aborted for this record. */
556 void resultSetInsertSort(ZebraHandle zh, ZebraSet sset,
557 struct sortKeyInfo *criteria, int num_criteria,
559 char *cmp_buf[], char *tmp_cmp_buf[], int *cached_success_db)
561 struct zset_sort_entry *new_entry = NULL;
562 struct zset_sort_info *sort_info = sset->sort_info;
564 int scan_db,scan_count;
565 int numbases = zh->num_basenames;
567 zebra_sort_sysno(zh->reg->sort_index, sysno);
568 for (i = 0; i<num_criteria; i++)
570 char *this_entry_buf = tmp_cmp_buf[i];
571 memset(this_entry_buf, '\0', SORT_IDX_ENTRYSIZE);
573 /* if the first database doesn't have a sort index,
574 we assume none of them will */
575 if (criteria[i].ord[0] != -1)
577 /* now make a best guess for the database in which we think
578 the record is located if its not in our best guess, try the
579 other databases one by one, till we had them all */
580 scan_db = *cached_success_db;
586 if (scan_count>numbases)
588 /* well...we scanned all databases and still nothing...give up */
589 yaz_log(log_level_sort, "zebra_sort_read failed (record not found in indices)");
593 /* the criteria[i].ord is the file id of the sort index */
594 yaz_log(log_level_sort, "pre zebra_sort_type ord is %d", criteria[i].ord[scan_db]);
595 zebra_sort_type(zh->reg->sort_index, criteria[i].ord[scan_db]);
596 if (zebra_sort_read(zh->reg->sort_index, this_entry_buf))
598 /* allright, found it */
599 /* cache this db so we start trying from this db
601 *cached_success_db=scan_db;
606 yaz_log(log_level_sort, "record not found in database, trying next one");
608 if (scan_db>=numbases)
616 yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index");
619 i = sort_info->num_entries;
623 for (j = 0; j<num_criteria; j++)
625 char *this_entry_buf = tmp_cmp_buf[j];
626 char *other_entry_buf =
627 cmp_buf[j] + i * SORT_IDX_ENTRYSIZE;
628 if (criteria[j].numerical[*cached_success_db])
630 char this_entry_org[1024];
631 char other_entry_org[1024];
633 /* when searching multiple databases, we use the index
634 type of the first one. So if they differ between
635 databases, we have a problem here we could store the
636 index_type for each database, but if we didn't find the
637 record in any sort index, then we still don't know to
638 which database it belongs. */
639 const char *index_type = criteria[j].index_type;
640 zebra_term_untrans(zh, index_type, this_entry_org,
642 zebra_term_untrans(zh, index_type, other_entry_org,
644 diff = atof(this_entry_org) - atof(other_entry_org);
655 rel = memcmp(this_entry_buf, other_entry_buf,
658 /* when the compare is equal, continue to next criteria,
665 if (criteria[j].relation == 'A')
670 else if (criteria[j].relation == 'D')
677 yaz_log(log_level_sort, "ok, we want to insert record at position %d",i);
678 j = sort_info->max_entries;
680 yaz_log(log_level_sort, "sort_info->max_entries reached (%d) abort sort",j);
684 if (sort_info->num_entries == j)
687 j = (sort_info->num_entries)++;
688 new_entry = sort_info->entries[j];
689 /* move up all higher entries (to make room) */
693 for (k = 0; k<num_criteria; k++)
695 char *j_buf = cmp_buf[k] + j * SORT_IDX_ENTRYSIZE;
696 char *j_1_buf = cmp_buf[k] + (j-1) * SORT_IDX_ENTRYSIZE;
697 memcpy(j_buf, j_1_buf, SORT_IDX_ENTRYSIZE);
699 sort_info->entries[j] = sort_info->entries[j-1];
702 /* and insert the new entry at the correct place */
703 sort_info->entries[i] = new_entry;
705 /* and add this to the compare buffer */
706 for (i = 0; i<num_criteria; i++)
708 char *new_entry_buf = cmp_buf[i] + j * SORT_IDX_ENTRYSIZE;
709 char *this_entry_buf = tmp_cmp_buf[i];
710 memcpy(new_entry_buf, this_entry_buf, SORT_IDX_ENTRYSIZE);
712 new_entry->sysno = sysno;
713 new_entry->score = -1;
/* Inserts (sysno, score) into sort_info->entries.
   The insert position is searched from the end of the current entries
   by comparing `score` with the score of each existing entry;
   `relation` selects the direction ('A' is one of the values handled).
   Entries above the insert position are moved up one slot and the new
   entry is stored there.  num_entries grows by one per insert unless
   it already equals max_entries.  zh is only referenced by the
   assert. */
716 void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info,
717 zint sysno, int score, int relation)
719 struct zset_sort_entry *new_entry = NULL;
721 assert(zh); /* compiler shut up about unused arg */
723 i = sort_info->num_entries;
728 rel = score - sort_info->entries[i]->score;
735 else if (relation == 'A')
742 j = sort_info->max_entries;
746 if (sort_info->num_entries == j)
749 j = (sort_info->num_entries)++;
751 new_entry = sort_info->entries[j];
754 sort_info->entries[j] = sort_info->entries[j-1];
757 sort_info->entries[i] = new_entry;
759 new_entry->sysno = sysno;
760 new_entry->score = score;
/* Deep-copies an RPN query: `src` is encoded with one ODR stream, the
   resulting buffer is decoded with a second stream, and the memory of
   the decoder is transferred to `nmem` so that the copy lives in that
   pool. */
763 static Z_RPNQuery *copy_RPNQuery(Z_RPNQuery *src, NMEM nmem)
766 ODR encode = odr_createmem(ODR_ENCODE);
767 ODR decode = odr_createmem(ODR_DECODE);
769 if (z_RPNQuery(encode, &src, 0, 0))
772 char *buf = odr_getbuf(encode, &len, 0);
776 odr_setbuf(decode, buf, len, 0);
777 z_RPNQuery(decode, &dst, 0, 0);
780 nmem_transfer(nmem, decode->mem);
/* Deep-copies a sort key specification list using an ODR encode/decode
   round trip; the memory of the decoder is transferred to `nmem`.
   `dst` starts as 0 and is only assigned by the decode step, which
   runs when encoding succeeded. */
786 static Z_SortKeySpecList *copy_SortKeySpecList(Z_SortKeySpecList *src, NMEM nmem)
788 Z_SortKeySpecList *dst = 0;
789 ODR encode = odr_createmem(ODR_ENCODE);
790 ODR decode = odr_createmem(ODR_DECODE);
792 if (z_SortKeySpecList(encode, &src, 0, 0))
795 char *buf = odr_getbuf(encode, &len, 0);
799 odr_setbuf(decode, buf, len, 0);
800 z_SortKeySpecList(decode, &dst, 0, 0);
803 nmem_transfer(nmem, decode->mem);
/* Creates result set `setname` (overwrite allowed) as a copy of `rset`:
   the new set gets its own nmem, copies of the database names, a
   duplicate of the underlying rset (rset_dup) and a copy of the RPN
   query allocated in the new nmem. */
809 ZebraSet resultSetClone(ZebraHandle zh, const char *setname,
815 nset = resultSetAdd(zh, setname, 1);
819 nset->nmem = nmem_create();
821 nset->num_bases = rset->num_bases;
823 nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames));
824 for (i = 0; i<rset->num_bases; i++)
825 nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]);
828 nset->rset = rset_dup(rset->rset);
830 nset->rpn = copy_RPNQuery(rset->rpn, nset->nmem);
/* Sorts one input result set into `output_setname`.
   Exactly one input set is supported: none sets
   YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT and more than one sets
   YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS.  An input set that has no rset
   sets YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST.
   When the output name differs from the input name the input set is
   cloned under the output name first.  A copy of `sort_sequence` is
   stored on the set (in its nmem) and the sorting itself is delegated
   to resultSetSortSingle(), whose result is returned. */
834 ZEBRA_RES resultSetSort(ZebraHandle zh, NMEM nmem,
835 int num_input_setnames, const char **input_setnames,
836 const char *output_setname,
837 Z_SortKeySpecList *sort_sequence, int *sort_status)
842 if (num_input_setnames == 0)
844 zebra_setError(zh, YAZ_BIB1_NO_RESULT_SET_NAME_SUPPLIED_ON_SORT, 0);
847 if (num_input_setnames > 1)
849 zebra_setError(zh, YAZ_BIB1_SORT_TOO_MANY_INPUT_RESULTS, 0);
854 yaz_log(log_level_sort, "result set sort input=%s output=%s",
855 *input_setnames, output_setname);
856 sset = resultSetGet(zh, input_setnames[0]);
859 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
863 if (!(rset = sset->rset))
865 zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
869 if (strcmp(output_setname, input_setnames[0]))
870 sset = resultSetClone(zh, output_setname, sset);
871 sset->sortSpec = copy_SortKeySpecList(sort_sequence, sset->nmem);
872 return resultSetSortSingle(zh, nmem, sset, rset, sort_sequence,
/* Sorts the hits of `rset` into sset->sort_info according to
   `sort_sequence`.
   At most ZSET_SORT_MAX_LEVEL criteria are used.  For each criterion,
   per-database arrays `ord` (initialised to -1) and `numerical`
   (initialised to 0) are allocated from `nmem`, then filled by key
   type:
     sortField      - ord looked up per database in the sort index
                      category
     elementSpec    - not supported; sets
                      YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE
     sortAttributes - ord and numerical obtained per database from
                      zebra_sort_get_ord()
   A missing-value action, an unknown sort relation, a
   database-specific sort element and a non-generic sort element each
   set their own BIB-1 error.  The index type of a criterion is looked
   up from the ord of the first database only.
   One compare buffer per criterion (max_entries rows of
   SORT_IDX_ENTRYSIZE bytes) plus one scratch row is allocated with
   xmalloc for the duration of the sort.  The rset is read through and
   resultSetInsertSort() is called for the previous sysno whenever the
   sysno changes; the break handler, if set, is polled when
   (sset->hits & 255) == 0.  On completion *sort_status is set to
   Z_SortResponse_success. */
876 ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem,
877 ZebraSet sset, RSET rset,
878 Z_SortKeySpecList *sort_sequence,
883 int cached_success_db = 0;
888 struct sortKeyInfo sort_criteria[ZSET_SORT_MAX_LEVEL];
889 char *cmp_buf[ZSET_SORT_MAX_LEVEL];
890 char *tmp_cmp_buf[ZSET_SORT_MAX_LEVEL];
896 size_t sysno_mem_index = 0;
898 int numbases = zh->num_basenames;
899 yaz_log(log_level_sort, "searching %d databases",numbases);
901 if (zh->m_staticrank)
904 assert(nmem); /* compiler shut up about unused param */
905 sset->sort_info->num_entries = 0;
907 rset_getterms(rset, 0, 0, &n);
908 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
909 rset_getterms(rset, terms, n, &numTerms);
912 num_criteria = sort_sequence->num_specs;
913 if (num_criteria > ZSET_SORT_MAX_LEVEL)
914 num_criteria = ZSET_SORT_MAX_LEVEL;
915 /* set up the search criteria */
916 for (i = 0; i < num_criteria; i++)
918 Z_SortKeySpec *sks = sort_sequence->specs[i];
922 sort_criteria[i].ord = (int *)
923 nmem_malloc(nmem, sizeof(int)*numbases);
924 sort_criteria[i].numerical = (int *)
925 nmem_malloc(nmem, sizeof(int)*numbases);
927 /* initialize ord and numerical for each database */
928 for (ib = 0; ib < numbases; ib++)
930 sort_criteria[i].ord[ib] = -1;
931 sort_criteria[i].numerical[ib] = 0;
934 if (sks->which == Z_SortKeySpec_missingValueData)
936 zebra_setError(zh, YAZ_BIB1_UNSUPP_MISSING_DATA_ACTION, 0);
939 if (*sks->sortRelation == Z_SortKeySpec_ascending)
940 sort_criteria[i].relation = 'A';
941 else if (*sks->sortRelation == Z_SortKeySpec_descending)
942 sort_criteria[i].relation = 'D';
945 zebra_setError(zh, YAZ_BIB1_ILLEGAL_SORT_RELATION, 0);
948 if (sks->sortElement->which == Z_SortElement_databaseSpecific)
950 zebra_setError(zh, YAZ_BIB1_DATABASE_SPECIFIC_SORT_UNSUPP, 0);
953 else if (sks->sortElement->which != Z_SortElement_generic)
955 zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0);
958 sk = sks->sortElement->u.generic;
961 case Z_SortKey_sortField:
962 yaz_log(log_level_sort, "key %d is of type sortField", i+1);
963 for (ib = 0; ib < numbases; ib++)
965 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
966 sort_criteria[i].numerical[ib] = 0;
967 sort_criteria[i].ord[ib] =
968 zebraExplain_lookup_attr_str(zh->reg->zei,
969 zinfo_index_category_sort,
971 if (sks->which != Z_SortKeySpec_null
972 && sort_criteria[i].ord[ib] == -1)
975 YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
980 case Z_SortKey_elementSpec:
981 yaz_log(log_level_sort, "key %d is of type elementSpec", i+1);
982 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
984 case Z_SortKey_sortAttributes:
985 yaz_log(log_level_sort, "key %d is of type sortAttributes", i+1);
986 /* for every database we searched, get the sort index file
987 id (ord) and its numerical indication and store them in
989 for (ib = 0; ib < numbases; ib++)
991 zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]);
992 res = zebra_sort_get_ord(zh, sk->u.sortAttributes,
993 &sort_criteria[i].ord[ib],
994 &sort_criteria[i].numerical[ib]);
997 if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK)
1001 /* right now we look up the index type based on the first database
1002 if the index_type's can differ between the indexes of different
1003 databases (which i guess they can?) then we have to store the
1004 index types for each database, just like the ord and numerical */
1005 if (zebraExplain_lookup_ord(zh->reg->zei, sort_criteria[i].ord[0],
1006 &sort_criteria[i].index_type,
1009 zebra_setError(zh, YAZ_BIB1_CANNOT_SORT_ACCORDING_TO_SEQUENCE, 0);
1013 /* allocate space for each compare buf + one extra for tmp comparison */
1014 /* cmp_buf is an array of array, the first dimension is the criteria and the second dimension are
1015 all other result entries to compare against. This is slowly filled when records are processed.
1016 tmp_cmp_buf is an array with a value of the current record for each criteria
1018 for (i = 0; i<num_criteria; i++)
1020 cmp_buf[i] = xmalloc(sset->sort_info->max_entries
1021 * SORT_IDX_ENTRYSIZE);
1022 tmp_cmp_buf[i] = xmalloc(SORT_IDX_ENTRYSIZE);
1024 rfd = rset_open(rset, RSETF_READ);
1025 while (rset_read(rfd, &key, &termid))
1027 zint this_sys = key.mem[sysno_mem_index];
1028 if (log_level_searchhits)
1029 key_logdump_txt(log_level_searchhits, &key, termid->name);
1031 if (this_sys != psysno)
1033 if ((sset->hits & 255) == 0 && zh->break_handler_func)
1035 if (zh->break_handler_func(zh->break_handler_data))
1037 rset_set_hits_limit(rset, 0);
1043 resultSetInsertSort(zh, sset,
1044 sort_criteria, num_criteria, psysno, cmp_buf,
1045 tmp_cmp_buf, &cached_success_db);
1050 /* free the compare buffers */
1051 for (i = 0; i<num_criteria; i++)
1054 xfree(tmp_cmp_buf[i]);
1057 yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort",
1059 for (i = 0; i < numTerms; i++)
1060 yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT,
1061 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
1062 *sort_status = Z_SortResponse_success;
/* Looks up result set `resultSetId` with resultSetGet(), which may
   re-run the search for a set whose rset was invalidated.
   NOTE(review): presumably returns the rset of the set, or NULL when
   the set does not exist - confirm. */
1066 RSET resultSetRef(ZebraHandle zh, const char *resultSetId)
1070 if ((s = resultSetGet(zh, resultSetId)))
/* Ranks the hits of `rset` into zebraSet->sort_info.
   The rank handler is named by the "rank" resource (default "rank-1");
   when the lookup fails a warning is logged and
   YAZ_BIB1_UNSUPP_SEARCH is set.
   The sorted entry count and the estimated-hit flag are reset first,
   and the terms of the rset are collected for the handler's begin()
   call.  While the rset is read, every key is passed to the handler's
   add() together with the last key.mem element (seqno).  Whenever the
   sysno changes, calc() scores the previous record and the result is
   inserted with resultSetInsertRank(..., 'A'); the final record is
   scored after the loop, and end() closes the handler.
   With static rank enabled the sysno is read from key.mem[1] and the
   static rank from key.mem[0].
   The break handler, if set, is polled when the low 8 bits of
   rfd->counted_items are zero; counted_items is also compared against
   rset->hits_limit.  estimated_hit_count may be set to 1 during the
   loop.  Finally zebraSet->hits is taken from rset->hits_count and the
   per-term counts are logged. */
1075 ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet,
1076 RSET rset, NMEM nmem)
1085 ZebraRankClass rank_class;
1086 struct zset_sort_info *sort_info;
1087 const char *rank_handler_name = res_get_def(zh->res, "rank", "rank-1");
1088 size_t sysno_mem_index = 0;
1090 if (zh->m_staticrank)
1091 sysno_mem_index = 1;
1095 sort_info = zebraSet->sort_info;
1096 sort_info->num_entries = 0;
1098 zebraSet->estimated_hit_count = 0;
1099 rset_getterms(rset, 0, 0, &n);
1100 terms = (TERMID *) nmem_malloc(nmem, sizeof(*terms)*n);
1101 rset_getterms(rset, terms, n, &numTerms);
1103 rank_class = zebraRankLookup(zh, rank_handler_name);
1106 yaz_log(YLOG_WARN, "No such rank handler: %s", rank_handler_name);
1107 zebra_setError(zh, YAZ_BIB1_UNSUPP_SEARCH, "Cannot find rank handler");
1112 RSFD rfd = rset_open(rset, RSETF_READ);
1113 struct rank_control *rc = rank_class->control;
1116 void *handle = (*rc->begin) (zh->reg, rank_class->class_handle, rset,
1117 nmem, terms, numTerms);
1118 zint psysno = 0; /* previous doc id / sys no */
1119 zint pstaticrank = 0; /* previous static rank */
1121 while (rset_read(rfd, &key, &termid))
1123 zint this_sys = key.mem[sysno_mem_index];
1125 zint seqno = key.mem[key.len-1];
1127 if (log_level_searchhits)
1128 key_logdump_txt(log_level_searchhits, &key, termid->name);
1129 if (this_sys != psysno)
1130 { /* new record .. */
1131 if (!(rfd->counted_items & 255) && zh->break_handler_func)
1133 if (zh->break_handler_func(zh->break_handler_data))
1135 yaz_log(YLOG_LOG, "Aborted search");
1139 if (rfd->counted_items > rset->hits_limit)
1142 { /* only if we did have a previous record */
1143 score = (*rc->calc)(handle, psysno, pstaticrank,
1145 /* insert the hit. A=Ascending */
1146 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1151 zebraSet->estimated_hit_count = 1;
1152 rset_set_hits_limit(rset, 0);
1156 if (zh->m_staticrank)
1157 pstaticrank = key.mem[0];
1159 (*rc->add)(handle, CAST_ZINT_TO_INT(seqno), termid);
1163 { /* we had - at least - one record */
1164 score = (*rc->calc)(handle, psysno, pstaticrank, &stop_flag);
1165 /* insert the hit. A=Ascending */
1166 resultSetInsertRank(zh, sort_info, psysno, score, 'A');
1169 (*rc->end)(zh->reg, handle);
1172 zebraSet->hits = rset->hits_count;
1174 yaz_log(log_level_searchterms, ZINT_FORMAT " keys, "
1175 ZINT_FORMAT " sysnos, rank", kno, zebraSet->hits);
1176 for (i = 0; i < numTerms; i++)
1178 yaz_log(log_level_searchterms, "term=\"%s\" type=%s count="
1180 terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count);
/* Searches zh->reg->rank_classes for the class whose control->name
   equals `name`.  When the class found has not been initialised yet
   (init_flag clear) and its control has a create() hook, the hook is
   called and its result stored as class_handle. */
1185 ZebraRankClass zebraRankLookup(ZebraHandle zh, const char *name)
1187 ZebraRankClass p = zh->reg->rank_classes;
1188 while (p && strcmp(p->control->name, name))
1190 if (p && !p->init_flag)
1192 if (p->control->create)
1193 p->class_handle = (*p->control->create)(zh);
/* Registers a rank handler with `reg`: a new rank class is allocated
   holding its own copy of *ctrl (with the name string duplicated) and
   is pushed onto the front of reg->rank_classes. */
1199 void zebraRankInstall(struct zebra_register *reg, struct rank_control *ctrl)
1201 ZebraRankClass p = (ZebraRankClass) xmalloc(sizeof(*p));
1202 p->control = (struct rank_control *) xmalloc(sizeof(*p->control));
1203 memcpy(p->control, ctrl, sizeof(*p->control));
1204 p->control->name = xstrdup(ctrl->name);
1206 p->next = reg->rank_classes;
1207 reg->rank_classes = p;
/* Tears down the rank classes of `reg`: for every class that was
   initialised and has a destroy() hook the hook is called with the
   class handle, the duplicated name is freed, and at the end
   reg->rank_classes is set to NULL. */
1210 void zebraRankDestroy(struct zebra_register *reg)
1212 ZebraRankClass p = reg->rank_classes;
1215 ZebraRankClass p_next = p->next;
1216 if (p->init_flag && p->control->destroy)
1217 (*p->control->destroy)(reg, p->class_handle);
1218 xfree(p->control->name);
1223 reg->rank_classes = NULL;
/* Recursively walks the rset tree and collects per-term information.
   Each of termid_array, hits_array and approx_array may be NULL, in
   which case that piece of information is not stored (callers pass
   all three as 0 just to count).  For the children, every non-NULL
   array is advanced by the number of slots used so far (`no`).  For a
   node carrying a term, slot `no` receives rset->term, rset->hits_count
   and rset->hits_approx, and the term is logged.
   The return value is used by callers as the number of terms. */
1226 static int trav_rset_for_termids(RSET rset, TERMID *termid_array,
1227 zint *hits_array, int *approx_array)
1231 for (i = 0; i<rset->no_children; i++)
1232 no += trav_rset_for_termids(rset->children[i],
1233 (termid_array ? termid_array + no : 0),
1234 (hits_array ? hits_array + no : 0),
1235 (approx_array ? approx_array + no : 0));
1239 termid_array[no] = rset->term;
1241 hits_array[no] = rset->hits_count;
1243 approx_array[no] = rset->hits_approx;
1245 yaz_log(YLOG_LOG, "rset=%p term=%s limit=" ZINT_FORMAT
1246 " count=" ZINT_FORMAT,
1247 rset, rset->term->name, rset->hits_limit, rset->hits_count);
/* Stores in *num_terms the number of terms of result set `setname`,
   as counted by trav_rset_for_termids() on the rset of the set. */
1254 ZEBRA_RES zebra_result_set_term_no(ZebraHandle zh, const char *setname,
1257 ZebraSet sset = resultSetGet(zh, setname);
1261 *num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
/* Reports details about term number `no` of result set `setname`.
   `no` is 0-based and must be below the number of terms in the set.
   count       - receives the hit count of the term.
   approx      - receives the approximation flag of the term.
   termbuf     - receives the term text, NUL-terminated.
   termlen     - in: size of termbuf, one byte of which is reserved for
                 the terminator; out: length of the text stored.
   term_ref_id - receives the ref_id of the term.
   When zh->iconv_from_utf8 is set the term is converted with
   yaz_iconv(); otherwise it is copied with memcpy (the inleft/outleft
   comparison presumably truncates it to fit - confirm).
   Temporary arrays sized by the number of terms are allocated with
   xmalloc for the duration of the call. */
1267 ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname,
1268 int no, zint *count, int *approx,
1269 char *termbuf, size_t *termlen,
1270 const char **term_ref_id)
1272 ZebraSet sset = resultSetGet(zh, setname);
1275 int num_terms = trav_rset_for_termids(sset->rset, 0, 0, 0);
1276 if (no >= 0 && no < num_terms)
1278 TERMID *term_array = xmalloc(num_terms * sizeof(*term_array));
1279 zint *hits_array = xmalloc(num_terms * sizeof(*hits_array));
1280 int *approx_array = xmalloc(num_terms * sizeof(*approx_array));
1282 trav_rset_for_termids(sset->rset, term_array,
1283 hits_array, approx_array);
1286 *count = hits_array[no];
1288 *approx = approx_array[no];
1291 char *inbuf = term_array[no]->name;
1292 size_t inleft = strlen(inbuf);
1293 size_t outleft = *termlen - 1;
1295 if (zh->iconv_from_utf8 != 0)
1297 char *outbuf = termbuf;
1300 ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft,
1302 if (ret == (size_t)(-1))
1306 yaz_iconv(zh->iconv_from_utf8, 0, 0,
1308 *termlen = outbuf - termbuf;
1313 if (inleft > outleft)
1316 memcpy(termbuf, inbuf, *termlen);
1318 termbuf[*termlen] = '\0';
1321 *term_ref_id = term_array[no]->ref_id;
1325 xfree(approx_array);
/* Collects snippet hits for record `sysno` of result set `setname`.
   A temporary rset (stored under the "setTmpDir" resource) is written
   with a key (presumably one identifying `sysno` - confirm) and is
   AND-combined with a duplicate of the rset of the set.  For every
   key read from the combination, one snippet is appended per entry of
   the ord list of the term, using key.mem[key.len-1], the ord and the
   term name.  The combined rset is deleted afterwards. */
1332 ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname,
1333 zint sysno, zebra_snippets *snippets)
1335 ZebraSet sset = resultSetGet(zh, setname);
1336 yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT,
1342 struct rset_key_control *kc = zebra_key_control_create(zh);
1343 NMEM nmem = nmem_create();
1345 RSET rsets[2], rset_comb;
1346 RSET rset_temp = rset_create_temp(nmem, kc, kc->scope,
1347 res_get(zh->res, "setTmpDir"),0 );
1350 RSFD rsfd = rset_open(rset_temp, RSETF_WRITE);
1357 rset_write(rsfd, &key);
1360 rsets[0] = rset_temp;
1361 rsets[1] = rset_dup(sset->rset);
1363 rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets);
1365 rsfd = rset_open(rset_comb, RSETF_READ);
1367 while (rset_read(rsfd, &key, &termid))
1371 struct ord_list *ol;
1372 for (ol = termid->ol; ol; ol = ol->next)
1374 zebra_snippets_append(snippets, key.mem[key.len-1], 0,
1375 ol->ord, termid->name);
1381 rset_delete(rset_comb);
/* Maps `recid` to system numbers across the given databases.
   sysnos    - output array.
   no_sysnos - in: capacity of sysnos; out: number of entries stored
               (entries are only stored while there is room).
   When the register has no isamb or segment indexing is off, the
   short path at the top is taken (presumably recid itself is stored -
   confirm).  Otherwise, for each database that can be selected, the
   "_ALLRECORDS" always-matches index of type "w" is looked up, its
   encoded ord is searched in the dictionary, and the ISAM position
   found there is opened with isamb_pp_open().  The list is forwarded
   to the first key at or after recid and read on while
   key_found.mem[0] equals recid, collecting the last mem element of
   each key. */
1388 static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh,
1389 const char **basenames, int num_bases,
1391 zint *sysnos, int *no_sysnos)
1393 ZEBRA_RES res = ZEBRA_OK;
1394 int sysnos_offset = 0;
1397 if (!zh->reg->isamb || !zh->m_segment_indexing)
1399 if (sysnos_offset < *no_sysnos)
1405 for (i = 0; res == ZEBRA_OK && i < num_bases; i++)
1407 const char *database = basenames[i];
1408 if (zebraExplain_curDatabase(zh->reg->zei, database) == 0)
1410 const char *index_type = "w";
1411 const char *use_string = "_ALLRECORDS";
1413 zinfo_index_category_t cat = zinfo_index_category_alwaysmatches;
1414 ord = zebraExplain_lookup_attr_str(zh->reg->zei, cat,
1415 index_type, use_string);
1419 int ord_len = key_SU_encode(ord, ord_buf);
1422 ord_buf[ord_len] = '\0';
1424 info = dict_lookup(zh->reg->dict, ord_buf);
1427 if (*info != sizeof(ISAM_P))
1435 struct it_key key_until, key_found;
1439 memcpy(&isam_p, info+1, sizeof(ISAM_P));
1441 pt = isamb_pp_open(zh->reg->isamb, isam_p, 2);
1446 key_until.mem[i++] = recid;
1447 key_until.mem[i++] = 0; /* section_id */
1448 if (zh->m_segment_indexing)
1449 key_until.mem[i++] = 0; /* segment */
1450 key_until.mem[i++] = 0;
1453 r = isamb_pp_forward(pt, &key_found, &key_until);
1454 while (r && key_found.mem[0] == recid)
1456 if (sysnos_offset < *no_sysnos)
1457 sysnos[sysnos_offset++] =
1458 key_found.mem[key_found.len-1];
1459 r = isamb_pp_read(pt, &key_found);
1469 *no_sysnos = sysnos_offset;
/* Resolves the databases of result set `setname` with
   resultSetGetBaseNames() and, when that succeeds, delegates to
   zebra_recid_to_sysno() with the same recid, sysnos and no_sysnos. */
1473 ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh,
1474 const char *setname,
1476 zint *sysnos, int *no_sysnos)
1478 const char **basenames;
1482 res = resultSetGetBaseNames(zh, setname, &basenames, &num_bases);
1483 if (res != ZEBRA_OK)
1486 return zebra_recid_to_sysno(zh, basenames, num_bases,
1487 recid, sysnos, no_sysnos);
/* Counts the hits of `rset` by reading it through.
   The hits_limit of the rset is set to approx_limit before reading.
   A new record is recognised whenever key.mem[0] differs from the
   previous sysno; terms are not requested from rset_read().  The
   counted_items value of the rfd is compared against hits_limit for
   each new record (presumably to stop reading early - confirm).
   *count receives rset->hits_count. */
1490 void zebra_count_set(ZebraHandle zh, RSET rset, zint *count,
1497 yaz_log(YLOG_DEBUG, "count_set");
1499 rset->hits_limit = approx_limit;
1502 rfd = rset_open(rset, RSETF_READ);
1503 while (rset_read(rfd, &key,0 /* never mind terms */))
1505 if (key.mem[0] != psysno)
1507 psysno = key.mem[0];
1508 if (rfd->counted_items >= rset->hits_limit)
1513 *count = rset->hits_count;
1520 * indent-tabs-mode: nil
1522 * vim: shiftwidth=4 tabstop=8 expandtab