From f1a944fbdac20e88bf55918f2a4f66c301d684e2 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 20 Mar 2002 20:24:29 +0000 Subject: [PATCH] Hits per term. Returned in SearchResult-1 --- README | 4 +- include/rset.h | 6 +- include/rsisamc.h | 4 +- include/rstemp.h | 6 +- index/trunc.c | 8 ++- index/zebraapi.c | 4 +- index/zrpn.c | 5 +- index/zserver.c | 75 ++++++++++++++++++++++++- index/zserver.h | 5 +- index/zsets.c | 159 +++++++++++++---------------------------------------- rset/rsbool.c | 62 +-------------------- rset/rset.c | 8 ++- rset/rsisamc.c | 29 +++++++++- rset/rsm_or.c | 62 +++++++++------------ rset/rstemp.c | 21 ++++++- 15 files changed, 226 insertions(+), 232 deletions(-) diff --git a/README b/README index 37e129b..7d73fbd 100644 --- a/README +++ b/README @@ -2,7 +2,7 @@ Copyright (C) 1995-2002, Index Data ApS. See the file LICENSE.zebra for details. README for Zebra - $Id: README,v 1.11 2002-03-15 20:45:33 adam Exp $ + $Id: README,v 1.12 2002-03-20 20:24:29 adam Exp $ Documentation about this software can be found in the subdirectory 'doc' and Zebra's home page: http://www.indexdata.dk/zebra/ @@ -13,4 +13,4 @@ We maintain a mailing-list for the purpose of announcing new versions of the software, bug-reports, discussion etc. You can sign up by sending mail to zebra-request@indexdata.dk and include the following command in your email: - subscribe zebra-l + subscribe zebra-l diff --git a/include/rset.h b/include/rset.h index 6baacf4..65198ed 100644 --- a/include/rset.h +++ b/include/rset.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: rset.h,v $ - * Revision 1.16 1999-02-02 14:50:38 adam + * Revision 1.17 2002-03-20 20:24:29 adam + * Hits per term. Returned in SearchResult-1 + * + * Revision 1.16 1999/02/02 14:50:38 adam * Updated WIN32 code specific sections. Changed header. 
* * Revision 1.15 1998/03/05 08:37:44 adam @@ -96,6 +99,7 @@ struct rset_term { char *name; int nn; char *flags; + int count; }; typedef struct rset diff --git a/include/rsisamc.h b/include/rsisamc.h index 430a11f..d443c15 100644 --- a/include/rsisamc.h +++ b/include/rsisamc.h @@ -3,7 +3,7 @@ * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * - * $Id: rsisamc.h,v 1.5 1999-05-12 13:08:06 adam Exp $ + * $Id: rsisamc.h,v 1.6 2002-03-20 20:24:29 adam Exp $ */ #ifndef RSET_ISAMC_H @@ -20,6 +20,8 @@ extern const struct rset_control *rset_kind_isamc; typedef struct rset_isamc_parms { + int (*cmp)(const void *p1, const void *p2); + int key_size; ISAMC is; ISAMC_P pos; RSET_TERM rset_term; diff --git a/include/rstemp.h b/include/rstemp.h index 63c3b7e..4f62164 100644 --- a/include/rstemp.h +++ b/include/rstemp.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: rstemp.h,v $ - * Revision 1.6 1999-02-02 14:50:43 adam + * Revision 1.7 2002-03-20 20:24:29 adam + * Hits per term. Returned in SearchResult-1 + * + * Revision 1.6 1999/02/02 14:50:43 adam * Updated WIN32 code specific sections. Changed header. * * Revision 1.5 1998/03/05 08:37:44 adam @@ -39,6 +42,7 @@ extern const struct rset_control *rset_kind_temp; typedef struct rset_temp_parms { + int (*cmp)(const void *p1, const void *p2); int key_size; char *temp_path; RSET_TERM rset_term; diff --git a/index/trunc.c b/index/trunc.c index 167c562..954302c 100644 --- a/index/trunc.c +++ b/index/trunc.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: trunc.c,v $ - * Revision 1.19 2001-01-16 16:56:15 heikki + * Revision 1.20 2002-03-20 20:24:29 adam + * Hits per term. 
Returned in SearchResult-1 + * + * Revision 1.19 2001/01/16 16:56:15 heikki * Searching in my isam-d * * Revision 1.18 2000/05/18 12:01:36 adam @@ -190,6 +193,7 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length, RSFD result_rsfd; rset_temp_parms parms; + parms.cmp = key_compare_it; parms.key_size = sizeof(struct it_key); parms.temp_path = res_get (zi->service->res, "setTmpDir"); parms.rset_term = rset_term_create (term, length, flags); @@ -555,6 +559,8 @@ RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no, { rset_isamc_parms parms; + parms.key_size = sizeof(struct it_key); + parms.cmp = key_compare_it; parms.pos = *isam_p; parms.is = zi->service->isamc; parms.rset_term = rset_term_create (term, length, flags); diff --git a/index/zebraapi.c b/index/zebraapi.c index 817b803..4875585 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2002, Index Data * All rights reserved. * - * $Id: zebraapi.c,v 1.47 2002-03-15 20:11:36 adam Exp $ + * $Id: zebraapi.c,v 1.48 2002-03-20 20:24:29 adam Exp $ */ #include @@ -459,6 +459,8 @@ void zebra_search_rpn (ZebraHandle zh, ODR stream, ODR decode, logf(LOG_APP,"SEARCH:%d:",zh->hits); } + + void zebra_records_retrieve (ZebraHandle zh, ODR stream, const char *setname, Z_RecordComposition *comp, oid_value input_format, int num_recs, diff --git a/index/zrpn.c b/index/zrpn.c index 0f05b2d..4de4914 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -3,7 +3,7 @@ * All rights reserved. 
* Sebastian Hammer, Adam Dickmeiss * - * $Id: zrpn.c,v 1.109 2002-02-18 11:46:58 adam Exp $ + * $Id: zrpn.c,v 1.110 2002-03-20 20:24:29 adam Exp $ */ #include #include @@ -1136,6 +1136,7 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, parms.rset_term = rset_term_create (prox_term, length_prox_term, flags); parms.rset_term->nn = min_nn; + parms.cmp = key_compare_it; parms.key_size = sizeof (struct it_key); parms.temp_path = res_get (zh->service->res, "setTmpDir"); result = rset_create (rset_kind_temp, &parms); @@ -1194,6 +1195,7 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no, parms.rset_term = rset_term_create (prox_term, length_prox_term, flags); parms.rset_term->nn = min_nn; + parms.cmp = key_compare_it; parms.key_size = sizeof (struct it_key); parms.temp_path = res_get (zh->service->res, "setTmpDir"); result = rset_create (rset_kind_temp, &parms); @@ -1769,6 +1771,7 @@ static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt, rset_temp_parms parms; parms.rset_term = rset_term_create (termz, -1, rank_type); + parms.cmp = key_compare_it; parms.key_size = sizeof (struct it_key); parms.temp_path = res_get (zh->service->res, "setTmpDir"); result = rset_create (rset_kind_temp, &parms); diff --git a/index/zserver.c b/index/zserver.c index 1df6afd..c935479 100644 --- a/index/zserver.c +++ b/index/zserver.c @@ -2,7 +2,7 @@ * Copyright (C) 1995-2000, Index Data * All rights reserved. 
* - * $Id: zserver.c,v 1.83 2002-02-20 17:30:01 adam Exp $ + * $Id: zserver.c,v 1.84 2002-03-20 20:24:30 adam Exp $ */ #include @@ -99,6 +99,76 @@ bend_initresult *bend_init (bend_initrequest *q) return r; } +static void search_terms (ZebraHandle zh, bend_search_rr *r) +{ + int count; + int no_terms; + int i; + struct Z_External *ext; + Z_SearchInfoReport *sr; + + /* get no of terms for result set */ + zebra_resultSetTerms (zh, r->setname, -1, &count, &no_terms); + if (!no_terms) + return; + + r->search_info = odr_malloc (r->stream, sizeof(*r->search_info)); + + r->search_info->num_elements = 1; + r->search_info->list = + odr_malloc (r->stream, sizeof(*r->search_info->list)); + r->search_info->list[0] = + odr_malloc (r->stream, sizeof(**r->search_info->list)); + r->search_info->list[0]->category = 0; + r->search_info->list[0]->which = Z_OtherInfo_externallyDefinedInfo; + ext = odr_malloc (r->stream, sizeof(*ext)); + r->search_info->list[0]->information.externallyDefinedInfo = ext; + ext->direct_reference = + yaz_oidval_to_z3950oid (r->stream, CLASS_USERINFO, VAL_SEARCHRES1); + ext->indirect_reference = 0; + ext->descriptor = 0; + ext->which = Z_External_searchResult1; + sr = odr_malloc (r->stream, sizeof(Z_SearchInfoReport)); + ext->u.searchResult1 = sr; + sr->num = no_terms; + sr->elements = odr_malloc (r->stream, sr->num * + sizeof(*sr->elements)); + for (i = 0; i<no_terms; i++) + { + Z_Term *term; + const char *termz = zebra_resultSetTerms (zh, r->setname, i, + &count, &no_terms); + + sr->elements[i] = odr_malloc (r->stream, sizeof(**sr->elements)); + sr->elements[i]->subqueryId = 0; + sr->elements[i]->fullQuery = odr_malloc (r->stream, + sizeof(bool_t)); + *sr->elements[i]->fullQuery = 0; + sr->elements[i]->subqueryExpression = + odr_malloc (r->stream, sizeof(Z_QueryExpression)); + sr->elements[i]->subqueryExpression->which = + Z_QueryExpression_term; + sr->elements[i]->subqueryExpression->u.term = + odr_malloc (r->stream, sizeof(Z_QueryExpressionTerm)); + term = odr_malloc (r->stream, sizeof(Z_Term)); +
sr->elements[i]->subqueryExpression->u.term->queryTerm = term; + + term->which = Z_Term_general; + term->u.general = odr_malloc (r->stream, sizeof(Odr_oct)); + term->u.general->buf = odr_strdup (r->stream, termz); + + term->u.general->len = strlen (termz); + term->u.general->size = strlen (termz); + + sr->elements[i]->subqueryExpression->u.term->termComment = 0; + sr->elements[i]->subqueryInterpretation = 0; + sr->elements[i]->subqueryRecommendation = 0; + sr->elements[i]->subqueryCount = odr_intdup (r->stream, count); + sr->elements[i]->subqueryWeight = 0; + sr->elements[i]->resultsByDB = 0; + } +} + int bend_search (void *handle, bend_search_rr *r) { ZebraHandle zh = (ZebraHandle) handle; @@ -116,6 +186,8 @@ int bend_search (void *handle, bend_search_rr *r) r->errcode = zh->errCode; r->errstring = zh->errString; r->hits = zh->hits; + if (!r->errcode) + search_terms (zh, r); break; case Z_Query_type_2: r->errcode = 107; @@ -127,6 +199,7 @@ int bend_search (void *handle, bend_search_rr *r) return 0; } + int bend_fetch (void *handle, bend_fetch_rr *r) { ZebraHandle zh = (ZebraHandle) handle; diff --git a/index/zserver.h b/index/zserver.h index 77c7db6..ba264e8 100644 --- a/index/zserver.h +++ b/index/zserver.h @@ -3,7 +3,7 @@ * All rights reserved. 
* Sebastian Hammer, Adam Dickmeiss * - * $Id: zserver.h,v 1.52 2002-02-20 17:30:01 adam Exp $ + * $Id: zserver.h,v 1.53 2002-03-20 20:24:30 adam Exp $ */ #if HAVE_SYS_TIMES_H @@ -164,6 +164,9 @@ RSET resultSetRef (ZebraHandle zh, Z_ResultSetId *resultSetId); void resultSetDestroy (ZebraHandle zh, int num_names, char **names, int *statuses); +const char *zebra_resultSetTerms (ZebraHandle zh, const char *setname, + int no, int *count, int *no_max); + ZebraPosSet zebraPosSetCreate (ZebraHandle zh, const char *name, int num, int *positions); void zebraPosSetDestroy (ZebraHandle zh, ZebraPosSet records, int num); diff --git a/index/zsets.c b/index/zsets.c index 2fed59c..2aeb072 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,118 +1,9 @@ /* - * Copyright (C) 1994-2000, Index Data + * Copyright (C) 1994-2002, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * - * $Log: zsets.c,v $ - * Revision 1.32 2002-02-20 17:30:01 adam - * Work on new API. Locking system re-implemented - * - * Revision 1.31 2001/11/19 23:05:22 adam - * Added a few prototypes. - * - * Revision 1.30 2001/10/15 19:53:43 adam - * POSIX thread updates. First work on term sets. - * - * Revision 1.29 2001/01/22 10:42:56 adam - * Added numerical sort. - * - * Revision 1.28 2000/07/07 12:49:20 adam - * Optimized resultSetInsert{Rank,Sort}. - * - * Revision 1.27 2000/04/05 09:49:36 adam - * On Unix, zebra/z'mbol uses automake. - * - * Revision 1.26 2000/03/20 19:08:36 adam - * Added remote record import using Z39.50 extended services and Segment - * Requests. - * - * Revision 1.25 2000/03/15 15:00:31 adam - * First work on threaded version. - * - * Revision 1.24 1999/11/04 15:00:45 adam - * Implemented delete result set(s). - * - * Revision 1.23 1999/05/26 07:49:13 adam - * C++ compilation. - * - * Revision 1.22 1999/02/02 14:51:15 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.21 1998/11/16 16:03:46 adam - * Moved loggin utilities to Yaz. 
Was implemented in file zlogs.c. - * - * Revision 1.20 1998/11/16 10:10:53 adam - * Fixed problem with zebraPosSetCreate that occurred when positions were - * less than 1. - * - * Revision 1.19 1998/09/22 10:48:22 adam - * Minor changes in search API. - * - * Revision 1.18 1998/09/22 10:03:45 adam - * Changed result sets to be persistent in the sense that they can - * be re-searched if needed. - * Fixed memory leak in rsm_or. - * - * Revision 1.17 1998/06/23 15:33:36 adam - * Added feature to specify sort criteria in query (type 7 specifies - * sort flags). - * - * Revision 1.16 1998/05/20 10:12:24 adam - * Implemented automatic EXPLAIN database maintenance. - * Modified Zebra to work with ASN.1 compiled version of YAZ. - * - * Revision 1.15 1998/03/05 08:45:14 adam - * New result set model and modular ranking system. Moved towards - * descent server API. System information stored as "SGML" records. - * - * Revision 1.14 1998/02/10 16:39:15 adam - * Minor change. - * - * Revision 1.13 1998/02/10 12:03:06 adam - * Implemented Sort. - * - * Revision 1.12 1997/09/25 14:57:36 adam - * Windows NT port. - * - * Revision 1.11 1996/12/23 15:30:46 adam - * Work on truncation. - * Bug fix: result sets weren't deleted after server shut down. - * - * Revision 1.10 1995/10/30 15:08:08 adam - * Bug fixes. - * - * Revision 1.9 1995/10/17 18:02:14 adam - * New feature: databases. Implemented as prefix to words in dictionary. - * - * Revision 1.8 1995/10/10 13:59:25 adam - * Function rset_open changed its wflag parameter to general flags. - * - * Revision 1.7 1995/10/06 14:38:01 adam - * New result set method: r_score. - * Local no (sysno) and score is transferred to retrieveCtrl. - * - * Revision 1.6 1995/09/28 09:19:49 adam - * xfree/xmalloc used everywhere. - * Extract/retrieve method seems to work for text records. - * - * Revision 1.5 1995/09/27 16:17:32 adam - * More work on retrieve. 
- * - * Revision 1.4 1995/09/07 13:58:36 adam - * New parameter: result-set file descriptor (RSFD) to support multiple - * positions within the same result-set. - * Boolean operators: and, or, not implemented. - * Result-set references. - * - * Revision 1.3 1995/09/06 16:11:19 adam - * Option: only one word key per file. - * - * Revision 1.2 1995/09/06 10:33:04 adam - * More work on present. Some log messages removed. - * - * Revision 1.1 1995/09/05 15:28:40 adam - * More work on search engine. - * + * $Id: zsets.c,v 1.33 2002-03-20 20:24:30 adam Exp $ */ #include #include @@ -220,6 +111,24 @@ void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type, (s->hits)++; } + +const char *zebra_resultSetTerms (ZebraHandle zh, const char *setname, + int no, int *count, int *no_max) +{ + ZebraSet s = resultSetGet (zh, setname); + + *count = 0; + *no_max = 0; + if (!s || !s->rset) + return 0; + *no_max = s->rset->no_rset_terms; + if (no < 0 || no >= *no_max) + return 0; + *count = s->rset->rset_terms[no]->count; + return s->rset->rset_terms[no]->name; +} + + ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov) { ZebraSet s; @@ -640,7 +549,7 @@ void resultSetSortSingle (ZebraHandle zh, NMEM nmem, int term_index; RSFD rfd; - logf (LOG_DEBUG, "resultSetSortSingle start"); + logf (LOG_LOG, "resultSetSortSingle start"); sset->sort_info->num_entries = 0; sset->hits = 0; @@ -715,8 +624,15 @@ void resultSetSortSingle (ZebraHandle zh, NMEM nmem, } rset_close (rset, rfd); + for (i = 0; i < rset->no_rset_terms; i++) + yaz_log (LOG_LOG, "term=\"%s\" nn=%d type=%s count=%d", + rset->rset_terms[i]->name, + rset->rset_terms[i]->nn, + rset->rset_terms[i]->flags, + rset->rset_terms[i]->count); + *sort_status = Z_SortStatus_success; - logf (LOG_DEBUG, "resultSetSortSingle end"); + logf (LOG_LOG, "resultSetSortSingle end"); } RSET resultSetRef (ZebraHandle zh, Z_ResultSetId *resultSetId) @@ -743,12 +659,7 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) 
zebraSet->hits = 0; rfd = rset_open (rset, RSETF_READ); - logf (LOG_DEBUG, "resultSetRank"); - for (i = 0; i < rset->no_rset_terms; i++) - logf (LOG_DEBUG, "term=\"%s\" cnt=%d type=%s", - rset->rset_terms[i]->name, - rset->rset_terms[i]->nn, - rset->rset_terms[i]->flags); + yaz_log (LOG_LOG, "resultSetRank"); rank_class = zebraRankLookup (zh, "rank-1"); rc = rank_class->control; @@ -779,7 +690,15 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset) (*rc->end) (zh, handle); } rset_close (rset, rfd); - logf (LOG_DEBUG, "%d keys, %d distinct sysnos", kno, zebraSet->hits); + + for (i = 0; i < rset->no_rset_terms; i++) + yaz_log (LOG_LOG, "term=\"%s\" nn=%d type=%s count=%d", + rset->rset_terms[i]->name, + rset->rset_terms[i]->nn, + rset->rset_terms[i]->flags, + rset->rset_terms[i]->count); + + yaz_log (LOG_LOG, "%d keys, %d distinct sysnos", kno, zebraSet->hits); } ZebraRankClass zebraRankLookup (ZebraHandle zh, const char *name) diff --git a/rset/rsbool.c b/rset/rsbool.c index bdc39e6..dce5eb2 100644 --- a/rset/rsbool.c +++ b/rset/rsbool.c @@ -1,67 +1,9 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2002, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * - * $Log: rsbool.c,v $ - * Revision 1.16 1999-05-26 07:49:14 adam - * C++ compilation. - * - * Revision 1.15 1999/02/02 14:51:32 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.14 1998/03/05 08:36:27 adam - * New result set model. - * - * Revision 1.13 1997/12/18 10:54:24 adam - * New method result set method rs_hits that returns the number of - * hits in result-set (if known). The ranked result set returns real - * number of hits but only when not combined with other operands. - * - * Revision 1.12 1997/10/31 12:37:01 adam - * Code calls xfree() instead of free(). - * - * Revision 1.11 1997/09/09 13:38:15 adam - * Partial port to WIN95/NT. 
- * - * Revision 1.10 1996/10/29 13:55:20 adam - * Include of zebrautl.h instead of alexutil.h. - * - * Revision 1.9 1995/12/11 09:15:22 adam - * New set types: sand/sor/snot - ranked versions of and/or/not in - * ranked/semi-ranked result sets. - * Note: the snot not finished yet. - * New rset member: flag. - * Bug fix: r_delete in rsrel.c did free bad memory block. - * - * Revision 1.8 1995/10/12 12:41:55 adam - * Private info (buf) moved from struct rset_control to struct rset. - * Bug fixes in relevance. - * - * Revision 1.7 1995/10/10 14:00:03 adam - * Function rset_open changed its wflag parameter to general flags. - * - * Revision 1.6 1995/10/06 14:38:05 adam - * New result set method: r_score. - * Local no (sysno) and score is transferred to retrieveCtrl. - * - * Revision 1.5 1995/09/08 14:52:41 adam - * Work on relevance feedback. - * - * Revision 1.4 1995/09/08 08:54:04 adam - * More efficient and operation. - * - * Revision 1.3 1995/09/07 13:58:43 adam - * New parameter: result-set file descriptor (RSFD) to support multiple - * positions within the same result-set. - * Boolean operators: and, or, not implemented. - * - * Revision 1.2 1995/09/06 16:11:55 adam - * More work on boolean sets. - * - * Revision 1.1 1995/09/06 13:27:15 adam - * New set type: bool. Not finished yet. - * + * $Id: rsbool.c,v 1.17 2002-03-20 20:24:30 adam Exp $ */ #include diff --git a/rset/rset.c b/rset/rset.c index d6bab13..93fe8df 100644 --- a/rset/rset.c +++ b/rset/rset.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: rset.c,v $ - * Revision 1.14 1999-05-26 07:49:14 adam + * Revision 1.15 2002-03-20 20:24:30 adam + * Hits per term. Returned in SearchResult-1 + * + * Revision 1.14 1999/05/26 07:49:14 adam * C++ compilation. 
* * Revision 1.13 1999/02/02 14:51:33 adam @@ -119,7 +122,8 @@ RSET_TERM rset_term_create (const char *name, int length, const char *flags) t->flags = NULL; else t->flags = xstrdup (flags); - t->nn = 1; + t->nn = -1; + t->count = 0; return t; } diff --git a/rset/rsisamc.c b/rset/rsisamc.c index 76c9702..96f6386 100644 --- a/rset/rsisamc.c +++ b/rset/rsisamc.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: rsisamc.c,v $ - * Revision 1.8 1999-11-30 13:48:04 adam + * Revision 1.9 2002-03-20 20:24:30 adam + * Hits per term. Returned in SearchResult-1 + * + * Revision 1.8 1999/11/30 13:48:04 adam * Improved installation. Updated for inclusion of YAZ header files. * * Revision 1.7 1999/05/26 07:49:14 adam @@ -67,11 +70,15 @@ struct rset_pp_info { ISAMC_PP pt; struct rset_pp_info *next; struct rset_isamc_info *info; + int *countp; + void *buf; }; struct rset_isamc_info { ISAMC is; ISAMC_P pos; + int key_size; + int (*cmp)(const void *p1, const void *p2); struct rset_pp_info *ispt_list; }; @@ -84,6 +91,9 @@ static void *r_create(RSET ct, const struct rset_control *sel, void *parms) info = (struct rset_isamc_info *) xmalloc (sizeof(*info)); info->is = pt->is; info->pos = pt->pos; + info->key_size = pt->key_size; + yaz_log (LOG_LOG, "info->key_size = %d\n", info->key_size); + info->cmp = pt->cmp; info->ispt_list = NULL; ct->no_rset_terms = 1; ct->rset_terms = (RSET_TERM *) xmalloc (sizeof(*ct->rset_terms)); @@ -109,6 +119,9 @@ RSFD r_open (RSET ct, int flag) ptinfo->info = info; if (ct->rset_terms[0]->nn < 0) ct->rset_terms[0]->nn = isc_pp_num (ptinfo->pt); + ct->rset_terms[0]->count = 0; + ptinfo->countp = &ct->rset_terms[0]->count; + ptinfo->buf = xmalloc (info->key_size); return ptinfo; } @@ -120,6 +133,7 @@ static void r_close (RSFD rfd) for (ptinfop = &info->ispt_list; *ptinfop; ptinfop = &(*ptinfop)->next) if (*ptinfop == rfd) { + xfree ((*ptinfop)->buf); isc_pp_close ((*ptinfop)->pt); *ptinfop = (*ptinfop)->next; xfree (rfd); @@ -153,8 +167,19 @@ static 
int r_count (RSET ct) static int r_read (RSFD rfd, void *buf, int *term_index) { + struct rset_pp_info *pinfo = (struct rset_pp_info *) rfd; + int r; *term_index = 0; - return isc_pp_read( ((struct rset_pp_info*) rfd)->pt, buf); + r = isc_pp_read(pinfo->pt, buf); + if (r > 0) + { + if (*pinfo->countp == 0 || (*pinfo->info->cmp)(buf, pinfo->buf) > 1) + { + memcpy (pinfo->buf, buf, pinfo->info->key_size); + (*pinfo->countp)++; + } + } + return r; } static int r_write (RSFD rfd, const void *buf) diff --git a/rset/rsm_or.c b/rset/rsm_or.c index 382e5dd..5232aa0 100644 --- a/rset/rsm_or.c +++ b/rset/rsm_or.c @@ -1,42 +1,9 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2002, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * - * $Log: rsm_or.c,v $ - * Revision 1.10 1999-11-30 13:48:04 adam - * Improved installation. Updated for inclusion of YAZ header files. - * - * Revision 1.9 1999/07/13 14:45:42 adam - * Fixed memory leak. - * - * Revision 1.8 1999/05/26 07:49:14 adam - * C++ compilation. - * - * Revision 1.7 1998/09/22 10:03:46 adam - * Changed result sets to be persistent in the sense that they can - * be re-searched if needed. - * Fixed memory leak in rsm_or. - * - * Revision 1.6 1998/03/05 08:36:28 adam - * New result set model. - * - * Revision 1.5 1997/12/18 10:54:25 adam - * New method result set method rs_hits that returns the number of - * hits in result-set (if known). The ranked result set returns real - * number of hits but only when not combined with other operands. - * - * Revision 1.4 1997/10/31 12:37:55 adam - * Code calls xfree() instead of free(). - * - * Revision 1.3 1997/09/09 13:38:16 adam - * Partial port to WIN95/NT. - * - * Revision 1.2 1996/12/23 15:30:49 adam - * Work on truncation. - * - * Revision 1.1 1996/12/20 11:07:21 adam - * Implemented Multi-or result set. 
+ * $Id: rsm_or.c,v 1.11 2002-03-20 20:24:30 adam Exp $ * */ @@ -108,6 +75,8 @@ struct rset_mor_rfd { struct rset_mor_rfd *next; struct rset_mor_info *info; struct trunc_info *ti; + int *countp; + char *pbuf; }; static void heap_swap (struct trunc_info *ti, int i1, int i2) @@ -259,6 +228,13 @@ static RSFD r_open (RSET ct, int flag) } } rfd->position = info->no_save_positions; + + if (ct->no_rset_terms == 1) + rfd->countp = &ct->rset_terms[0]->count; + else + rfd->countp = 0; + rfd->pbuf = xmalloc (info->key_size); + r_rewind (rfd); return rfd; } @@ -279,6 +255,7 @@ static void r_close (RSFD rfd) if (((struct rset_mor_rfd *) rfd)->ispt[i]) isc_pp_close (((struct rset_mor_rfd *) rfd)->ispt[i]); xfree (((struct rset_mor_rfd *)rfd)->ispt); + xfree (((struct rset_mor_rfd *)rfd)->pbuf); xfree (rfd); return; } @@ -312,7 +289,8 @@ static int r_count (RSET ct) static int r_read (RSFD rfd, void *buf, int *term_index) { - struct trunc_info *ti = ((struct rset_mor_rfd *) rfd)->ti; + struct rset_mor_rfd *mrfd = (struct rset_mor_rfd *) rfd; + struct trunc_info *ti = mrfd->ti; int n = ti->indx[ti->ptr[1]]; if (!ti->heapnum) @@ -330,6 +308,12 @@ static int r_read (RSFD rfd, void *buf, int *term_index) } else heap_delete (ti); + if (mrfd->countp && ( + *mrfd->countp == 0 || (*ti->cmp)(buf, mrfd->pbuf) > 1)) + { + memcpy (mrfd->pbuf, buf, ti->keysize); + (*mrfd->countp)++; + } return 1; } while (1) @@ -346,6 +330,12 @@ static int r_read (RSFD rfd, void *buf, int *term_index) break; } } + if (mrfd->countp && ( + *mrfd->countp == 0 || (*ti->cmp)(buf, mrfd->pbuf) > 1)) + { + memcpy (mrfd->pbuf, buf, ti->keysize); + (*mrfd->countp)++; + } return 1; } diff --git a/rset/rstemp.c b/rset/rstemp.c index bd4f83b..1d39687 100644 --- a/rset/rstemp.c +++ b/rset/rstemp.c @@ -3,7 +3,7 @@ * All rights reserved. 
* Sebastian Hammer, Adam Dickmeiss * - * $Id: rstemp.c,v 1.29 2002-03-15 20:11:36 adam Exp $ + * $Id: rstemp.c,v 1.30 2002-03-20 20:24:30 adam Exp $ */ #include @@ -57,11 +57,14 @@ struct rset_temp_info { int dirty; /* window is dirty */ int hits; /* no of hits */ char *temp_path; + int (*cmp)(const void *p1, const void *p2); }; struct rset_temp_rfd { struct rset_temp_info *info; struct rset_temp_rfd *next; + int *countp; + void *buf; }; static void *r_create(RSET ct, const struct rset_control *sel, void *parms) @@ -80,6 +83,7 @@ static void *r_create(RSET ct, const struct rset_control *sel, void *parms) info->pos_buf = 0; info->dirty = 0; info->hits = -1; + info->cmp = temp_parms->cmp; if (!temp_parms->temp_path) info->temp_path = NULL; else @@ -90,6 +94,7 @@ static void *r_create(RSET ct, const struct rset_control *sel, void *parms) ct->no_rset_terms = 1; ct->rset_terms = (RSET_TERM *) xmalloc (sizeof(*ct->rset_terms)); ct->rset_terms[0] = temp_parms->rset_term; + return info; } @@ -114,6 +119,10 @@ static RSFD r_open (RSET ct, int flag) rfd = (struct rset_temp_rfd *) xmalloc (sizeof(*rfd)); rfd->info = info; r_rewind (rfd); + + rfd->countp = &ct->rset_terms[0]->count; + rfd->buf = xmalloc (info->key_size); + return rfd; } @@ -190,6 +199,7 @@ static void r_close (RSFD rfd) close (info->fd); info->fd = -1; } + xfree (((struct rset_temp_rfd *)rfd)->buf); xfree (rfd); } @@ -272,7 +282,8 @@ static int r_count (RSET ct) static int r_read (RSFD rfd, void *buf, int *term_index) { - struct rset_temp_info *info = ((struct rset_temp_rfd*)rfd)->info; + struct rset_temp_rfd *mrfd = (struct rset_temp_rfd*) rfd; + struct rset_temp_info *info = mrfd->info; size_t nc = info->pos_cur + info->key_size; @@ -288,6 +299,12 @@ static int r_read (RSFD rfd, void *buf, int *term_index) info->key_size); info->pos_cur = nc; *term_index = 0; + + if (*mrfd->countp == 0 || (*info->cmp)(buf, mrfd->buf) > 1) + { + memcpy (mrfd->buf, buf, mrfd->info->key_size); + (*mrfd->countp)++; + } return 1; 
} -- 1.7.10.4