1 /* $Id: rsbetween.c,v 1.37 2005-04-26 10:09:38 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 /* rsbetween is (mostly) used for xml searches. It returns the hits of the
25 * "middle" rset that are in between the "left" and "right" rsets. For
26 * example, "Shakespeare" in between "<author>" and "</author>". The thing is
27 * complicated by the inclusion of attributes (from their own rset). If attrs
28 * are specified, they must match the "left" rset (start tag), e.g. "Hamlet"
29 * between "<title lang = eng>" and "</title>". (This assumes that the
30 * attributes are indexed to the same seqno as the tags).
39 #include <idzebra/util.h>
/* Forward declarations of the file-local rset operations defined further
 * down, followed by the start of the `control` table that is handed to
 * rset_create_base() in rsbetween_create(). The table's initializer is not
 * visible in this excerpt. */
43 static RSFD r_open(RSET ct, int flag);
44 static void r_close(RSFD rfd);
45 static void r_delete(RSET ct);
46 static int r_forward(RSFD rfd, void *buf,
47 TERMID *term, const void *untilbuf);
48 static int r_read(RSFD rfd, void *buf, TERMID *term );
49 static int r_write(RSFD rfd, const void *buf);
50 static void r_pos(RSFD rfd, double *current, double *total);
51 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
53 static const struct rset_control control =
/* Private data of a between rset (reached through RSET->priv).
 * Only part of the structure is visible in this excerpt; rsbetween_create()
 * also assigns the members stopterm and attrterm. */
71 struct rset_between_info {
72 RSET andset; /* multi-and rset built from the start, hit, stop and (optional) attr rsets */
73 TERMID startterm; /* pseudo terms for detecting which one we read from */
/* Private data of one open handle on a between rset (reached through
 * RSFD->priv). Only part of the structure is visible in this excerpt; the
 * code below also uses the members andrfd (handle on the multi-and rset)
 * and hits (value printed in the hit log messages). */
78 struct rset_between_rfd {
80 void *recbuf; /* a key that tells which record we are in */
81 void *startbuf; /* the start tag */
82 int startbufok; /* we have seen the first start tag */
83 void *attrbuf; /* the attr tag. If these two match, we have attr match */
84 int attrbufok; /* we have seen the first attr tag, can compare */
85 int depth; /* number of start-tags without end-tags */
86 int attrdepth; /* on what depth the attr matched; 0 = no match, -1 = matches always */
/* Log level used by every yaz_log() call in this file except the fatal
 * "read-only" messages. It is looked up once, on the first call to
 * rsbetween_create(), via yaz_log_module_level("rsbetween"); the second
 * variable records that the lookup has been done. */
90 static int log_level = 0;
91 static int log_level_initialized = 0;
94 /* make sure that the rset has a term attached. If not, create one */
95 /* we need these terms for the tags, to distinguish what we read */
/* rs:   rset whose term member is (re)assigned.
 * tag:  name given to the created term (its length is taken with strlen).
 * nmem: memory handle passed on to rset_term_create().
 * NOTE(review): the guard that skips creation when rs already has a term is
 * described by the comment above but is not visible in this excerpt. */
96 static void checkterm( RSET rs, char *tag, NMEM nmem)
100 rs->term = rset_term_create(tag, strlen(tag), "", 0, nmem);
/* Create a "between" rset.
 *
 * nmem:      memory handle used for the new rset, its private info and any
 *            dummy terms that have to be created.
 * kcontrol:  key control passed to rset_create_base() and to the multi-and.
 * rset_l:    rset of start tags  (stored in slot STARTTAG).
 * rset_m:    rset of the hits    (stored in slot HIT).
 * rset_r:    rset of stop tags   (stored in slot STOPTAG).
 * rset_attr: rset of attribute tags (stored in slot ATTRTAG); the code has a
 *            separate path that sets info->attrterm to NULL, presumably when
 *            no attribute rset is given -- the condition is not visible in
 *            this excerpt.
 *
 * The tag rsets are combined with the hit rset into one multi-and rset
 * (info->andset). The terms attached to the tag rsets are remembered in
 * info so that r_read() can tell, by pointer comparison, which rset a key
 * came from. Tag rsets get a dummy term via checkterm(); each dummy term
 * carries its own label ("(start)", "(stop)", "(attr)") so that they can be
 * told apart in log output and term listings.
 *
 * NOTE(review): the `scope` argument, the rsetarray/n declarations and the
 * return statement are not visible in this excerpt.
 */
106 RSET rsbetween_create( NMEM nmem, const struct key_control *kcontrol,
108 RSET rset_l, RSET rset_m, RSET rset_r, RSET rset_attr)
110 RSET rnew = rset_create_base(&control, nmem, kcontrol, scope,0);
111 struct rset_between_info *info=
112 (struct rset_between_info *) nmem_malloc(rnew->nmem,sizeof(*info));
/* Resolve the module log level only once per process. */
116 if (!log_level_initialized)
118 log_level = yaz_log_module_level("rsbetween");
119 log_level_initialized = 1;
121 rsetarray[STARTTAG] = rset_l;
122 rsetarray[HIT] = rset_m;
123 rsetarray[STOPTAG] = rset_r;
124 rsetarray[ATTRTAG] = rset_attr;
126 /* make sure we have decent terms for all rsets. Create dummies if needed*/
127 checkterm( rsetarray[STARTTAG], "(start)",nmem);
128 checkterm( rsetarray[STOPTAG], "(stop)",nmem);
129 info->startterm = rsetarray[STARTTAG]->term;
130 info->stopterm = rsetarray[STOPTAG]->term;
134 checkterm( rsetarray[ATTRTAG], "(attr)",nmem);
135 info->attrterm = rsetarray[ATTRTAG]->term;
140 info->attrterm = NULL;
143 info->andset = rsmulti_and_create( nmem, kcontrol, scope, n, rsetarray);
145 yaz_log(log_level,"create rset at %p",rnew);
/* Delete a between rset: logs the event and deletes the underlying
 * multi-and rset held in the private info. */
150 static void r_delete(RSET ct)
152 struct rset_between_info *info = (struct rset_between_info *) ct->priv;
153 yaz_log(log_level,"delete rset at %p",ct);
154 rset_delete(info->andset);
/* Open a read handle on a between rset.
 *
 * ct:   the between rset.
 * flag: open flags; RSETF_WRITE is rejected with a fatal log message.
 *
 * Sets up the per-handle private data (three key-sized scratch buffers) and
 * opens a read handle on the underlying multi-and rset.
 * NOTE(review): the declaration of rfd, the early exit after the fatal log,
 * the initialisation of the remaining private fields and the return
 * statement are not visible in this excerpt.
 */
158 static RSFD r_open(RSET ct, int flag)
160 struct rset_between_info *info = (struct rset_between_info *) ct->priv;
162 struct rset_between_rfd *p;
164 if (flag & RSETF_WRITE)
166 yaz_log(YLOG_FATAL, "between set type is read-only");
169 rfd = rfd_create_base(ct);
/* NOTE(review): the next two assignments look like the two arms of a
 * "reuse existing private data, otherwise allocate" branch whose condition
 * is not visible here -- confirm against the full file. */
171 p=(struct rset_between_rfd *)rfd->priv;
173 p = (struct rset_between_rfd *) nmem_malloc(ct->nmem, (sizeof(*p)));
/* One key-sized buffer each for the current record key, the last start tag
 * key and the last attribute tag key. */
175 p->recbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
176 p->startbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
177 p->attrbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
/* All reading is done through a handle on the multi-and rset. */
179 p->andrfd = rset_open(info->andset, RSETF_READ);
185 yaz_log(log_level,"open rset=%p rfd=%p", ct, rfd);
/* Close a handle: logs the event, closes the handle on the underlying
 * multi-and rset, then releases the handle itself with rfd_delete_base(). */
189 static void r_close(RSFD rfd)
191 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
192 yaz_log(log_level,"close rfd=%p", rfd);
193 rset_close(p->andrfd);
194 rfd_delete_base(rfd);
/* Forward the handle: delegates directly to rset_forward() on the
 * underlying multi-and handle, passing buf, term and untilbuf through
 * unchanged, and keeps the result in rc.
 * NOTE(review): the declaration of rc and what is done with it afterwards
 * (presumably returned) are not visible in this excerpt. */
199 static int r_forward(RSFD rfd, void *buf,
200 TERMID *term, const void *untilbuf)
202 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
204 yaz_log(log_level, "forwarding ");
205 rc = rset_forward(p->andrfd,buf,term,untilbuf);
/* Update p->attrdepth for the handle, i.e. the depth at which the attribute
 * key matched the start-tag key.
 *
 * When both a start-tag key and an attribute key have been seen
 * (startbufok and attrbufok) and the key comparison function reports them
 * equal, the current depth is recorded as the attribute depth.
 * attrdepth is set to -1 for the "matches always" case.
 * NOTE(review): the conditions guarding the early return ("already found
 * one") and the -1 assignment, and the declaration of cmp, are not visible
 * in this excerpt.
 */
211 static void checkattr(RSFD rfd)
213 struct rset_between_info *info =(struct rset_between_info *)rfd->rset->priv;
214 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
215 const struct key_control *kctrl = rfd->rset->keycontrol;
218 return; /* already found one */
221 p->attrdepth=-1; /* matches always */
224 if ( p->startbufok && p->attrbufok )
225 { /* have buffers to compare */
226 cmp=(kctrl->cmp)(p->startbuf,p->attrbuf);
227 if (0==cmp) /* and the keys match */
229 p->attrdepth = p->depth;
230 yaz_log(log_level, "found attribute match at depth %d",p->attrdepth);
/* Read the next accepted hit from the between rset.
 *
 * Keys are pulled from the underlying multi-and rset one at a time. Each key
 * is classified by comparing the term pointer it came with against the
 * start/stop/attr pseudo terms stored in the rset info; a key from any other
 * term is treated as a real hit. A hit is accepted only while depth and
 * attrdepth are both non-zero.
 *
 * rfd:  open handle; its private data carries the depth/attrdepth state and
 *       the record, start-tag and attribute key buffers.
 * buf:  receives the key read from the multi-and rset.
 * term: receives the term the key belongs to.
 * Returns 1 when a hit is accepted (buf/term are already filled in by the
 * underlying read). The return value once the input is exhausted is not
 * visible in this excerpt.
 *
 * NOTE(review): several statements are missing from this excerpt (the
 * declaration of cmp, the use of dummyterm, the depth/hits updates and the
 * resets performed on a new record); the comments below describe only the
 * visible lines.
 */
236 static int r_read(RSFD rfd, void *buf, TERMID *term)
238 struct rset_between_info *info =(struct rset_between_info *)rfd->rset->priv;
239 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
240 const struct key_control *kctrl = rfd->rset->keycontrol;
242 TERMID dummyterm = 0;
243 yaz_log(log_level,"== read: term=%p",term);
246 while ( rset_read(p->andrfd,buf,term) )
248 yaz_log(log_level,"read loop term=%p d=%d ad=%d",
249 *term,p->depth, p->attrdepth);
/* NOTE(review): the following copy + forced cmp appear to be the "no record
 * seen yet" arm, and the key comparison below the other arm; the branch
 * condition is not visible here. */
252 memcpy(p->recbuf,buf,kctrl->key_size);
254 cmp = rfd->rset->scope; /* force newrecord */
257 cmp=(kctrl->cmp)(buf,p->recbuf);
258 yaz_log(log_level, "cmp=%d",cmp);
/* A comparison result at or above the rset's scope is treated as the start
 * of a new record; the new key becomes the reference record key. */
261 if (cmp>=rfd->rset->scope)
263 yaz_log(log_level,"new record");
266 memcpy(p->recbuf,buf,kctrl->key_size);
270 yaz_log(log_level," term: '%s'", (*term)->name);
/* Start tag: remember its key so checkattr() can compare it with the
 * attribute key. */
271 if (*term==info->startterm)
274 yaz_log(log_level,"read start tag. d=%d",p->depth);
275 memcpy(p->startbuf,buf,kctrl->key_size);
277 checkattr(rfd); /* in case we already saw the attr here */
279 else if (*term==info->stopterm)
281 if (p->depth == p->attrdepth)
282 p->attrdepth = 0; /* ending the tag with attr match */
284 yaz_log(log_level,"read end tag. d=%d ad=%d",p->depth, p->attrdepth);
/* Attribute tag: remember its key, then re-check against the start tag. */
286 else if (*term==info->attrterm)
288 yaz_log(log_level,"read attr");
289 memcpy(p->attrbuf,buf,kctrl->key_size);
291 checkattr(rfd); /* in case the start tag came first */
294 { /* must be a real hit */
295 if (p->depth && p->attrdepth)
298 yaz_log(log_level,"got a hit h="ZINT_FORMAT" d=%d ad=%d",
299 p->hits,p->depth,p->attrdepth);
300 return 1; /* we have everything in place already! */
302 yaz_log(log_level, "Ignoring hit. h="ZINT_FORMAT" d=%d ad=%d",
303 p->hits,p->depth,p->attrdepth);
/* Writing is not supported for this rset type: the call only logs a fatal
 * "read-only" message. The value returned is not visible in this excerpt. */
312 static int r_write(RSFD rfd, const void *buf)
314 yaz_log(YLOG_FATAL, "between set type is read-only");
/* Report the position: delegates to rset_pos() on the underlying multi-and
 * handle, which fills *current and *total, then logs both values. */
319 static void r_pos(RSFD rfd, double *current, double *total)
321 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
322 rset_pos(p->andrfd,current, total);
323 yaz_log(log_level,"pos: %0.1f/%0.1f ", *current, *total);
/* Collect the terms of this rset: delegates to rset_getterms() on the
 * underlying multi-and rset, passing terms, maxterms and curterm through
 * unchanged. */
326 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
328 struct rset_between_info *info = (struct rset_between_info *) ct->priv;
329 rset_getterms(info->andset, terms, maxterms, curterm);