1 /* $Id: rsbetween.c,v 1.33 2004-11-19 15:52:58 heikki Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 /* rsbetween is (mostly) used for xml searches. It returns the hits of the
25 * "middle" rset, that are in between the "left" and "right" rsets. For
26 * example "Shakespeare" in between "<author>" and "</author>". The thing is
27 * complicated by the inclusion of attributes (from their own rset). If attrs
28 * are specified, they must match the "left" rset (start tag). "Hamlet" between
29 * "<title lang=eng>" and "</title>". (This assumes that the attributes are
30 * indexed to the same seqno as the tags).
43 static RSFD r_open (RSET ct, int flag);
44 static void r_close (RSFD rfd);
45 static void r_delete (RSET ct);
46 static int r_forward(RSFD rfd, void *buf,
47 TERMID *term, const void *untilbuf);
48 static int r_read(RSFD rfd, void *buf, TERMID *term );
49 static int r_write(RSFD rfd, const void *buf);
50 static void r_pos(RSFD rfd, double *current, double *total);
51 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
53 static const struct rset_control control =
/* Exported pointer to this file's static rset_control table. */
67 const struct rset_control *rset_kind_between = &control;
/* Private data of a between rset. The callbacks in this file obtain it by
 * casting the owning RSET's priv pointer. */
74 struct rset_between_info {
75 RSET andset; /* the multi-and of the above */
76 TERMID startterm; /* pseudo terms for detecting which one we read from */
/* Per-open-handle read state, reached through the RSFD's priv pointer.
 * It tracks the tag nesting seen so far, which r_read uses to decide
 * whether a hit is to be returned or ignored. */
81 struct rset_between_rfd {
83 void *recbuf; /* a key that tells which record we are in */
84 void *startbuf; /* the start tag */
85 int startbufok; /* we have seen the first start tag */
86 void *attrbuf; /* the attr tag. If these two match, we have attr match */
87 int attrbufok; /* we have seen the first attr tag, can compare */
88 int depth; /* number of start-tags without end-tags */
89 int attrdepth; /* on what depth the attr matched */
/* Log level used by this file's yaz_log calls. rsbetween_create looks it up
 * with yaz_log_module_level("rsbetween") while the second flag is still 0,
 * and then sets that flag to 1. */
93 static int log_level=0;
94 static int log_level_initialized=0;
97 /* make sure that the rset has a term attached. If not, create one */
98 /* we need these terms for the tags, to distinguish what we read */
/* rs:   the rset to check
 * tag:  text handed to rset_term_create() (together with its strlen)
 * nmem: memory handle handed to rset_term_create() */
99 static void checkterm( RSET rs, char *tag, NMEM nmem)
104 rset_term_create(tag,strlen(tag),"",0,nmem);
/* Create a between rset.
 * rset_l, rset_m and rset_r are stored as the start-tag, hit and stop-tag
 * inputs, and rset_attr as the attribute input. The inputs are combined
 * into one multi-and rset (info->andset). The terms of the tag rsets are
 * remembered in info so that the read side can tell, by term, which input
 * a key came from. */
110 RSET rsbetween_create( NMEM nmem, const struct key_control *kcontrol,
112 RSET rset_l, RSET rset_m, RSET rset_r, RSET rset_attr)
114 RSET rnew=rset_create_base(&control, nmem, kcontrol, scope,0);
115 struct rset_between_info *info=
116 (struct rset_between_info *) nmem_malloc(rnew->nmem,sizeof(*info));
120 if (!log_level_initialized)
122 log_level=yaz_log_module_level("rsbetween");
123 log_level_initialized=1;
125 rsetarray[STARTTAG] = rset_l;
126 rsetarray[HIT] = rset_m;
127 rsetarray[STOPTAG] = rset_r;
128 rsetarray[ATTRTAG] = rset_attr;
130 /* make sure we have decent terms for all rsets. Create dummies if needed*/
131 checkterm( rsetarray[STARTTAG], "(start)",nmem);
/* NOTE(review): the stop tag here, and the attr tag further down, are also
 * given the dummy name "(start)". This looks like a copy/paste leftover.
 * It appears to affect only the name of the term, since r_read compares
 * term pointers -- confirm before renaming. */
132 checkterm( rsetarray[STOPTAG], "(start)",nmem);
133 info->startterm=rsetarray[STARTTAG]->term;
134 info->stopterm=rsetarray[STOPTAG]->term;
138 checkterm( rsetarray[ATTRTAG], "(start)",nmem);
139 info->attrterm=rsetarray[ATTRTAG]->term;
147 info->andset=rsmultiand_create( nmem, kcontrol, scope, n, rsetarray);
149 yaz_log(log_level,"create rset at %p",rnew);
/* Delete callback: logs the call and deletes the inner multi-and rset. */
154 static void r_delete (RSET ct)
156 struct rset_between_info *info = (struct rset_between_info *) ct->priv;
157 yaz_log(log_level,"delete rset at %p",ct);
158 rset_delete(info->andset);
/* Open callback: sets up a read handle on the between rset.
 * A request with RSETF_WRITE is logged as fatal, since this set type is
 * read-only. The handle's private state gets three key-sized buffers
 * (record, start tag, attr tag) allocated from the rset's nmem, and the
 * inner multi-and rset is opened with RSETF_READ. */
162 static RSFD r_open (RSET ct, int flag)
164 struct rset_between_info *info = (struct rset_between_info *) ct->priv;
166 struct rset_between_rfd *p;
168 if (flag & RSETF_WRITE)
170 yaz_log (YLOG_FATAL, "between set type is read-only");
173 rfd=rfd_create_base(ct);
/* NOTE(review): p is taken from rfd->priv and also freshly allocated;
 * presumably the allocation only happens when the handle carries no
 * private state yet -- confirm. */
175 p=(struct rset_between_rfd *)rfd->priv;
177 p = (struct rset_between_rfd *) nmem_malloc(ct->nmem, (sizeof(*p)));
179 p->recbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
180 p->startbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
181 p->attrbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
183 p->andrfd = rset_open (info->andset, RSETF_READ);
189 yaz_log(log_level,"open rset=%p rfd=%p", ct, rfd);
/* Close callback: logs the call, closes the inner multi-and handle and then
 * hands the handle itself to rfd_delete_base. */
193 static void r_close (RSFD rfd)
195 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
196 yaz_log(log_level,"close rfd=%p", rfd);
197 rset_close (p->andrfd);
198 rfd_delete_base(rfd);
/* Forward callback: logs the call and forwards the inner multi-and handle
 * with rset_forward, passing buf, term and untilbuf through unchanged.
 * rc receives the result of that call. */
203 static int r_forward(RSFD rfd, void *buf,
204 TERMID *term, const void *untilbuf)
206 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
208 yaz_log(log_level, "forwarding ");
209 rc=rset_forward(p->andrfd,buf,term,untilbuf);
/* Update p->attrdepth for the handle, i.e. note whether the current start
 * tag has a matching attribute.
 * Returns early when a match has already been found. In one case attrdepth
 * is set to -1 ("matches always"); presumably that is when no attribute
 * rset is in use -- confirm. Otherwise, once both a start-tag key and an
 * attr key have been seen and the key comparison yields 0, attrdepth is
 * set to the current depth. */
215 static void checkattr(RSFD rfd)
217 struct rset_between_info *info =(struct rset_between_info *)rfd->rset->priv;
218 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
219 const struct key_control *kctrl=rfd->rset->keycontrol;
222 return; /* already found one */
225 p->attrdepth=-1; /* matches always */
228 if ( p->startbufok && p->attrbufok )
229 { /* have buffers to compare */
230 cmp=(kctrl->cmp)(p->startbuf,p->attrbuf);
231 if (0==cmp) /* and the keys match */
233 p->attrdepth=p->depth;
234 yaz_log(log_level, "found attribute match at depth %d",p->attrdepth);
/* Read callback: delivers the next hit that lies between start and end tags.
 * Keys are read from the inner multi-and handle in a loop. Each key is
 * compared with the remembered record key; a result >= the rset's scope is
 * treated as a new record. The key's term then decides what it is:
 *   - start pseudo term: the key is saved in startbuf and checkattr runs
 *   - stop pseudo term:  an attr match at the current depth is ended
 *   - attr pseudo term:  the key is saved in attrbuf and checkattr runs
 *   - anything else:     a hit; it is returned (1) with buf and term as
 *                        read, but only when depth and attrdepth are both
 *                        non-zero, otherwise it is logged and ignored. */
240 static int r_read (RSFD rfd, void *buf, TERMID *term)
242 struct rset_between_info *info =(struct rset_between_info *)rfd->rset->priv;
243 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
244 const struct key_control *kctrl=rfd->rset->keycontrol;
247 yaz_log(log_level,"== read: term=%p",term);
250 while ( rset_read(p->andrfd,buf,term) )
252 yaz_log(log_level,"read loop term=%p d=%d ad=%d",
253 *term,p->depth, p->attrdepth);
256 memcpy(p->recbuf,buf,kctrl->key_size);
258 cmp=rfd->rset->scope; /* force newrecord */
261 cmp=(kctrl->cmp)(buf,p->recbuf);
262 yaz_log(log_level, "cmp=%d",cmp);
265 if (cmp>=rfd->rset->scope)
267 yaz_log(log_level,"new record");
270 memcpy(p->recbuf,buf,kctrl->key_size);
274 yaz_log(log_level," term: '%s'", (*term)->name);
275 if (*term==info->startterm)
278 yaz_log(log_level,"read start tag. d=%d",p->depth);
279 memcpy(p->startbuf,buf,kctrl->key_size);
281 checkattr(rfd); /* in case we already saw the attr here */
283 else if (*term==info->stopterm)
285 if (p->depth == p->attrdepth)
286 p->attrdepth=0; /* ending the tag with attr match */
288 yaz_log(log_level,"read end tag. d=%d ad=%d",p->depth, p->attrdepth);
290 else if (*term==info->attrterm)
292 yaz_log(log_level,"read attr");
293 memcpy(p->attrbuf,buf,kctrl->key_size);
295 checkattr(rfd); /* in case the start tag came first */
298 { /* must be a real hit */
299 if (p->depth && p->attrdepth)
302 yaz_log(log_level,"got a hit h="ZINT_FORMAT" d=%d ad=%d",
303 p->hits,p->depth,p->attrdepth);
304 return 1; /* we have everything in place already! */
306 yaz_log(log_level, "Ignoring hit. h="ZINT_FORMAT" d=%d ad=%d",
307 p->hits,p->depth,p->attrdepth);
/* Write callback: this set type is read-only, so a write attempt is logged
 * as fatal. */
316 static int r_write (RSFD rfd, const void *buf)
318 yaz_log (YLOG_FATAL, "between set type is read-only");
/* Position callback: delegates to rset_pos on the inner multi-and handle,
 * which fills in *current and *total, and then logs both values. */
323 static void r_pos (RSFD rfd, double *current, double *total)
325 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
326 rset_pos(p->andrfd,current, total);
327 yaz_log(log_level,"pos: %0.1f/%0.1f ", *current, *total);
/* Term-collection callback: delegates to rset_getterms on the inner
 * multi-and rset, passing terms, maxterms and curterm through unchanged. */
330 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
332 struct rset_between_info *info = (struct rset_between_info *) ct->priv;
333 rset_getterms(info->andset, terms, maxterms, curterm);