1 /* $Id: recgrs.c,v 1.69 2002-11-15 21:57:41 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 #include <sys/types.h>
36 #define GRS_MAX_WORD 512
42 struct grs_handler *next;
46 struct grs_handler *handlers;
/*
 * read_grs_type: find the handler in h whose type name matches the leading
 * part of `type` and call its read function with p, storing the result
 * through root.
 *
 * `type` is split at its first '.': the text before the dot selects the
 * handler, the text after it is copied into p->type.
 * Callers in this file treat a non-zero return as "no such filter".
 */
49 static int read_grs_type (struct grs_handlers *h,
50 struct grs_read_info *p, const char *type,
53 struct grs_handler *gh = h->handlers;
54 const char *cp = strchr (type, '.');
/* no dot, or a dot in first position: the handler name is the whole string */
56 if (cp == NULL || cp == type)
58 cp = strlen(type) + type;
/* NOTE(review): presumably the else branch; strcpy is unbounded and the
 * capacity of p->type is not visible here -- confirm it always fits */
62 strcpy (p->type, cp+1);
63 for (gh = h->handlers; gh; gh = gh->next)
/* only the first cp-type bytes are compared, i.e. the part before the dot.
 * NOTE(review): a registered name that merely begins with that prefix would
 * also compare equal unless a length check exists outside the visible lines */
65 if (!memcmp (type, gh->type->type, cp-type))
/* handler state is produced by the type's init hook and kept in gh->clientData */
70 gh->clientData = (*gh->type->init)();
/* pass the state to the reader through p, then store back whatever it left there */
72 p->clientData = gh->clientData;
73 *root = (gh->type->read)(p);
74 gh->clientData = p->clientData;
/*
 * grs_add_handler: allocate a list entry for record type t with xmalloc and
 * link it to h's handler list.
 *
 * The new entry's next pointer is set to the current list head.
 * NOTE(review): the statements that store t in the entry and update
 * h->handlers are not visible here -- presumably the entry becomes the new head.
 */
81 static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t)
83 struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh));
84 gh->next = h->handlers;
/*
 * grs_init: allocate the handler collection with xmalloc and register the
 * record-type handlers listed below through grs_add_handler.
 *
 * recType is not used in the visible lines.
 * NOTE(review): some registrations may sit under preprocessor guards that are
 * not visible here; the return statement is not visible either (presumably h).
 */
91 static void *grs_init(RecType recType)
93 struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h));
96 grs_add_handler (h, recTypeGrs_sgml);
97 grs_add_handler (h, recTypeGrs_regx);
99 grs_add_handler (h, recTypeGrs_tcl);
101 grs_add_handler (h, recTypeGrs_marc);
103 grs_add_handler (h, recTypeGrs_xml);
106 grs_add_handler (h, recTypeGrs_perl);
/*
 * grs_destroy: tear down the handler collection passed as clientData.
 *
 * clientData is cast to the handler collection, and each handler type's
 * destroy hook is invoked with that handler's own clientData.
 * NOTE(review): gh_next is declared to hold the successor while walking the
 * list; the loop header and any freeing of the entries are not visible here.
 */
111 static void grs_destroy(void *clientData)
113 struct grs_handlers *h = (struct grs_handlers *) clientData;
114 struct grs_handler *gh = h->handlers, *gh_next;
119 (*gh->type->destroy)(gh->clientData);
127 1 start element (tag)
129 3 start attr (and attr-exact)
/*
 * index_xpath: emit path-style index words for node n via p->tokenAdd.
 *
 * n     - node being indexed; both n->u.data and n->u.tag are read below, so
 *         data nodes and tag nodes are evidently handled by separate branches
 * p     - extract control; supplies tokenAdd and flagShowRecords
 * level - nesting depth, used to indent the flagShowRecords output
 * wrd   - word record whose string, length and attrSet fields are filled in
 * use   - selects what is indexed; callers in this file pass 1, 2 and 1016
 */
136 static void index_xpath (data1_node *n, struct recExtractCtrl *p,
137 int level, RecWord *wrd, int use)
140 char tag_path_full[1024];
/* data node: the word is the node's own text */
148 wrd->string = n->u.data.data;
149 wrd->length = n->u.data.len;
/* NOTE(review): this line ends with ',' rather than ';', so the comma operator
 * joins the assignment with whatever expression follows -- confirm intended */
150 wrd->attrSet = VAL_IDXPATH,
152 if (p->flagShowRecords)
154 printf("%*s data=", (level + 1) * 4, "");
/* print at most the first 8 bytes of the word */
155 for (i = 0; i<wrd->length && i < 8; i++)
156 fputc (wrd->string[i], stdout);
/* walk from n up through its ancestors, appending each tag name followed by
 * '/', so the path is written innermost tag first */
165 for (nn = n; nn; nn = nn->parent)
167 if (nn->which == DATA1N_tag)
169 size_t tlen = strlen(nn->u.tag.tag);
/* bounds check against tag_path_full; the action taken is not visible here */
170 if (tlen + flen > (sizeof(tag_path_full)-2))
172 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
174 tag_path_full[flen++] = '/';
176 else if (nn->which == DATA1N_root)
180 wrd->string = tag_path_full;
182 wrd->attrSet = VAL_IDXPATH;
184 if (p->flagShowRecords)
186 printf("%*s tag=", (level + 1) * 4, "");
/* print at most the first 40 bytes of the path */
187 for (i = 0; i<wrd->length && i < 40; i++)
188 fputc (wrd->string[i], stdout);
196 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
/* first pass over the attributes: the bare name, then name and value combined */
199 for (xp = n->u.tag.attributes; xp; xp = xp->next)
202 /* attribute (no value) */
205 wrd->string = xp->name;
206 wrd->length = strlen(xp->name);
/* the combined form is only built when name plus value fit into comb */
212 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2)
214 /* attribute value exact */
215 strcpy (comb, xp->name);
217 strcat (comb, xp->value);
222 wrd->length = strlen(comb);
/* second pass over the attributes: an "@name/path" word, then the value */
228 for (xp = n->u.tag.attributes; xp; xp = xp->next)
230 char attr_tag_path_full[1024];
/* NOTE(review): sprintf is unbounded and no limit on strlen(xp->name) is
 * visible here; int_len is computed outside the visible lines -- confirm the
 * result always fits in attr_tag_path_full */
233 sprintf (attr_tag_path_full, "@%s/%.*s",
234 xp->name, int_len, tag_path_full);
238 wrd->string = attr_tag_path_full;
239 wrd->length = strlen(attr_tag_path_full);
246 wrd->string = xp->value;
247 wrd->length = strlen(xp->value);
253 wrd->string = attr_tag_path_full;
254 wrd->length = strlen(attr_tag_path_full);
/*
 * index_termlist: index node n according to the term lists of the nearest
 * tag, starting at par and moving towards the root, that has an element
 * definition.
 *
 * par   - tag node at which the search for an element definition starts
 * n     - node supplying the text to index (data, tag name or attribute value)
 * p     - extract control; supplies dh and flagShowRecords
 * level - nesting depth, used to indent the flagShowRecords output
 * wrd   - word record filled in for each term list entry
 */
262 static void index_termlist (data1_node *par, data1_node *n,
263 struct recExtractCtrl *p, int level, RecWord *wrd)
265 data1_termlist *tlist = 0;
266 data1_datatype dtype = DATA1K_string;
268 * cycle up towards the root until we find a tag with an att..
269 * this has the effect of indexing locally defined tags with
270 * the attribute of their ancestor in the record.
273 while (!par->u.tag.element)
274 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
/* tlist is taken from the element definition that was found, if any */
276 if (!par || !(tlist = par->u.tag.element->termlists))
/* the data type comes from the element's tag definition when there is one;
 * otherwise it keeps its DATA1K_string initial value */
278 if (par->u.tag.element->tag)
279 dtype = par->u.tag.element->tag->kind;
281 for (; tlist; tlist = tlist->next)
284 /* consider source */
/* source "data": use the text of a data node */
287 if (!strcmp (tlist->source, "data") && n->which == DATA1N_data)
289 wrd->string = n->u.data.data;
290 wrd->length = n->u.data.len;
/* source "tag": use the name of a tag node */
292 else if (!strcmp (tlist->source, "tag") && n->which == DATA1N_tag)
294 wrd->string = n->u.tag.tag;
295 wrd->length = strlen(n->u.tag.tag);
/* source "attr(NAME)": NAME (at most 511 characters) is scanned into xattr
 * and the attribute of that name is looked up on a tag node */
297 else if (sscanf (tlist->source, "attr(%511[^)])", xattr) == 1 &&
298 n->which == DATA1N_tag)
/* NOTE(review): this local p shadows the function parameter p within the branch */
300 data1_xattr *p = n->u.tag.attributes;
301 while (p && strcmp (p->name, xattr))
305 wrd->string = p->value;
306 wrd->length = strlen(p->value);
311 if (p->flagShowRecords)
314 printf("%*sIdx: [%s]", (level + 1) * 4, "",
316 printf("%s:%s [%d] %s",
317 tlist->att->parent->name,
318 tlist->att->name, tlist->att->value,
/* print at most the first 8 bytes of the word */
321 for (i = 0; i<wrd->length && i < 8; i++)
322 fputc (wrd->string[i], stdout);
326 fputc ('\n', stdout);
/* register type, attribute set and attribute use are taken from the term list entry */
330 wrd->reg_type = *tlist->structure;
331 wrd->attrSet = (int) (tlist->att->parent->reference);
332 wrd->attrUse = tlist->att->locals->local;
/*
 * dumpkeys: walk the sibling list starting at n (following n->next), index
 * each node and recurse into its children with level + 1.
 *
 * With p->flagShowRecords set, a description of each node is also printed to
 * stdout, indented by level * 4 columns.
 * The result of the recursive call is tested for being negative, and callers
 * in this file treat a negative return as failure.
 */
339 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level,
342 for (; n; n = n->next)
344 if (p->flagShowRecords) /* display element description to user */
346 if (n->which == DATA1N_root)
348 printf("%*s", level * 4, "");
349 printf("Record type: '%s'\n", n->u.root.type);
351 else if (n->which == DATA1N_tag)
355 printf("%*s", level * 4, "");
/* a tag without an element definition is reported as a local tag */
356 if (!(e = n->u.tag.element))
357 printf("Local tag: '%s'\n", n->u.tag.tag);
360 printf("Elm: '%s' ", e->name);
363 data1_tag *t = e->tag;
365 printf("TagNam: '%s' ", t->names->name);
368 printf("%s[%d],", t->tagset->name, t->tagset->type);
371 if (t->which == DATA1T_numeric)
372 printf("%d)", t->value.numeric);
374 printf("'%s')", t->value.string);
/* tag node: index its term lists, then its path with use 1 when there is no
 * abstract syntax or the abstract syntax enables path indexing */
381 if (n->which == DATA1N_tag)
383 index_termlist (n, n, p, level, wrd);
384 /* index start tag */
/* NOTE(review): this assert requires absyn to be non-null, yet the next
 * visible test handles the null case -- confirm whether one of them is
 * conditionally compiled */
385 assert (n->root->u.root.absyn);
387 if (!n->root->u.root.absyn)
388 index_xpath (n, p, level, wrd, 1);
389 else if (n->root->u.root.absyn->enable_xpath_indexing)
390 index_xpath (n, p, level, wrd, 1);
394 if (dumpkeys(n->child, p, level + 1, wrd) < 0)
/* data node: index against the term lists of its parent tag, then its path with use 1016 */
398 if (n->which == DATA1N_data)
400 data1_node *par = get_parent_tag(p->dh, n);
402 if (p->flagShowRecords)
404 printf("%*s", level * 4, "");
/* text longer than 256 bytes is abbreviated to its first 240 and last 6 bytes */
406 if (n->u.data.len > 256)
407 printf("'%.240s ... %.6s'\n", n->u.data.data,
408 n->u.data.data + n->u.data.len-6);
409 else if (n->u.data.len > 0)
410 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
416 index_termlist (par, n, p, level, wrd);
417 if (!n->root->u.root.absyn)
418 index_xpath (n, p, level, wrd, 1016);
419 else if (n->root->u.root.absyn->enable_xpath_indexing)
420 index_xpath (n, p, level, wrd, 1016);
/* tag node again, later in the loop body: path indexing with use 2 */
423 if (n->which == DATA1N_tag)
426 if (!n->root->u.root.absyn)
427 index_xpath (n, p, level, wrd, 2);
428 else if (n->root->u.root.absyn->enable_xpath_indexing)
429 index_xpath (n, p, level, wrd, 2);
/* a separator line is printed for a root node when records are shown */
432 if (p->flagShowRecords && n->which == DATA1N_root)
434 printf("%*s-------------\n\n", level * 4, "");
/*
 * grs_extract_tree: index an already built data1 tree.
 *
 * The reference of the root's abstract syntax is turned into a Z39.50 schema
 * OID; when that conversion succeeds the OID is reported through
 * p->schemaAdd. The return value is that of dumpkeys for the whole tree.
 *
 * NOTE(review): n->u.root.absyn is dereferenced below and no null check is
 * visible here -- confirm one exists.
 */
440 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
443 int oidtmp[OID_SIZE];
446 oe.proto = PROTO_Z3950;
447 oe.oclass = CLASS_SCHEMA;
450 oe.value = n->u.root.absyn->reference;
452 if ((oid_ent_to_oid (&oe, oidtmp)))
453 (*p->schemaAdd)(p, oidtmp);
457 return dumpkeys(n, p, 0, &wrd);
/*
 * grs_extract_sub: read one record through the handler selected by
 * p->subType and index the resulting tree.
 *
 * Return values visible below:
 *   RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER  read_grs_type returned non-zero
 *   RECCTRL_EXTRACT_EOF                   (condition not visible here)
 *   RECCTRL_EXTRACT_ERROR                 the tree has no abstract syntax
 *   RECCTRL_EXTRACT_ERROR_GENERIC         dumpkeys returned a negative value
 *   RECCTRL_EXTRACT_OK                    otherwise
 */
460 static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
464 struct grs_read_info gri;
466 int oidtmp[OID_SIZE];
/* hand the caller's I/O callbacks and offset on to the reader */
469 gri.readf = p->readf;
470 gri.seekf = p->seekf;
471 gri.tellf = p->tellf;
474 gri.offset = p->offset;
478 if (read_grs_type (h, &gri, p->subType, &n))
479 return RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER;
481 return RECCTRL_EXTRACT_EOF;
482 oe.proto = PROTO_Z3950;
483 oe.oclass = CLASS_SCHEMA;
/* NOTE(review): no data1_free_tree call is visible before this return, unlike
 * the later exits -- confirm the tree is not leaked on this path */
485 if (!n->u.root.absyn)
486 return RECCTRL_EXTRACT_ERROR;
/* report the schema OID of the abstract syntax when it can be resolved */
490 oe.value = n->u.root.absyn->reference;
491 if ((oid_ent_to_oid (&oe, oidtmp)))
492 (*p->schemaAdd)(p, oidtmp);
495 /* ensure our data1 tree is UTF-8 */
496 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
499 data1_pr_tree (p->dh, n, stdout);
/* the tree is freed on both the failure and the success path */
503 if (dumpkeys(n, p, 0, &wrd) < 0)
505 data1_free_tree(p->dh, n);
506 return RECCTRL_EXTRACT_ERROR_GENERIC;
508 data1_free_tree(p->dh, n);
509 return RECCTRL_EXTRACT_OK;
/*
 * grs_extract: extraction entry point. Creates a fresh NMEM, casts
 * clientData to the handler collection and delegates to grs_extract_sub.
 *
 * NOTE(review): the release of mem and the return statement are not visible
 * here -- presumably mem is destroyed and ret returned.
 */
512 static int grs_extract(void *clientData, struct recExtractCtrl *p)
515 NMEM mem = nmem_create ();
516 struct grs_handlers *h = (struct grs_handlers *) clientData;
518 ret = grs_extract_sub(h, p, mem);
524 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/*
 * process_comp: apply the record composition c to the tree rooted at n.
 *
 * Simple compositions must use the generic element-set-name form. Complex
 * compositions are examined through their generic element spec, which is
 * either an element set name or an external Espec-1.
 * Element set names are resolved with data1_getesetbyname against the root's
 * abstract syntax. An Espec-1 is applied with data1_doespec1, whose result is
 * returned directly.
 * Diagnostics returned in the visible lines: 26 (simple form is not generic)
 * and 25 (unknown element set name, or unknown external espec).
 */
526 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c)
528 data1_esetname *eset;
534 case Z_RecordComp_simple:
535 if (c->u.simple->which != Z_ElementSetNames_generic)
536 return 26; /* only generic form supported. Fix this later */
537 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
538 c->u.simple->u.generic)))
540 logf(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
541 return 25; /* invalid esetname */
543 logf(LOG_DEBUG, "Esetname '%s' in simple compspec",
544 c->u.simple->u.generic);
547 case Z_RecordComp_complex:
548 if (c->u.complex->generic)
550 /* insert check for schema */
/* p is the element spec of the generic part; the cases below branch on its kind */
551 if ((p = c->u.complex->generic->elementSpec))
555 case Z_ElementSpec_elementSetName:
557 data1_getesetbyname(dh, n->u.root.absyn,
558 p->u.elementSetName)))
560 logf(LOG_LOG, "Unknown esetname '%s'",
561 p->u.elementSetName);
562 return 25; /* invalid esetname */
564 logf(LOG_DEBUG, "Esetname '%s' in complex compspec",
565 p->u.elementSetName);
568 case Z_ElementSpec_externalSpec:
/* only an external of kind Espec-1 is accepted */
569 if (p->u.externalSpec->which == Z_External_espec1)
571 logf(LOG_DEBUG, "Got Espec-1");
572 espec = p->u.externalSpec-> u.espec1;
576 logf(LOG_LOG, "Unknown external espec.");
577 return 25; /* bad. what is proper diagnostic? */
588 logf (LOG_DEBUG, "Element: Espec-1 match");
589 return data1_doespec1(dh, n, espec);
593 logf (LOG_DEBUG, "Element: all match");
598 /* Add Zebra info in separate namespace ...
601 <metadata xmlns="http://www.indexdata.dk/zebra/">
603 <localnumber>447</localnumber>
604 <filename>records/genera.xml</filename>
/*
 * zebra_xml_metadata: append a tag named "idzebra", carrying an xmlns
 * attribute for the Zebra namespace, under top and fill it with child
 * elements built from p: size (p->recordSize), score (p->score),
 * localnumber (p->localno) and filename (p->fname).
 *
 * The i2 and i4 strings are inserted as text nodes between the elements.
 * NOTE(review): any conditions guarding the individual child elements, and
 * the assignment of idzebra_ns[2], are not visible here.
 */
609 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
612 const char *idzebra_ns[3];
613 const char *i2 = "\n ";
614 const char *i4 = "\n ";
/* attribute name and value handed to data1_mk_tag below */
617 idzebra_ns[0] = "xmlns";
618 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
621 data1_mk_text (p->dh, mem, i2, top);
623 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
625 data1_mk_text (p->dh, mem, "\n", top);
/* children of the new tag follow, each preceded by an i4 text node */
627 data1_mk_text (p->dh, mem, i4, n);
629 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
633 data1_mk_text (p->dh, mem, i4, n);
634 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
636 data1_mk_text (p->dh, mem, i4, n);
637 data1_mk_tag_data_int (p->dh, n, "localnumber", p->localno, mem);
640 data1_mk_text (p->dh, mem, i4, n);
641 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
643 data1_mk_text (p->dh, mem, i2, n);
/*
 * grs_retrieve: read a record through the handler selected by p->subType,
 * decorate and map the resulting data1 tree, and render it into p->rec_buf
 * in the requested output format.
 *
 * Steps visible in this function, in order:
 *  - read the record with read_grs_type and convert the tree to UTF-8;
 *  - under the root tag add a "size" element, a rank element (name from
 *    resource "tagrank", default "rank", only when p->score >= 0) and a
 *    system-number element (name from resource "tagsysno", default
 *    "localControlNumber", only when p->localno > 0); a resource value of
 *    "0" suppresses the rank or system-number element;
 *  - when a complex composition names a schema, look for a maptab of the
 *    abstract syntax whose target equals that schema and map the record;
 *  - look for a maptab whose target equals p->input_format and map the record;
 *  - when p->input_format is VAL_GRS1 and the abstract syntax has a
 *    reference, add a "schemaIdentifier" element built from the schema OID;
 *  - apply the element specification with process_comp; a positive result
 *    leads to both trees being freed;
 *  - set p->output_format to p->input_format (or VAL_SUTRS when that is
 *    VAL_NONE) and convert the tree with the matching data1_nodeto* call.
 *
 * NOTE(review): inside the schemaIdentifier block a local named p (target of
 * sprintf and operand of `p - tmp`) apparently shadows the parameter p;
 * presumably that is why dh is copied beforehand -- confirm against the full
 * source.
 */
646 static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
648 data1_node *node = 0, *onode = 0, *top;
651 int res, selected = 0;
653 struct grs_read_info gri;
655 struct grs_handlers *h = (struct grs_handlers *) clientData;
656 int requested_schema = VAL_NONE;
657 data1_marctab *marctab;
/* hand the caller's I/O callbacks on to the reader */
661 gri.readf = p->readf;
662 gri.seekf = p->seekf;
663 gri.tellf = p->tellf;
670 logf (LOG_DEBUG, "grs_retrieve");
671 if (read_grs_type (h, &gri, p->subType, &node))
683 /* ensure our data1 tree is UTF-8 */
684 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
687 data1_pr_tree (p->dh, node, stdout);
689 top = data1_get_root_tag (p->dh, node);
/* record size, formatted as decimal text into the node's own lbuf */
691 logf (LOG_DEBUG, "grs_retrieve: size");
692 if ((dnew = data1_mk_tag_data_wd(p->dh, top, "size", mem)))
694 dnew->u.data.what = DATA1I_text;
695 dnew->u.data.data = dnew->lbuf;
696 sprintf(dnew->u.data.data, "%d", p->recordSize);
697 dnew->u.data.len = strlen(dnew->u.data.data);
/* rank element: skipped when the resource is "0" or the score is negative */
700 tagname = res_get_def(p->res, "tagrank", "rank");
701 if (strcmp(tagname, "0") && p->score >= 0 &&
702 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
704 logf (LOG_DEBUG, "grs_retrieve: %s", tagname);
705 dnew->u.data.what = DATA1I_num;
706 dnew->u.data.data = dnew->lbuf;
707 sprintf(dnew->u.data.data, "%d", p->score);
708 dnew->u.data.len = strlen(dnew->u.data.data);
/* system number element: skipped when the resource is "0" or localno is not positive */
711 tagname = res_get_def(p->res, "tagsysno", "localControlNumber");
712 if (strcmp(tagname, "0") && p->localno > 0 &&
713 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
715 logf (LOG_DEBUG, "grs_retrieve: %s", tagname);
716 dnew->u.data.what = DATA1I_text;
717 dnew->u.data.data = dnew->lbuf;
719 sprintf(dnew->u.data.data, "%d", p->localno);
720 dnew->u.data.len = strlen(dnew->u.data.data);
723 data1_pr_tree (p->dh, node, stdout);
/* a schema is requested only through a complex composition with a generic part */
725 if (p->comp && p->comp->which == Z_RecordComp_complex &&
726 p->comp->u.complex->generic &&
727 p->comp->u.complex->generic->schema)
/* NOTE(review): oe is dereferenced two lines below; a null check on the
 * lookup result is not visible here -- confirm one exists */
729 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema);
731 requested_schema = oe->value;
734 /* If schema has been specified, map if possible, then check that
735 * we got the right one
737 if (requested_schema != VAL_NONE)
739 logf (LOG_DEBUG, "grs_retrieve: schema mapping");
740 for (map = node->u.root.absyn->maptabs; map; map = map->next)
742 if (map->target_absyn_ref == requested_schema)
745 if (!(node = data1_map_record(p->dh, onode, map, mem)))
754 if (node->u.root.absyn &&
755 requested_schema != node->u.root.absyn->reference)
763 * Does the requested format match a known syntax-mapping? (this reflects
764 * the overlap of schema and formatting which is inherent in the MARC
767 yaz_log (LOG_DEBUG, "grs_retrieve: syntax mapping");
768 if (node->u.root.absyn)
769 for (map = node->u.root.absyn->maptabs; map; map = map->next)
771 if (map->target_absyn_ref == p->input_format)
774 if (!(node = data1_map_record(p->dh, onode, map, mem)))
783 yaz_log (LOG_DEBUG, "grs_retrieve: schemaIdentifier");
784 if (node->u.root.absyn &&
785 node->u.root.absyn->reference != VAL_NONE &&
786 p->input_format == VAL_GRS1)
790 int oidtmp[OID_SIZE];
792 oe.proto = PROTO_Z3950;
793 oe.oclass = CLASS_SCHEMA;
794 oe.value = node->u.root.absyn->reference;
796 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
799 data1_handle dh = p->dh;
803 for (ii = oid; *ii >= 0; ii++)
807 sprintf(p, "%d", *ii);
810 if ((dnew = data1_mk_tag_data_wd(dh, top,
811 "schemaIdentifier", mem)))
813 dnew->u.data.what = DATA1I_oid;
814 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
815 memcpy(dnew->u.data.data, tmp, p - tmp);
816 dnew->u.data.len = p - tmp;
821 logf (LOG_DEBUG, "grs_retrieve: element spec");
822 if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0)
826 data1_free_tree(p->dh, onode);
827 data1_free_tree(p->dh, node);
831 else if (p->comp && !res)
835 data1_pr_tree (p->dh, node, stdout);
837 logf (LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
838 switch (p->output_format = (p->input_format != VAL_NONE ?
839 p->input_format : VAL_SUTRS))
842 zebra_xml_metadata (p, top, mem);
845 data1_pr_tree (p->dh, node, stdout);
849 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
851 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
856 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
857 memcpy (new_buf, p->rec_buf, p->rec_len);
858 p->rec_buf = new_buf;
863 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
865 p->diagnostic = 238; /* not available in requested syntax */
/* NOTE(review): rec_len is set to (size_t)-1 for this format and the two that
 * follow; presumably a marker that rec_buf is not a plain byte buffer -- confirm */
867 p->rec_len = (size_t) (-1);
870 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
874 p->rec_len = (size_t) (-1);
877 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
881 p->rec_len = (size_t) (-1);
/* the tree is converted from UTF-8 to p->encoding before being rendered to a buffer */
885 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
886 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
/* duplicate the rendered buffer into memory obtained from p->odr */
891 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
892 memcpy (new_buf, p->rec_buf, p->rec_len);
893 p->rec_buf = new_buf;
897 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
902 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
903 memcpy (new_buf, p->rec_buf, p->rec_len);
904 p->rec_buf = new_buf;
908 if (!node->u.root.absyn)
/* look for a MARC table of the abstract syntax whose reference equals the requested format */
913 for (marctab = node->u.root.absyn->marc; marctab;
914 marctab = marctab->next)
915 if (marctab->reference == p->input_format)
923 data1_iconv (p->dh, mem, node, p->encoding, "UTF-8");
924 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
925 selected, &p->rec_len)))
929 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
930 memcpy (new_buf, p->rec_buf, p->rec_len);
931 p->rec_buf = new_buf;
/* release the working tree and the tree held in onode */
935 data1_free_tree(p->dh, node);
937 data1_free_tree(p->dh, onode);
/* File-local record type descriptor; its initializer is not visible here. */
942 static struct recType grs_type =
/* Externally visible handle that points at the descriptor above. */
951 RecType recTypeGrs = &grs_type;