2 * Copyright (C) 1994-2002, Index Data
5 * $Id: recgrs.c,v 1.54 2002-07-05 16:07:02 adam Exp $
10 #include <sys/types.h>
/* Word/buffer size limit for this module.
 * NOTE(review): the only related use visible in this listing is the
 * 511-character field width in index_termlist's sscanf format, presumably
 * sized to a buffer of GRS_MAX_WORD bytes - confirm against the full file. */
21 #define GRS_MAX_WORD 512
/* Link to the following handler; the lookup loop in read_grs_type and the
 * push in grs_add_handler both go through this field. */
27 struct grs_handler *next;
/* Head of the handler list (a new node gets its next set to this head in
 * grs_add_handler). */
31 struct grs_handler *handlers;
/*
 * read_grs_type: select a handler using the part of `type` that precedes
 * the first '.', and let that handler's read function produce the record
 * tree in *root.  The text after the '.' is copied into p->type.
 *
 * NOTE(review): this listing omits lines (braces, returns, some
 * conditions), so the return convention is not visible here; grs_extract_sub
 * treats a non-zero result as an extraction error.
 */
34 static int read_grs_type (struct grs_handlers *h,
35 struct grs_read_info *p, const char *type,
38 struct grs_handler *gh = h->handlers;
/* cp: first '.' in type, or NULL when there is none */
39 const char *cp = strchr (type, '.');
/* no '.' (or a leading '.'): cp is moved to the terminating NUL of type */
41 if (cp == NULL || cp == type)
43 cp = strlen(type) + type;
/* presumably the else branch: copy what follows the '.' into p->type.
 * NOTE(review): unbounded strcpy; the size of p->type is not visible here. */
47 strcpy (p->type, cp+1);
48 for (gh = h->handlers; gh; gh = gh->next)
/* only the first cp-type bytes are compared, so a longer handler name with
 * the same prefix would also match unless a line not shown rules it out */
50 if (!memcmp (type, gh->type->type, cp-type))
/* the handler's init hook supplies its clientData; presumably run once per
 * handler (the guarding condition is not visible) */
55 gh->clientData = (*gh->type->init)();
/* lend the handler's clientData to the reader through p, run the reader,
 * then store back whatever the reader left in p->clientData */
57 p->clientData = gh->clientData;
58 *root = (gh->type->read)(p);
59 gh->clientData = p->clientData;
/*
 * grs_add_handler: allocate a handler node for record type t and link it in
 * front of the current list head of h.
 * NOTE(review): the remaining field assignments (and the update of
 * h->handlers itself) are on lines not present in this listing.
 */
66 static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t)
68 struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh));
69 gh->next = h->handlers;
/*
 * grs_init: allocate the handler list and register the sgml, regx, tcl,
 * marc and xml record-type handlers on it.
 * NOTE(review): the gaps between the registrations suggest some of them sit
 * behind preprocessor guards not shown here; the return statement is also
 * not visible (presumably h is returned as the clientData).
 */
76 static void *grs_init(RecType recType)
78 struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h));
81 grs_add_handler (h, recTypeGrs_sgml);
82 grs_add_handler (h, recTypeGrs_regx);
84 grs_add_handler (h, recTypeGrs_tcl);
86 grs_add_handler (h, recTypeGrs_marc);
88 grs_add_handler (h, recTypeGrs_xml);
/*
 * grs_destroy: tear down the handler list passed as clientData, calling
 * each handler type's destroy hook with that handler's clientData.
 * NOTE(review): the loop header, any "was it initialised" guard and the
 * freeing of the nodes are on lines not present in this listing.
 */
93 static void grs_destroy(void *clientData)
95 struct grs_handlers *h = (struct grs_handlers *) clientData;
/* gh_next: presumably holds the successor while the current node is released */
96 struct grs_handler *gh = h->handlers, *gh_next;
101 (*gh->type->destroy)(gh->clientData);
/*
 * index_xpath: fill *wrd with path-index information for node n, using the
 * VAL_IDXPATH attribute set.  Two forms are visible: the node's own data,
 * and a tag path assembled by walking from n up through its parents.
 * With p->flagShowRecords set, a short preview is printed to stdout,
 * indented by level.
 *
 * NOTE(review): how `use` is applied (callers in dumpkeys pass 1, 1016 and
 * 2) and the conditions selecting each form are on lines not shown here.
 */
108 static void index_xpath (data1_node *n, struct recExtractCtrl *p,
109 int level, RecWord *wrd, int use)
/* scratch buffer for the assembled tag path */
112 char tag_path_full[1024];
/* data form: the word refers directly to the node's data bytes */
120 wrd->string = n->u.data.data;
121 wrd->length = n->u.data.len;
/* NOTE(review): this line ends in ',' (comma operator), not ';', so it is
 * joined to the following statement, which is not visible here */
122 wrd->attrSet = VAL_IDXPATH,
124 if (p->flagShowRecords)
126 printf("%*s data=", (level + 1) * 4, "");
/* preview at most the first 8 bytes */
127 for (i = 0; i<wrd->length && i < 8; i++)
128 fputc (wrd->string[i], stdout);
/* tag form: visit n and then each ancestor, appending "<tag>/" per tag
 * node, so the path is written in leaf-to-root order */
137 for (nn = n; nn; nn = nn->parent)
139 if (nn->which == DATA1N_tag)
141 size_t tlen = strlen(nn->u.tag.tag);
/* bound check against the buffer, leaving 2 bytes of slack; the action
 * taken when it trips is not visible */
142 if (tlen + flen > (sizeof(tag_path_full)-2))
144 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
146 tag_path_full[flen++] = '/';
148 else if (nn->which == DATA1N_root)
152 wrd->string = tag_path_full;
154 wrd->attrSet = VAL_IDXPATH;
156 if (p->flagShowRecords)
158 printf("%*s tag=", (level + 1) * 4, "");
/* preview at most the first 40 bytes of the path */
159 for (i = 0; i<wrd->length && i < 40; i++)
160 fputc (wrd->string[i], stdout);
/*
 * index_termlist: index node n according to the termlists attached to the
 * element of tag node par (or of the nearest ancestor tag that has an
 * element).  For every termlist entry the word source is chosen from the
 * entry's `source` string and the register type / attribute set / use
 * attribute are copied into *wrd.
 *
 * NOTE(review): lines are missing from this listing; the calls that hand
 * *wrd to the indexer and the early returns are not visible.
 */
173 static void index_termlist (data1_node *par, data1_node *n,
174 struct recExtractCtrl *p, int level, RecWord *wrd)
176 data1_termlist *tlist = 0;
177 data1_datatype dtype = DATA1K_string;
179 * cycle up towards the root until we find a tag with an att..
180 * this has the effect of indexing locally defined tags with
181 * the attribute of their ancestor in the record.
184 while (!par->u.tag.element)
185 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
/* no usable ancestor or no termlists on it: presumably nothing to index */
187 if (!par || !(tlist = par->u.tag.element->termlists))
/* take the data type from the element's tag when it has one */
189 if (par->u.tag.element->tag)
190 dtype = par->u.tag.element->tag->kind;
192 for (; tlist; tlist = tlist->next)
195 /* consider source */
/* source "data": the text of a data node */
198 if (!strcmp (tlist->source, "data") && n->which == DATA1N_data)
200 wrd->string = n->u.data.data;
201 wrd->length = n->u.data.len;
/* source "tag": the tag name itself */
203 else if (!strcmp (tlist->source, "tag") && n->which == DATA1N_tag)
205 wrd->string = n->u.tag.tag;
206 wrd->length = strlen(n->u.tag.tag);
/* source "attr(NAME)": the value of the tag attribute called NAME; the
 * scan limits NAME to 511 characters (declaration of xattr not visible) */
208 else if (sscanf (tlist->source, "attr(%511[^)])", xattr) == 1 &&
209 n->which == DATA1N_tag)
/* NOTE: this local p shadows the recExtractCtrl parameter p in this scope */
211 data1_xattr *p = n->u.tag.attributes;
212 while (p && strcmp (p->name, xattr))
216 wrd->string = p->value;
217 wrd->length = strlen(p->value);
222 if (p->flagShowRecords)
225 printf("%*sIdx: [%s]", (level + 1) * 4, "",
227 printf("%s:%s [%d] %s",
228 tlist->att->parent->name,
229 tlist->att->name, tlist->att->value,
/* preview at most the first 8 bytes of the word */
232 for (i = 0; i<wrd->length && i < 8; i++)
233 fputc (wrd->string[i], stdout);
237 fputc ('\n', stdout);
/* register type is the first character of the termlist's structure string */
241 wrd->reg_type = *tlist->structure;
242 wrd->attrSet = (int) (tlist->att->parent->reference);
243 wrd->attrUse = tlist->att->locals->local;
/*
 * dumpkeys: walk node n and its following siblings, recursing into
 * children, and index tags and data via index_termlist / index_xpath.
 * With p->flagShowRecords set, a description of each node is printed to
 * stdout, indented 4 columns per level.  A negative result from the
 * recursive call is tested for; the return statements themselves are on
 * lines not present in this listing.
 */
250 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level,
253 for (; n; n = n->next)
255 if (p->flagShowRecords) /* display element description to user */
257 if (n->which == DATA1N_root)
259 printf("%*s", level * 4, "");
260 printf("Record type: '%s'\n", n->u.root.type);
262 else if (n->which == DATA1N_tag)
266 printf("%*s", level * 4, "");
/* a tag without an element is reported as a local tag */
267 if (!(e = n->u.tag.element))
268 printf("Local tag: '%s'\n", n->u.tag.tag);
271 printf("Elm: '%s' ", e->name);
274 data1_tag *t = e->tag;
276 printf("TagNam: '%s' ", t->names->name);
279 printf("%s[%d],", t->tagset->name, t->tagset->type);
/* tag value is printed as a number or a quoted string depending on its kind */
282 if (t->which == DATA1T_numeric)
283 printf("%d)", t->value.numeric);
285 printf("'%s')", t->value.string);
292 if (n->which == DATA1N_tag)
/* a tag node acts as its own "parent" for termlist lookup */
294 index_termlist (n, n, p, level, wrd);
295 /* index start tag */
/* path indexing is done only for records whose root has no absyn */
296 if (!n->root->u.root.absyn)
297 index_xpath (n, p, level, wrd, 1);
/* descend into children; a negative result is an error */
301 if (dumpkeys(n->child, p, level + 1, wrd) < 0)
305 if (n->which == DATA1N_data)
307 data1_node *par = get_parent_tag(p->dh, n);
309 if (p->flagShowRecords)
311 printf("%*s", level * 4, "");
/* long data is abbreviated to its first 24 and last 6 bytes */
313 if (n->u.data.len > 32)
314 printf("'%.24s ... %.6s'\n", n->u.data.data,
315 n->u.data.data + n->u.data.len-6);
316 else if (n->u.data.len > 0)
317 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
/* NOTE(review): any guard on par being non-NULL is not visible here */
323 index_termlist (par, n, p, level, wrd);
324 if (!n->root->u.root.absyn)
325 index_xpath (n, p, level, wrd, 1016);
/* second tag check, placed after the recursion above: presumably the
 * end-tag counterpart of the use=1 call */
329 if (n->which == DATA1N_tag)
332 if (!n->root->u.root.absyn)
333 index_xpath (n, p, level, wrd, 2);
/* separator line once a root node has been shown */
337 if (p->flagShowRecords && n->which == DATA1N_root)
339 printf("%*s-------------\n\n", level * 4, "");
/*
 * grs_extract_tree: index an already-built data1 tree n.  The schema OID is
 * built from the root's absyn reference and reported through p->schemaAdd,
 * then the tree is handed to dumpkeys, whose result is returned.
 * NOTE(review): the guard on n->u.root.absyn being non-NULL and the setup
 * of wrd are on lines not present in this listing.
 */
345 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
348 int oidtmp[OID_SIZE];
351 oe.proto = PROTO_Z3950;
352 oe.oclass = CLASS_SCHEMA;
355 oe.value = n->u.root.absyn->reference;
/* only report the schema when the entry converts to an OID */
357 if ((oid_ent_to_oid (&oe, oidtmp)))
358 (*p->schemaAdd)(p, oidtmp);
362 return dumpkeys(n, p, 0, &wrd);
/*
 * grs_extract_sub: read one record with the handler selected by p->subType
 * and index it.  Returns one of the RECCTRL_EXTRACT_* codes: ERROR when
 * reading or indexing fails, EOF (condition not visible, presumably when no
 * tree was produced), OK otherwise.  The tree is freed on both the error
 * and success paths after dumpkeys.
 */
365 static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
369 struct grs_read_info gri;
371 int oidtmp[OID_SIZE];
/* forward the caller's stream callbacks and start offset to the reader */
374 gri.readf = p->readf;
375 gri.seekf = p->seekf;
376 gri.tellf = p->tellf;
379 gri.offset = p->offset;
383 if (read_grs_type (h, &gri, p->subType, &n))
384 return RECCTRL_EXTRACT_ERROR;
386 return RECCTRL_EXTRACT_EOF;
387 oe.proto = PROTO_Z3950;
388 oe.oclass = CLASS_SCHEMA;
/* NOTE(review): dumpkeys explicitly handles roots without an absyn, so this
 * check may sit inside a preprocessor guard not shown here - confirm */
390 if (!n->u.root.absyn)
391 return RECCTRL_EXTRACT_ERROR;
395 oe.value = n->u.root.absyn->reference;
396 if ((oid_ent_to_oid (&oe, oidtmp)))
397 (*p->schemaAdd)(p, oidtmp);
/* tree dump to stdout; presumably a debugging aid under a guard not shown */
400 data1_pr_tree (p->dh, n, stdout);
/* character-set conversion of the tree between ISO-8859-1 and UTF-8; the
 * to/from order of the two names is not established by this listing */
402 data1_iconv (p->dh, mem, n, "ISO-8859-1", "UTF-8");
405 if (dumpkeys(n, p, 0, &wrd) < 0)
407 data1_free_tree(p->dh, n);
408 return RECCTRL_EXTRACT_ERROR;
410 data1_free_tree(p->dh, n);
411 return RECCTRL_EXTRACT_OK;
/*
 * grs_extract: extract entry point.  Creates a fresh NMEM handle, treats
 * clientData as the handler list and delegates to grs_extract_sub.
 * NOTE(review): the release of mem and the return of ret are on lines not
 * present in this listing.
 */
414 static int grs_extract(void *clientData, struct recExtractCtrl *p)
417 NMEM mem = nmem_create ();
418 struct grs_handlers *h = (struct grs_handlers *) clientData;
420 ret = grs_extract_sub(h, p, mem);
426 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/*
 * process_comp: apply the record composition c to tree n.  An element set
 * name (simple or complex form) is looked up in the root's absyn with
 * data1_getesetbyname; an external Espec-1 is taken directly.  When an
 * espec was found the result of data1_doespec1 is returned.
 * Visible diagnostic returns: 26 for a non-generic simple element set name,
 * 25 for an unknown element set name or unknown external espec.
 * NOTE(review): the switch headers, several declarations (espec, the local
 * p) and the remaining returns are on lines not present in this listing.
 */
428 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c)
430 data1_esetname *eset;
436 case Z_RecordComp_simple:
437 if (c->u.simple->which != Z_ElementSetNames_generic)
438 return 26; /* only generic form supported. Fix this later */
439 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
440 c->u.simple->u.generic)))
442 logf(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
443 return 25; /* invalid esetname */
445 logf(LOG_DEBUG, "Esetname '%s' in simple compspec",
446 c->u.simple->u.generic);
449 case Z_RecordComp_complex:
450 if (c->u.complex->generic)
452 /* insert check for schema */
/* p here is the element spec of the generic complex composition */
453 if ((p = c->u.complex->generic->elementSpec))
457 case Z_ElementSpec_elementSetName:
459 data1_getesetbyname(dh, n->u.root.absyn,
460 p->u.elementSetName)))
462 logf(LOG_LOG, "Unknown esetname '%s'",
463 p->u.elementSetName);
464 return 25; /* invalid esetname */
466 logf(LOG_DEBUG, "Esetname '%s' in complex compspec",
467 p->u.elementSetName);
470 case Z_ElementSpec_externalSpec:
/* only Espec-1 externals are accepted */
471 if (p->u.externalSpec->which == Z_External_espec1)
473 logf(LOG_DEBUG, "Got Espec-1");
474 espec = p->u.externalSpec-> u.espec1;
478 logf(LOG_LOG, "Unknown external espec.");
479 return 25; /* bad. what is proper diagnostic? */
490 logf (LOG_DEBUG, "Element: Espec-1 match");
491 return data1_doespec1(dh, n, espec);
495 logf (LOG_DEBUG, "Element: all match");
/*
 * add_idzebra_info: decorate tag node top with Zebra bookkeeping: an
 * xmlns:idzebra namespace attribute plus idzebra:size, idzebra:score,
 * idzebra:localnumber and idzebra:filename child elements built from the
 * retrieve control block p.
 * NOTE(review): the trailing arguments of the data1_mk_* calls, any
 * conditions around them and the terminator of idzebra_ns are on lines not
 * present in this listing.
 */
500 static void add_idzebra_info (struct recRetrieveCtrl *p, data1_node *top,
/* attribute list handed to data1_tag_add_attr: name at [0], value at [1] */
503 const char *idzebra_ns[7];
505 idzebra_ns[0] = "xmlns:idzebra";
506 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
509 data1_tag_add_attr (p->dh, mem, top, idzebra_ns);
511 data1_mk_tag_data_int (p->dh, top, "idzebra:size", p->recordSize,
514 data1_mk_tag_data_int (p->dh, top, "idzebra:score",
517 data1_mk_tag_data_int (p->dh, top, "idzebra:localnumber", p->localno,
520 data1_mk_tag_data_text(p->dh, top, "idzebra:filename",
/*
 * grs_retrieve: retrieve entry point.  Stages visible in this listing, in
 * order:
 *   1. read the record with the handler selected by p->subType;
 *   2. add "size", rank and system-number tags under the root tag (the
 *      latter two named by the "tagrank" / "tagsysno" resources, where the
 *      value "0" disables the tag);
 *   3. determine a requested schema from a complex composition and map the
 *      record through a matching maptab of the root's absyn;
 *   4. map again when a maptab targets p->input_format (syntax mapping);
 *   5. for GRS-1 with a known schema, add a "schemaIdentifier" tag holding
 *      the dotted OID.  In that section a local char pointer named p
 *      shadows the control-block parameter, which is why p->dh is first
 *      saved in dh and the length is computed as p - tmp;
 *   6. apply the element specification via process_comp;
 *   7. render the tree in the output format and free the tree(s).
 *
 * NOTE(review): many lines (case labels, error returns, braces) are missing
 * from this listing, so error codes and exact guards are not documented.
 * The block comment opened at "If schema has been specified" has no visible
 * closing delimiter here; no comments are added inside that stretch.
 */
524 static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
526 data1_node *node = 0, *onode = 0, *top;
529 int res, selected = 0;
531 struct grs_read_info gri;
533 struct grs_handlers *h = (struct grs_handlers *) clientData;
534 int requested_schema = VAL_NONE;
535 data1_marctab *marctab;
/* forward the caller's stream callbacks to the reader */
539 gri.readf = p->readf;
540 gri.seekf = p->seekf;
541 gri.tellf = p->tellf;
548 logf (LOG_DEBUG, "grs_retrieve");
549 if (read_grs_type (h, &gri, p->subType, &node))
562 data1_pr_tree (p->dh, node, stdout);
564 top = data1_get_root_tag (p->dh, node);
566 logf (LOG_DEBUG, "grs_retrieve: size");
/* "size" tag: the number is formatted into the new node's own lbuf */
567 if ((dnew = data1_mk_tag_data_wd(p->dh, top, "size", mem)))
569 dnew->u.data.what = DATA1I_text;
570 dnew->u.data.data = dnew->lbuf;
571 sprintf(dnew->u.data.data, "%d", p->recordSize);
572 dnew->u.data.len = strlen(dnew->u.data.data);
/* rank tag: added only when not disabled ("0") and the score is >= 0 */
575 tagname = res_get_def(p->res, "tagrank", "rank");
576 if (strcmp(tagname, "0") && p->score >= 0 &&
577 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
579 logf (LOG_DEBUG, "grs_retrieve: %s", tagname);
580 dnew->u.data.what = DATA1I_num;
581 dnew->u.data.data = dnew->lbuf;
582 sprintf(dnew->u.data.data, "%d", p->score);
583 dnew->u.data.len = strlen(dnew->u.data.data);
/* system-number tag: added only when not disabled and localno is > 0 */
586 tagname = res_get_def(p->res, "tagsysno", "localControlNumber");
587 if (strcmp(tagname, "0") && p->localno > 0 &&
588 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
590 logf (LOG_DEBUG, "grs_retrieve: %s", tagname);
591 dnew->u.data.what = DATA1I_text;
592 dnew->u.data.data = dnew->lbuf;
594 sprintf(dnew->u.data.data, "%d", p->localno);
595 dnew->u.data.len = strlen(dnew->u.data.data);
598 data1_pr_tree (p->dh, node, stdout);
/* a schema can only be requested through the generic complex composition */
600 if (p->comp && p->comp->which == Z_RecordComp_complex &&
601 p->comp->u.complex->generic &&
602 p->comp->u.complex->generic->schema)
604 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema);
/* NOTE(review): a NULL check on oe, if any, is on a line not shown here */
606 requested_schema = oe->value;
609 /* If schema has been specified, map if possible, then check that
610 * we got the right one
612 if (requested_schema != VAL_NONE)
614 logf (LOG_DEBUG, "grs_retrieve: schema mapping");
615 for (map = node->u.root.absyn->maptabs; map; map = map->next)
617 if (map->target_absyn_ref == requested_schema)
620 if (!(node = data1_map_record(p->dh, onode, map, mem)))
629 if (node->u.root.absyn &&
630 requested_schema != node->u.root.absyn->reference)
638 * Does the requested format match a known syntax-mapping? (this reflects
639 * the overlap of schema and formatting which is inherent in the MARC
642 yaz_log (LOG_DEBUG, "grs_retrieve: syntax mapping");
643 if (node->u.root.absyn)
644 for (map = node->u.root.absyn->maptabs; map; map = map->next)
646 if (map->target_absyn_ref == p->input_format)
649 if (!(node = data1_map_record(p->dh, onode, map, mem)))
658 yaz_log (LOG_DEBUG, "grs_retrieve: schemaIdentifier");
659 if (node->u.root.absyn &&
660 node->u.root.absyn->reference != VAL_NONE &&
661 p->input_format == VAL_GRS1)
665 int oidtmp[OID_SIZE];
667 oe.proto = PROTO_Z3950;
668 oe.oclass = CLASS_SCHEMA;
669 oe.value = node->u.root.absyn->reference;
671 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
674 data1_handle dh = p->dh;
678 for (ii = oid; *ii >= 0; ii++)
682 sprintf(p, "%d", *ii);
687 if ((dnew = data1_mk_tag_data_wd(dh, node,
688 "schemaIdentifier", mem)))
690 dnew->u.data.what = DATA1I_oid;
691 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
692 memcpy(dnew->u.data.data, tmp, p - tmp);
693 dnew->u.data.len = p - tmp;
698 logf (LOG_DEBUG, "grs_retrieve: element spec");
699 if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0)
703 data1_free_tree(p->dh, onode);
704 data1_free_tree(p->dh, node);
708 else if (p->comp && !res)
712 data1_pr_tree (p->dh, node, stdout);
715 data1_iconv (p->dh, mem, node, "ISO-8859-1", "UTF-8");
717 logf (LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
718 switch (p->output_format = (p->input_format != VAL_NONE ?
719 p->input_format : VAL_SUTRS))
723 add_idzebra_info (p, top, mem);
725 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
730 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
731 memcpy (new_buf, p->rec_buf, p->rec_len);
732 p->rec_buf = new_buf;
737 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
739 p->diagnostic = 238; /* not available in requested syntax */
/* rec_len of (size_t)(-1): presumably tells the caller that rec_buf is a
 * structured object, not a byte buffer - confirm */
741 p->rec_len = (size_t) (-1);
744 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
748 p->rec_len = (size_t) (-1);
751 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
755 p->rec_len = (size_t) (-1);
758 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
/* copy the rendered bytes into memory allocated from p->odr */
763 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
764 memcpy (new_buf, p->rec_buf, p->rec_len);
765 p->rec_buf = new_buf;
769 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
774 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
775 memcpy (new_buf, p->rec_buf, p->rec_len);
776 p->rec_buf = new_buf;
/* remaining formats need an absyn: look for a MARC table whose reference
 * equals the requested format */
780 if (!node->u.root.absyn)
785 for (marctab = node->u.root.absyn->marc; marctab;
786 marctab = marctab->next)
787 if (marctab->reference == p->input_format)
794 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
795 selected, &p->rec_len)))
799 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
800 memcpy (new_buf, p->rec_buf, p->rec_len);
801 p->rec_buf = new_buf;
/* release the working tree and, presumably only when a mapping produced a
 * separate tree, the original one (guard not visible) */
805 data1_free_tree(p->dh, node);
807 data1_free_tree(p->dh, onode);
/* Record-type descriptor for the GRS family.
 * NOTE(review): the initializer lines are not present in this listing;
 * presumably they name grs_init, grs_destroy, grs_extract and grs_retrieve. */
812 static struct recType grs_type =
/* exported handle through which the descriptor above is reached */
821 RecType recTypeGrs = &grs_type;