2 * Copyright (C) 1994-2002, Index Data
5 * $Id: recgrs.c,v 1.51 2002-05-28 21:10:34 adam Exp $
10 #include <sys/types.h>
21 #define GRS_MAX_WORD 512
/* Link to the following handler; read_grs_type advances along it. */
27 struct grs_handler *next;
/* Start of the handler chain that read_grs_type searches. */
31 struct grs_handler *handlers;
/*
 * read_grs_type: choose a handler from h by the name given in type and
 * have it read a record into *root.
 *
 * The type string may carry a '.'-separated suffix: the bytes before the
 * '.' are compared against each handler's type name, and the text after
 * it is copied into p->type.  The matching handler's clientData is passed
 * to the reader through p->clientData and stored back afterwards.
 *
 * NOTE(review): callers test the result for non-zero to detect failure;
 * the return statements themselves should be confirmed.
 */
34 static int read_grs_type (struct grs_handlers *h,
35 struct grs_read_info *p, const char *type,
38 struct grs_handler *gh = h->handlers;
39 const char *cp = strchr (type, '.');
/* no '.', or '.' as the first character: the whole string is the name */
41 if (cp == NULL || cp == type)
43 cp = strlen(type) + type;
/* NOTE(review): presumably the else branch - copies the suffix after '.' */
47 strcpy (p->type, cp+1);
48 for (gh = h->handlers; gh; gh = gh->next)
/* only the first cp-type bytes take part in the comparison */
50 if (!memcmp (type, gh->type->type, cp-type))
/* handler state comes from the type's init hook */
55 gh->clientData = (*gh->type->init)();
57 p->clientData = gh->clientData;
58 *root = (gh->type->read)(p);
/* keep whatever state the reader left in p */
59 gh->clientData = p->clientData;
/*
 * grs_add_handler: allocate a new grs_handler node with xmalloc and point
 * its next field at the current head of h->handlers.
 * NOTE(review): presumably t is stored in the node and the node becomes
 * the new head of the chain - confirm.
 */
66 static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t)
68 struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh));
69 gh->next = h->handlers;
/*
 * grs_init: allocate the grs_handlers container with xmalloc and register
 * the sgml, regx, tcl, marc and xml record-type handlers through
 * grs_add_handler.
 * NOTE(review): presumably returns h as the clientData later handed to
 * grs_destroy, grs_extract and grs_retrieve - confirm.
 */
76 static void *grs_init(RecType recType)
78 struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h));
81 grs_add_handler (h, recTypeGrs_sgml);
82 grs_add_handler (h, recTypeGrs_regx);
84 grs_add_handler (h, recTypeGrs_tcl);
86 grs_add_handler (h, recTypeGrs_marc);
88 grs_add_handler (h, recTypeGrs_xml);
/*
 * grs_destroy: clientData is the struct grs_handlers pointer.  A handler's
 * destroy hook is invoked with that handler's own clientData.
 * NOTE(review): gh_next suggests the chain is walked and released node by
 * node - confirm.
 */
93 static void grs_destroy(void *clientData)
95 struct grs_handlers *h = (struct grs_handlers *) clientData;
96 struct grs_handler *gh = h->handlers, *gh_next;
101 (*gh->type->destroy)(gh->clientData);
/*
 * index_xpath: prepare *wrd for path-based indexing of node n under the
 * VAL_IDXPATH attribute set.
 *
 * Two kinds of term are produced in this function: the text of a data
 * node (n->u.data), and a path string built in tag_path_full by walking
 * from n up through its parents, appending each tag name (or the root
 * type) followed by '/'.  Because the walk goes leaf-to-root, the
 * innermost name comes first in the string.
 *
 * With p->flagShowRecords set, a short preview of the term is written to
 * stdout, indented by level.
 * NOTE(review): "use" is presumably stored in wrd->attrUse - confirm.
 */
108 static void index_xpath (data1_node *n, struct recExtractCtrl *p,
109 int level, RecWord *wrd, int use)
112 char tag_path_full[1024];
/* term is the data node's text, not NUL-terminated: length is explicit */
120 wrd->string = n->u.data.data;
121 wrd->length = n->u.data.len;
/* NOTE(review): this statement ends in ',' (comma operator), not ';' */
122 wrd->attrSet = VAL_IDXPATH,
124 if (p->flagShowRecords)
126 printf("%*s data=", (level + 1) * 4, "");
/* preview: at most the first 8 bytes of the term */
127 for (i = 0; i<wrd->length && i < 8; i++)
128 fputc (wrd->string[i], stdout);
/* climb from n towards the root, appending one name per level */
137 for (nn = n; nn; nn = nn->parent)
139 if (nn->which == DATA1N_tag)
141 size_t tlen = strlen(nn->u.tag.tag);
/* size check against tag_path_full, keeping two bytes spare */
142 if (tlen + flen > (sizeof(tag_path_full)-2))
144 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
146 tag_path_full[flen++] = '/';
148 else if (nn->which == DATA1N_root)
/* the root contributes its record type name as the last component */
150 size_t tlen = strlen(nn->u.root.type);
151 if (tlen + flen > (sizeof(tag_path_full)-2))
153 memcpy (tag_path_full + flen, nn->u.root.type, tlen);
155 tag_path_full[flen++] = '/';
160 wrd->string = tag_path_full;
/* NOTE(review): comma operator again instead of ';' */
162 wrd->attrSet = VAL_IDXPATH,
164 if (p->flagShowRecords)
166 printf("%*s tag=", (level + 1) * 4, "");
/* preview: at most the first 40 bytes of the path */
167 for (i = 0; i<wrd->length && i < 40; i++)
168 fputc (wrd->string[i], stdout);
/*
 * index_termlist: prepare index terms for node n according to the
 * termlists of the element attached to tag par.
 *
 * If par has no element, its ancestors are tried via get_parent_tag.
 * Each termlist entry names a source for the term:
 *   "data"        - the text of a data node
 *   "tag"         - the name of a tag node
 *   "attr(NAME)"  - the value of attribute NAME on a tag node
 * and supplies the register type (first character of its structure
 * string), the attribute set and the use value copied into *wrd.
 *
 * With p->flagShowRecords set, the index entry is described on stdout.
 */
181 static void index_termlist (data1_node *par, data1_node *n,
182 struct recExtractCtrl *p, int level, RecWord *wrd)
184 data1_termlist *tlist = 0;
185 data1_datatype dtype = DATA1K_string;
187 * cycle up towards the root until we find a tag with an att..
188 * this has the effect of indexing locally defined tags with
189 * the attribute of their ancestor in the record.
192 while (!par->u.tag.element)
193 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
/* nothing to do without an element that carries termlists */
195 if (!par || !(tlist = par->u.tag.element->termlists))
/* the element's tag, when present, overrides the default string kind */
197 if (par->u.tag.element->tag)
198 dtype = par->u.tag.element->tag->kind;
200 for (; tlist; tlist = tlist->next)
203 /* consider source */
206 if (!strcmp (tlist->source, "data") && n->which == DATA1N_data)
208 wrd->string = n->u.data.data;
209 wrd->length = n->u.data.len;
211 else if (!strcmp (tlist->source, "tag") && n->which == DATA1N_tag)
213 wrd->string = n->u.tag.tag;
214 wrd->length = strlen(n->u.tag.tag);
/* %511 bounds the name read into xattr; presumably xattr holds
   GRS_MAX_WORD (512) bytes - confirm */
216 else if (sscanf (tlist->source, "attr(%511[^)])", xattr) == 1 &&
217 n->which == DATA1N_tag)
/* NOTE(review): this local p hides the recExtractCtrl parameter p
   for the extent of its scope */
219 data1_xattr *p = n->u.tag.attributes;
/* linear search of the attribute list by name */
220 while (p && strcmp (p->name, xattr))
224 wrd->string = p->value;
225 wrd->length = strlen(p->value);
230 if (p->flagShowRecords)
233 printf("%*sIdx: [%s]", (level + 1) * 4, "",
235 printf("%s:%s [%d] %s",
236 tlist->att->parent->name,
237 tlist->att->name, tlist->att->value,
/* preview: at most the first 8 bytes of the term */
240 for (i = 0; i<wrd->length && i < 8; i++)
241 fputc (wrd->string[i], stdout);
245 fputc ('\n', stdout);
/* register type is the first character of the structure string */
249 wrd->reg_type = *tlist->structure;
250 wrd->attrSet = (int) (tlist->att->parent->reference);
251 wrd->attrUse = tlist->att->locals->local;
/*
 * dumpkeys: walk the sibling list starting at n (and, recursively, each
 * node's children) and prepare index terms in wrd.
 *
 * Tag nodes are passed to index_termlist with themselves as the starting
 * tag; data nodes are passed together with their parent tag obtained from
 * get_parent_tag.  When the record root has no abstract syntax (absyn),
 * index_xpath is called as well: use 1 before a tag's children, use 2
 * after them, and use 1016 for data.
 *
 * With p->flagShowRecords set, a description of each node is printed,
 * indented four columns per level.
 * NOTE(review): a negative result from the recursive call is checked;
 * presumably it is passed back to the caller - confirm.
 */
258 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level,
261 for (; n; n = n->next)
263 if (p->flagShowRecords) /* display element description to user */
265 if (n->which == DATA1N_root)
267 printf("%*s", level * 4, "");
268 printf("Record type: '%s'\n", n->u.root.type);
270 else if (n->which == DATA1N_tag)
274 printf("%*s", level * 4, "");
/* a tag without an element is reported as a local tag */
275 if (!(e = n->u.tag.element))
276 printf("Local tag: '%s'\n", n->u.tag.tag);
279 printf("Elm: '%s' ", e->name);
282 data1_tag *t = e->tag;
284 printf("TagNam: '%s' ", t->names->name);
287 printf("%s[%d],", t->tagset->name, t->tagset->type);
/* tag values are either numeric or string */
290 if (t->which == DATA1T_numeric)
291 printf("%d)", t->value.numeric);
293 printf("'%s')", t->value.string);
300 if (n->which == DATA1N_tag)
/* the tag itself is the starting point for the termlist lookup */
302 index_termlist (n, n, p, level, wrd);
303 /* index start tag */
304 if (!n->root->u.root.absyn)
305 index_xpath (n, p, level, wrd, 1);
/* children are processed one level deeper */
309 if (dumpkeys(n->child, p, level + 1, wrd) < 0)
313 if (n->which == DATA1N_data)
315 data1_node *par = get_parent_tag(p->dh, n);
317 if (p->flagShowRecords)
319 printf("%*s", level * 4, "");
/* long data: show the first 24 and the last 6 bytes only */
321 if (n->u.data.len > 32)
322 printf("'%.24s ... %.6s'\n", n->u.data.data,
323 n->u.data.data + n->u.data.len-6);
324 else if (n->u.data.len > 0)
325 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
331 index_termlist (par, n, p, level, wrd);
332 if (!n->root->u.root.absyn)
333 index_xpath (n, p, level, wrd, 1016);
337 if (n->which == DATA1N_tag)
/* NOTE(review): presumably the end-tag counterpart of the use 1 call */
340 if (!n->root->u.root.absyn)
341 index_xpath (n, p, level, wrd, 2);
345 if (p->flagShowRecords && n->which == DATA1N_root)
347 printf("%*s-------------\n\n", level * 4, "");
/*
 * grs_extract_tree: index an already built record tree n.
 *
 * The schema reference of the root's abstract syntax is converted to an
 * OID (PROTO_Z3950 / CLASS_SCHEMA) and, when the conversion succeeds,
 * reported through p->schemaAdd.  The result of dumpkeys on the tree,
 * starting at level 0, is returned.
 * NOTE(review): n->u.root.absyn is dereferenced here; confirm that a
 * check for a missing absyn precedes it.
 */
353 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
356 int oidtmp[OID_SIZE];
359 oe.proto = PROTO_Z3950;
360 oe.oclass = CLASS_SCHEMA;
363 oe.value = n->u.root.absyn->reference;
365 if ((oid_ent_to_oid (&oe, oidtmp)))
366 (*p->schemaAdd)(p, oidtmp);
369 return dumpkeys(n, p, 0, &wrd);
/*
 * grs_extract_sub: read one record through the handler selected by
 * p->subType and index it.
 *
 * Results visible here: RECCTRL_EXTRACT_ERROR when read_grs_type fails or
 * dumpkeys returns a negative value, RECCTRL_EXTRACT_EOF, and
 * RECCTRL_EXTRACT_OK after successful indexing.  The record tree is
 * released with data1_free_tree on both the error and the success path
 * after dumpkeys.
 * NOTE(review): RECCTRL_EXTRACT_EOF is presumably returned when no record
 * was read - confirm the condition.
 */
372 static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
376 struct grs_read_info gri;
378 int oidtmp[OID_SIZE];
/* the reader context takes its I/O callbacks and offset from p */
381 gri.readf = p->readf;
382 gri.seekf = p->seekf;
383 gri.tellf = p->tellf;
386 gri.offset = p->offset;
390 if (read_grs_type (h, &gri, p->subType, &n))
391 return RECCTRL_EXTRACT_ERROR;
393 return RECCTRL_EXTRACT_EOF;
394 oe.proto = PROTO_Z3950;
395 oe.oclass = CLASS_SCHEMA;
/* NOTE(review): confirm whether this check is compiled in; dumpkeys has
   explicit handling for roots without an abstract syntax */
397 if (!n->u.root.absyn)
398 return RECCTRL_EXTRACT_ERROR;
/* report the record's schema OID when it can be resolved */
402 oe.value = n->u.root.absyn->reference;
403 if ((oid_ent_to_oid (&oe, oidtmp)))
404 (*p->schemaAdd)(p, oidtmp);
/* NOTE(review): writes the tree to stdout; confirm this is debug-only */
407 data1_pr_tree (p->dh, n, stdout);
410 if (dumpkeys(n, p, 0, &wrd) < 0)
412 data1_free_tree(p->dh, n);
413 return RECCTRL_EXTRACT_ERROR;
415 data1_free_tree(p->dh, n);
416 return RECCTRL_EXTRACT_OK;
/*
 * grs_extract: extraction entry point.  clientData is the struct
 * grs_handlers pointer; a fresh NMEM handle is created with nmem_create
 * and passed, together with h and p, to grs_extract_sub.
 * NOTE(review): presumably mem is destroyed and ret returned afterwards -
 * confirm.
 */
419 static int grs_extract(void *clientData, struct recExtractCtrl *p)
422 NMEM mem = nmem_create ();
423 struct grs_handlers *h = (struct grs_handlers *) clientData;
425 ret = grs_extract_sub(h, p, mem);
431 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/*
 * process_comp: apply the record composition c to the tree rooted at n.
 *
 * Simple composition: only the generic element-set-name form is accepted
 * (otherwise 26); the name is looked up with data1_getesetbyname against
 * the root's abstract syntax and an unknown name yields 25.
 * Complex composition: the generic element spec may be an element set
 * name (looked up the same way) or an external Espec-1; any other
 * external spec yields 25.
 * On the Espec-1 path the result of data1_doespec1 is returned.
 * NOTE(review): how a resolved element set name leads to an Espec-1, and
 * the value returned on the "all match" path, should be confirmed.
 */
433 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c)
435 data1_esetname *eset;
/* simple form: a bare element set name */
441 case Z_RecordComp_simple:
442 if (c->u.simple->which != Z_ElementSetNames_generic)
443 return 26; /* only generic form supported. Fix this later */
444 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
445 c->u.simple->u.generic)))
447 logf(LOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
448 return 25; /* invalid esetname */
450 logf(LOG_DEBUG, "Esetname '%s' in simple compspec",
451 c->u.simple->u.generic);
/* complex form: only the generic part is examined */
454 case Z_RecordComp_complex:
455 if (c->u.complex->generic)
457 /* insert check for schema */
458 if ((p = c->u.complex->generic->elementSpec))
/* element spec given by name */
462 case Z_ElementSpec_elementSetName:
464 data1_getesetbyname(dh, n->u.root.absyn,
465 p->u.elementSetName)))
467 logf(LOG_LOG, "Unknown esetname '%s'",
468 p->u.elementSetName);
469 return 25; /* invalid esetname */
471 logf(LOG_DEBUG, "Esetname '%s' in complex compspec",
472 p->u.elementSetName);
/* element spec supplied inline; only Espec-1 is understood */
475 case Z_ElementSpec_externalSpec:
476 if (p->u.externalSpec->which == Z_External_espec1)
478 logf(LOG_DEBUG, "Got Espec-1");
479 espec = p->u.externalSpec-> u.espec1;
483 logf(LOG_LOG, "Unknown external espec.");
484 return 25; /* bad. what is proper diagnostic? */
/* an Espec-1 is applied to the tree by data1_doespec1 */
495 logf (LOG_DEBUG, "Element: Espec-1 match");
496 return data1_doespec1(dh, n, espec);
500 logf (LOG_DEBUG, "Element: all match");
/*
 * grs_retrieve: read a record through the handler selected by p->subType
 * and convert it to the requested transfer syntax.
 *
 * Steps, in order of appearance:
 *  - read the record tree with read_grs_type;
 *  - add elements for the record size, the rank (p->score) and the
 *    system number (p->localno);
 *  - if a complex composition names a schema, look for an entry in the
 *    abstract syntax's maptabs whose target equals it and map the record
 *    with data1_map_record;
 *  - look for a maptab entry whose target equals p->input_format and map
 *    the record the same way;
 *  - for VAL_GRS1 requests on a record whose schema reference is not
 *    VAL_NONE, add a schemaIdentifier element built from the OID;
 *  - apply the element specification via process_comp;
 *  - convert the tree according to p->output_format, which is set to
 *    p->input_format, or to VAL_SUTRS when that is VAL_NONE.
 *
 * NOTE(review): in the schemaIdentifier step the name p is the
 * destination of sprintf, i.e. an inner variable named p hides the
 * parameter there (p->dh is copied to a local dh just before).
 * NOTE(review): the data1_pr_tree calls write the tree to stdout;
 * confirm they are debug-only.
 */
505 static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
507 data1_node *node = 0, *onode = 0;
510 int res, selected = 0;
512 struct grs_read_info gri;
514 struct grs_handlers *h = (struct grs_handlers *) clientData;
515 int requested_schema = VAL_NONE;
/* the reader context takes its I/O callbacks from p */
518 gri.readf = p->readf;
519 gri.seekf = p->seekf;
520 gri.tellf = p->tellf;
527 logf (LOG_DEBUG, "grs_retrieve");
528 if (read_grs_type (h, &gri, p->subType, &node))
541 data1_pr_tree (p->dh, node, stdout);
/* "size" element: p->recordSize as decimal text in the node's lbuf */
543 logf (LOG_DEBUG, "grs_retrieve: size");
544 if ((dnew = data1_mk_tag_data_wd(p->dh, node, "size", mem)))
546 dnew->u.data.what = DATA1I_text;
547 dnew->u.data.data = dnew->lbuf;
548 sprintf(dnew->u.data.data, "%d", p->recordSize);
549 dnew->u.data.len = strlen(dnew->u.data.data);
/* rank element: tag name from resource "tagrank" (default "rank");
   skipped when the name is "0" or the score is negative */
552 tagname = res_get_def(p->res, "tagrank", "rank");
553 if (strcmp(tagname, "0") && p->score >= 0 &&
554 (dnew = data1_mk_tag_data_wd(p->dh, node, tagname, mem)))
556 logf (LOG_DEBUG, "grs_retrieve: %s", tagname);
557 dnew->u.data.what = DATA1I_num;
558 dnew->u.data.data = dnew->lbuf;
559 sprintf(dnew->u.data.data, "%d", p->score);
560 dnew->u.data.len = strlen(dnew->u.data.data);
/* system number element: tag name from resource "tagsysno" (default
   "localControlNumber"); skipped when the name is "0" or localno <= 0 */
563 tagname = res_get_def(p->res, "tagsysno", "localControlNumber");
564 if (strcmp(tagname, "0") && p->localno > 0 &&
565 (dnew = data1_mk_tag_data_wd(p->dh, node, tagname, mem)))
567 logf (LOG_DEBUG, "grs_retrieve: %s", tagname);
568 dnew->u.data.what = DATA1I_text;
569 dnew->u.data.data = dnew->lbuf;
570 sprintf(dnew->u.data.data, "%d", p->localno);
571 dnew->u.data.len = strlen(dnew->u.data.data);
574 data1_pr_tree (p->dh, node, stdout);
/* a complex composition may name a schema by OID; resolve it */
576 if (p->comp && p->comp->which == Z_RecordComp_complex &&
577 p->comp->u.complex->generic &&
578 p->comp->u.complex->generic->schema)
580 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema);
582 requested_schema = oe->value;
585 /* If schema has been specified, map if possible, then check that
586 * we got the right one
588 if (requested_schema != VAL_NONE)
590 logf (LOG_DEBUG, "grs_retrieve: schema mapping");
591 for (map = node->u.root.absyn->maptabs; map; map = map->next)
593 if (map->target_absyn_ref == requested_schema)
596 if (!(node = data1_map_record(p->dh, onode, map, mem)))
605 if (node->u.root.absyn &&
606 requested_schema != node->u.root.absyn->reference)
614 * Does the requested format match a known syntax-mapping? (this reflects
615 * the overlap of schema and formatting which is inherent in the MARC
618 yaz_log (LOG_DEBUG, "grs_retrieve: syntax mapping");
619 if (node->u.root.absyn)
620 for (map = node->u.root.absyn->maptabs; map; map = map->next)
622 if (map->target_absyn_ref == p->input_format)
625 if (!(node = data1_map_record(p->dh, onode, map, mem)))
634 yaz_log (LOG_DEBUG, "grs_retrieve: schemaIdentifier");
635 if (node->u.root.absyn &&
636 node->u.root.absyn->reference != VAL_NONE &&
637 p->input_format == VAL_GRS1)
641 int oidtmp[OID_SIZE];
643 oe.proto = PROTO_Z3950;
644 oe.oclass = CLASS_SCHEMA;
645 oe.value = node->u.root.absyn->reference;
647 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
650 data1_handle dh = p->dh;
654 for (ii = oid; *ii >= 0; ii++)
658 sprintf(p, "%d", *ii);
663 if ((dnew = data1_mk_tag_data_wd(dh, node,
664 "schemaIdentifier", mem)))
666 dnew->u.data.what = DATA1I_oid;
667 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
668 memcpy(dnew->u.data.data, tmp, p - tmp);
669 dnew->u.data.len = p - tmp;
674 logf (LOG_DEBUG, "grs_retrieve: element spec");
675 if (p->comp && (res = process_comp(p->dh, node, p->comp)) > 0)
679 data1_free_tree(p->dh, onode);
680 data1_free_tree(p->dh, node);
684 else if (p->comp && !res)
688 data1_pr_tree (p->dh, node, stdout);
690 logf (LOG_DEBUG, "grs_retrieve: transfer syntax mapping");
691 switch (p->output_format = (p->input_format != VAL_NONE ?
692 p->input_format : VAL_SUTRS))
694 data1_marctab *marctab;
698 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
703 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
704 memcpy (new_buf, p->rec_buf, p->rec_len);
705 p->rec_buf = new_buf;
710 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
712 p->diagnostic = 238; /* not available in requested syntax */
/* NOTE(review): rec_len of (size_t)-1 presumably marks a result that is
   not a flat byte buffer - confirm */
714 p->rec_len = (size_t) (-1);
717 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
721 p->rec_len = (size_t) (-1);
724 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
728 p->rec_len = (size_t) (-1);
731 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
/* the converter's buffer is copied into memory obtained from p->odr */
736 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
737 memcpy (new_buf, p->rec_buf, p->rec_len);
738 p->rec_buf = new_buf;
742 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
747 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
748 memcpy (new_buf, p->rec_buf, p->rec_len);
749 p->rec_buf = new_buf;
/* MARC output needs an abstract syntax with a table for this format */
753 if (!node->u.root.absyn)
758 for (marctab = node->u.root.absyn->marc; marctab;
759 marctab = marctab->next)
760 if (marctab->reference == p->input_format)
767 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
768 selected, &p->rec_len)))
772 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
773 memcpy (new_buf, p->rec_buf, p->rec_len);
774 p->rec_buf = new_buf;
/* release the working tree and the pre-mapping tree kept in onode */
778 data1_free_tree(p->dh, node);
780 data1_free_tree(p->dh, onode);
/*
 * grs_type is this module's recType descriptor and recTypeGrs exports its
 * address.
 * NOTE(review): the descriptor presumably wires up grs_init, grs_destroy,
 * grs_extract and grs_retrieve - confirm against the initializer.
 */
785 static struct recType grs_type =
794 RecType recTypeGrs = &grs_type;