1 /* $Id: recgrs.c,v 1.100 2005-03-05 09:19:15 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 #include <sys/types.h>
32 #include <idzebra/recgrs.h>
34 #define GRS_MAX_WORD 512
/* Parser state for termlist "source" expressions.  Code in this file uses
   its members src, lookahead, tok, len and nmem; the member declarations
   themselves are not part of this listing. */
36 struct source_parser {
/* sp_lex: lexer step of the source-expression parser.  The visible lines
   loop while the current input character is a blank, scan forward until a
   NUL or one of the delimiters in "<>();,-: " is reached, and (on one path)
   store the current character itself in sp->lookahead.
   NOTE(review): the loop bodies, the token bookkeeping and the return
   value are on lines this listing omits. */
44 static int sp_lex(struct source_parser *sp)
46 while (*sp->src == ' ')
50 while (*sp->src && !strchr("<>();,-: ", *sp->src))
59 sp->lookahead = *sp->src;
/* Forward declaration: sp_range() and sp_first() call sp_expr(), and
   sp_expr() calls them back for the "range" and "first" keywords. */
66 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
/* sp_range: handles range(expr, start [, len]).  The first expression is
   evaluated into *wrd; start and the optional len are evaluated into a
   temporary RecWord and converted with atoi_n().  The term in *wrd is then
   narrowed: term_buf is advanced by start, term_len is reduced by start and
   finally compared against len.
   NOTE(review): returns, braces and the default for len are on lines this
   listing omits. */
68 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
75 if (sp->lookahead != '(')
77 sp_lex(sp); /* skip ( */
80 if (!sp_expr(sp, n, wrd))
83 if (sp->lookahead != ',')
85 sp_lex(sp); /* skip , */
88 if (!sp_expr(sp, n, &tmp_w))
90 start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
92 if (sp->lookahead == ',')
94 sp_lex(sp); /* skip , */
97 if (!sp_expr(sp, n, &tmp_w))
99 len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
105 if (sp->lookahead != ')')
109 if (wrd->term_buf && wrd->term_len)
/* NOTE(review): start is applied as-is; no check that it lies within
   0..term_len is visible, so a larger start would move term_buf past the
   term and drive term_len negative -- confirm and clamp if needed. */
111 wrd->term_buf += start;
112 wrd->term_len -= start;
113 if (wrd->term_len > len)
/* sp_first: handles first(expr, s1 [, s2 ...]).  The first expression is
   evaluated into *wrd; every further comma-separated expression is a
   search term that is looked up in that term with a plain nested-loop
   scan, and the smallest matching offset is tracked in min_pos.  The
   result replaces *wrd with the decimal text of min_pos, allocated from
   sp->nmem.
   NOTE(review): returns, braces and declarations are on lines this listing
   omits. */
119 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
124 if (sp->lookahead != '(')
126 sp_lex(sp); /* skip ( */
127 if (!sp_expr(sp, n, wrd))
129 while (sp->lookahead == ',')
133 sp_lex(sp); /* skip , */
135 if (!sp_expr(sp, n, &search_w))
/* try every start offset i in the subject term */
137 for (i = 0; i<wrd->term_len; i++)
/* compare the search term at offset i, never reading past the subject */
140 for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
141 if (wrd->term_buf[i+j] != search_w.term_buf[j])
143 if (j == search_w.term_len) /* match ? */
145 if (min_pos == -1 || i < min_pos)
151 if (sp->lookahead != ')')
/* NOTE(review): with 0 as the fallback, "not found" and "found at offset 0"
   produce the same output string. */
155 min_pos = 0; /* the default if not found */
156 sprintf(num_str, "%d", min_pos);
157 wrd->term_buf = nmem_strdup(sp->nmem, num_str);
158 wrd->term_len = strlen(wrd->term_buf);
/* sp_expr: evaluates one source-expression term for node n into *wrd.
   The current token (sp->tok / sp->len) selects the form:
     data        - the node's data buffer, when n is a DATA1N_data node
     tag         - the node's tag name, when n is a DATA1N_tag node
     attr(expr)  - value of the attribute named by expr, on a tag node
     first(...)  - delegated to sp_first()
     range(...)  - delegated to sp_range()
     digits      - the literal token text, copied into sp->nmem
     'text'      - the text between the quotes, copied into sp->nmem
   NOTE(review): return statements and braces are on lines this listing
   omits. */
162 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
164 if (sp->lookahead != 't')
166 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
168 if (n->which == DATA1N_data)
170 wrd->term_buf = n->u.data.data;
171 wrd->term_len = n->u.data.len;
175 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
177 if (n->which == DATA1N_tag)
179 wrd->term_buf = n->u.tag.tag;
180 wrd->term_len = strlen(n->u.tag.tag);
184 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
188 if (sp->lookahead != '(')
192 if (!sp_expr(sp, n, &tmp_w))
197 if (n->which == DATA1N_tag)
199 data1_xattr *p = n->u.tag.attributes;
/* NOTE(review): this loop is meant to walk past attributes whose name
   differs from the requested one, but with && it stops as soon as EITHER
   the length is equal OR the first term_len bytes compare equal, so a
   wrong attribute can be selected.  || looks like the intended operator
   -- confirm and fix. */
200 while (p && strlen(p->name) != tmp_w.term_len &&
201 memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
/* NOTE(review): p is NULL here when no attribute stopped the loop; the
   guard for that case is not visible in this listing -- confirm. */
205 wrd->term_buf = p->value;
206 wrd->term_len = strlen(p->value);
209 if (sp->lookahead != ')')
213 else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
215 return sp_first(sp, n, wrd);
217 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
219 return sp_range(sp, n, wrd);
221 else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
/* exactly sp->len bytes are allocated and copied, so the buffer carries no
   terminating NUL; consumers must honour term_len */
223 wrd->term_buf = nmem_malloc(sp->nmem, sp->len);
224 memcpy(wrd->term_buf, sp->tok, sp->len);
225 wrd->term_len = sp->len;
/* requires len > 2, so an empty literal '' does not take this branch */
228 else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
230 wrd->term_len = sp->len - 2;
231 wrd->term_buf = nmem_malloc(sp->nmem, wrd->term_len);
232 memcpy(wrd->term_buf, sp->tok+1, wrd->term_len);
/* source_parser_create: allocates a source_parser with xmalloc() and gives
   it its own NMEM pool; the counterpart is source_parser_destroy().
   NOTE(review): initialisation of the remaining members and the return
   statement are on lines this listing omits. */
244 static struct source_parser *source_parser_create()
246 struct source_parser *sp = xmalloc(sizeof(*sp));
248 sp->nmem = nmem_create();
/* source_parser_destroy: releases the parser's NMEM pool.
   NOTE(review): release of the parser structure itself is not visible in
   this listing. */
252 static void source_parser_destroy(struct source_parser *sp)
256 nmem_destroy(sp->nmem);
/* sp_parse: evaluates the source expression `src` for node n and leaves
   the result in *wrd; the return value is that of sp_expr().
   The parser's NMEM pool is reset on every call.
   NOTE(review): term buffers that sp_expr()/sp_first() allocated from
   sp->nmem during an earlier call presumably become invalid at that
   point -- confirm callers do not keep them across calls. */
260 static int sp_parse(struct source_parser *sp,
261 data1_node *n, RecWord *wrd, const char *src)
267 nmem_reset(sp->nmem);
270 return sp_expr(sp, n, wrd);
/* d1_check_xpath_predicate: tests node n against XPath predicate p.
   Relation predicates are honoured only for attribute names (leading '@')
   and the '=' operator; other forms are logged as ignored.  Boolean
   predicates with op "and" / "or" recurse into both operands; any other
   boolean op is logged as ignored.
   NOTE(review): the values returned on the individual paths are on lines
   this listing omits. */
273 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
282 if (p->which == XPATH_PREDICATE_RELATION) {
283 if (p->u.relation.name[0]) {
284 if (*p->u.relation.name != '@') {
286 " Only attributes (@) are supported in xelm xpath predicates");
287 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
/* skip the leading '@' to get the bare attribute name */
290 attname = p->u.relation.name + 1;
292 /* looking for the attribute with a specified name */
/* NOTE(review): n->u.tag is read here; a check that n is a DATA1N_tag node
   is not visible in this listing -- confirm callers guarantee it. */
293 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
294 yaz_log(YLOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
296 if (!strcmp(attr->name, attname)) {
297 if (p->u.relation.op[0]) {
/* only the first character of the operator is inspected */
298 if (*p->u.relation.op != '=') {
300 "Only '=' relation is supported (%s)",p->u.relation.op);
301 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
304 yaz_log(YLOG_DEBUG," - value %s <-> %s",
305 p->u.relation.value, attr->value );
306 if (!strcmp(attr->value, p->u.relation.value)) {
311 /* attribute exists, no value specified */
316 yaz_log(YLOG_DEBUG, "return %d", res);
322 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
323 if (!strcmp(p->u.boolean.op,"and")) {
324 return d1_check_xpath_predicate(n, p->u.boolean.left)
325 && d1_check_xpath_predicate(n, p->u.boolean.right);
327 else if (!strcmp(p->u.boolean.op,"or")) {
328 return (d1_check_xpath_predicate(n, p->u.boolean.left)
329 || d1_check_xpath_predicate(n, p->u.boolean.right));
331 yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
342 New function, looking for xpath "element" definitions in abs, by
343 tagpath, using a kind of ugly regexp search. The DFA was built while
344 parsing abs, so here we just go through them and try to match
345 against the given tagpath. The first matching entry is returned.
349 Added support for enhanced xelm. Now [] predicates are considered
350 as well, when selecting indexing rules... (why the hell it's called
/* xpath_termlist_by_tagpath: looks up the termlist that applies to
   `tagpath` for node n.  The xpath elements of the record's abstract
   syntax (abs->xp_elements) are tried in turn: tagpath plus a trailing
   newline is run through each element's DFA, and for a DFA match the
   element's predicates are checked against n and its ancestors with
   d1_check_xpath_predicate().  On success the element's termlists are
   returned.
   NOTE(review): loop headers, the failure return and several declarations
   are on lines this listing omits. */
357 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
359 data1_absyn *abs = n->root->u.root.absyn;
360 data1_xpelement *xpe = abs->xp_elements;
363 struct xpath_location_step *xp;
/* room for tagpath, the appended '\n' and the terminating NUL */
366 char *pexpr = xmalloc(strlen(tagpath)+2);
369 sprintf (pexpr, "%s\n", tagpath);
370 yaz_log(YLOG_DEBUG,"Checking tagpath %s",tagpath);
373 struct DFA_state **dfaar = xpe->dfa->states;
374 struct DFA_state *s=dfaar[0];
/* NOTE(review): pexpr itself is advanced here, so it no longer points at
   the start of the xmalloc'ed buffer unless it is stepped back later; no
   xfree(pexpr) is visible in this listing either -- confirm the buffer is
   released, with the original pointer, on every return path. */
381 c = *pexpr++; t = s->trans; i = s->tran_no;
382 if ((c >= t->ch[0] && c <= t->ch[1]) || (!t->ch[0])) {
385 if ((s = dfaar[t->to])->rule_no &&
386 (start_line || s->rule_nno)) {
/* find the transition whose character range covers *p */
390 for (t=s->trans, i=s->tran_no; --i >= 0; t++) {
391 if ((unsigned) *p >= t->ch[0] && (unsigned) *p <= t->ch[1])
398 yaz_log(YLOG_DEBUG," xpath match %s",xpe->xpath_expr);
400 yaz_log(YLOG_DEBUG," xpath no match %s",xpe->xpath_expr);
405 /* we have to check the predicates up to the root node */
408 /* find the first tag up in the node structure */
409 nn = n; while (nn && nn->which != DATA1N_tag) {
413 /* go from inside out in the node structure, while going
414 backwards through xpath location steps ... */
/* the loop condition is i>0, so location step 0 is not visited here */
415 for (i=xpe->xpath_len - 1; i>0; i--) {
/* NOTE(review): nn is dereferenced below; the search loop above can end
   with nn == NULL -- a guard is not visible in this listing, confirm. */
417 yaz_log(YLOG_DEBUG,"Checking step %d: %s on tag %s",
418 i,xp[i].part,nn->u.tag.tag);
420 if (!d1_check_xpath_predicate(nn, xp[i].predicate)) {
421 yaz_log(YLOG_DEBUG," Predicates didn't match");
426 if (nn->which == DATA1N_tag) {
441 yaz_log(YLOG_DEBUG,"Got it");
442 return xpe->termlists;
449 1 start element (tag)
451 3 start attr (and attr-exact)
459 Now, if there is a matching xelm described in abs, for the
460 indexed element or the attribute, then the data is handled according
461 to those definitions...
463 modified by pop, 2002-12-13
466 /* add xpath index for an attribute */
/* The visible lines prepare *wrd three times under attribute set
   VAL_IDXPATH: first with tag_path as the term, then with the attribute
   value, then with tag_path again.
   NOTE(review): the last parameter line, the attrUse/reg_type assignments
   and the calls that emit each token are on lines this listing omits. */
467 static void index_xpath_attr (char *tag_path, char *name, char *value,
468 char *structure, struct recExtractCtrl *p,
471 wrd->attrSet = VAL_IDXPATH;
474 wrd->term_buf = tag_path;
475 wrd->term_len = strlen(tag_path);
481 wrd->term_buf = value;
482 wrd->term_len = strlen(value);
488 wrd->term_buf = tag_path;
489 wrd->term_len = strlen(tag_path);
/* index_xpath: emits XPath-related index tokens for node n through the
   p->tokenAdd callback.  `use` is stored as attrUse on termlist-driven
   tokens; callers in this file pass 1 for a start tag, 1016 for a data
   node and 2 once a tag's children have been processed.
   For data, the path from the node up to the root is assembled in
   tag_path_full (each tag followed by '/'), matched against the xelm
   definitions with xpath_termlist_by_tagpath(), and the matching termlist
   entries are indexed.  For tags the same path is built and, for use == 1,
   each attribute is additionally indexed under an "@name/path" key.
   NOTE(review): switch/case lines, braces, several declarations and
   conditions are on lines this listing omits; the notes below rely only on
   what is visible. */
494 static void index_xpath (struct source_parser *sp, data1_node *n,
495 struct recExtractCtrl *p,
496 int level, RecWord *wrd, int use)
499 char tag_path_full[1024];
502 int termlist_only = 1;
506 yaz_log(YLOG_DEBUG, "index_xpath level=%d use=%d", level, use);
507 if ((!n->root->u.root.absyn) ||
508 (n->root->u.root.absyn->enable_xpath_indexing)) {
515 wrd->term_buf = n->u.data.data;
516 wrd->term_len = n->u.data.len;
520 /* we have to fetch the whole path to the data tag */
521 for (nn = n; nn; nn = nn->parent)
523 if (nn->which == DATA1N_tag)
525 size_t tlen = strlen(nn->u.tag.tag);
/* keep room for the '/' separator and the terminating NUL */
526 if (tlen + flen > (sizeof(tag_path_full)-2))
528 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
530 tag_path_full[flen++] = '/';
533 if (nn->which == DATA1N_root)
537 tag_path_full[flen] = 0;
539 /* If we have a matching termlist... */
540 if (n->root->u.root.absyn &&
541 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
543 for (; tl; tl = tl->next)
545 /* need to copy recword because it may be changed */
547 wrd->reg_type = *tl->structure;
548 /* this is the ! case, so structure is for the xpath index */
549 memcpy (&wrd_tl, wrd, sizeof(*wrd));
/* the return value of sp_parse() is not used here */
551 sp_parse(sp, n, &wrd_tl, tl->source);
553 wrd_tl.attrSet = VAL_IDXPATH;
554 wrd_tl.attrUse = use;
555 if (p->flagShowRecords)
558 printf("%*sXPath index", (level + 1) * 4, "");
559 printf (" XData:\"");
/* show at most the first 40 bytes of the term */
560 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
561 fputc (wrd_tl.term_buf[i], stdout);
563 if (wrd_tl.term_len > 40)
565 fputc ('\n', stdout);
568 (*p->tokenAdd)(&wrd_tl);
571 /* this is just the old fashioned attribute based index */
572 wrd_tl.attrSet = (int) (tl->att->parent->reference);
573 wrd_tl.attrUse = tl->att->locals->local;
574 if (p->flagShowRecords)
577 printf("%*sIdx: [%s]", (level + 1) * 4, "",
579 printf("%s:%s [%d] %s",
580 tl->att->parent->name,
581 tl->att->name, tl->att->value,
583 printf (" XData:\"");
584 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
585 fputc (wrd_tl.term_buf[i], stdout);
587 if (wrd_tl.term_len > 40)
589 fputc ('\n', stdout);
592 (*p->tokenAdd)(&wrd_tl);
596 /* xpath indexing is done, if there was no termlist given,
597 or no ! in the termlist, and default indexing is enabled... */
598 if (!p->flagShowRecords && !xpdone && !termlist_only)
600 wrd->attrSet = VAL_IDXPATH;
/* same path construction as above, this time for a tag node */
608 for (nn = n; nn; nn = nn->parent)
610 if (nn->which == DATA1N_tag)
612 size_t tlen = strlen(nn->u.tag.tag);
613 if (tlen + flen > (sizeof(tag_path_full)-2))
615 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
617 tag_path_full[flen++] = '/';
619 else if (nn->which == DATA1N_root)
/* the term is the raw path buffer with explicit length flen */
625 wrd->term_buf = tag_path_full;
626 wrd->term_len = flen;
627 wrd->attrSet = VAL_IDXPATH;
629 if (p->flagShowRecords)
631 printf("%*s tag=", (level + 1) * 4, "");
632 for (i = 0; i<wrd->term_len && i < 40; i++)
633 fputc (wrd->term_buf[i], stdout);
644 tag_path_full[flen] = 0;
646 /* Add tag start/end xpath index, only when there is a ! in the appropriate xelm
647 directive, or default xpath indexing is enabled */
/* note the assignment inside the condition: do_xpindex is set to
   1 - termlist_only and the branch is taken when that value is 0 */
648 if (!(do_xpindex = 1 - termlist_only)) {
649 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n))) {
/* a termlist entry without an att enables xpath indexing for this tag */
650 for (; tl; tl = tl->next) { if (!tl->att) {do_xpindex = 1;} }
654 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
657 if (use == 1) /* only for the starting tag... */
659 #define MAX_ATTR_COUNT 50
/* NOTE(review): tll[] holds one termlist per attribute; a bound check of
   the attribute index against MAX_ATTR_COUNT is not visible in this
   listing -- confirm elements with more attributes cannot overrun it. */
660 data1_termlist *tll[MAX_ATTR_COUNT];
664 /* get termlists for attributes, and find out, if we have to do xpath indexing */
665 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
670 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
672 int do_xpindex = 1 - termlist_only;
674 char attr_tag_path_full[1024];
677 /* this could be cached as well */
/* NOTE(review): unbounded sprintf into a 1024-byte buffer; the output is
   '@' + attribute name + '/' + up to int_len bytes of tag_path_full, and
   no length check on xp->name is visible -- confirm it cannot overflow or
   switch to a bounded formatter. */
678 sprintf (attr_tag_path_full, "@%s/%.*s",
679 xp->name, int_len, tag_path_full);
681 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
683 /* if there is a ! in the xelm termlist, or default indexing is on,
684 proceed with xpath idx */
687 for (; tl; tl = tl->next)
696 /* attribute (no value) */
699 wrd->term_buf = xp->name;
700 wrd->term_len = strlen(xp->name);
706 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
708 /* attribute value exact */
709 strcpy (comb, xp->name);
711 strcat (comb, xp->value);
715 wrd->term_buf = comb;
716 wrd->term_len = strlen(comb);
726 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
728 char attr_tag_path_full[1024];
/* NOTE(review): same unbounded sprintf as in the loop above. */
732 sprintf (attr_tag_path_full, "@%s/%.*s",
733 xp->name, int_len, tag_path_full);
737 /* If there is a termlist given (=xelm directive) */
738 for (; tl; tl = tl->next)
741 /* add xpath index for the attribute */
742 index_xpath_attr (attr_tag_path_full, xp->name,
743 xp->value, tl->structure,
747 /* add attribute based index for the attribute */
750 (tl->att->parent->reference);
751 wrd->attrUse = tl->att->locals->local;
752 wrd->reg_type = *tl->structure;
753 wrd->term_buf = xp->value;
754 wrd->term_len = strlen(xp->value);
760 /* if there was no termlist for the given path,
761 or the termlist didn't have a ! element, index
762 the attribute as "w" */
763 if ((!xpdone) && (!termlist_only))
765 index_xpath_attr (attr_tag_path_full, xp->name,
766 xp->value, "w", p, wrd);
/* index_termlist: applies the termlists of the element that governs `par`
   to a node.  Starting at par, ancestors are followed until a tag with an
   element definition is found; for each entry of that element's termlist
   the entry's source expression is evaluated with sp_parse() into *wrd.
   A non-empty result is printed when p->flagShowRecords is set; the
   visible lines also store the entry's structure, attribute set and local
   use value in *wrd.
   NOTE(review): one parameter line, braces and the token-emitting call are
   on lines this listing omits. */
775 static void index_termlist (struct source_parser *sp, data1_node *par,
777 struct recExtractCtrl *p, int level, RecWord *wrd)
779 data1_termlist *tlist = 0;
780 data1_datatype dtype = DATA1K_string;
783 * cycle up towards the root until we find a tag with an att..
784 * this has the effect of indexing locally defined tags with
785 * the attribute of their ancestor in the record.
788 while (!par->u.tag.element)
789 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
791 if (!par || !(tlist = par->u.tag.element->termlists))
793 if (par->u.tag.element->tag)
794 dtype = par->u.tag.element->tag->kind;
796 for (; tlist; tlist = tlist->next)
798 /* consider source */
800 assert(tlist->source);
/* the return value of sp_parse() is not used; only the resulting term is
   tested on the following line */
801 sp_parse(sp, n, wrd, tlist->source);
803 if (wrd->term_buf && wrd->term_len)
805 if (p->flagShowRecords)
808 printf("%*sIdx: [%s]", (level + 1) * 4, "",
810 printf("%s:%s [%d] %s",
811 tlist->att->parent->name,
812 tlist->att->name, tlist->att->value,
814 printf (" XData:\"");
/* show at most the first 40 bytes of the term */
815 for (i = 0; i<wrd->term_len && i < 40; i++)
816 fputc (wrd->term_buf[i], stdout);
818 if (wrd->term_len > 40)
820 fputc ('\n', stdout);
824 wrd->reg_type = *tlist->structure;
825 wrd->attrSet = (int) (tlist->att->parent->reference);
826 wrd->attrUse = tlist->att->locals->local;
/* dumpkeys_r: walks node n and its following siblings, recursing into
   children with level + 1.  With p->flagShowRecords set, a description of
   each root/tag/data node is printed, indented by level.  Tag nodes are
   passed to index_termlist() and, when the record has an abstract syntax,
   to index_xpath() with use 1 before the children and use 2 afterwards;
   data nodes are passed to index_termlist() with their parent tag and to
   index_xpath() with use 1016.  A negative result from the recursive call
   is tested for.
   NOTE(review): the last parameter line, braces and return statements are
   on lines this listing omits. */
833 static int dumpkeys_r(struct source_parser *sp,
834 data1_node *n, struct recExtractCtrl *p, int level,
837 for (; n; n = n->next)
839 if (p->flagShowRecords) /* display element description to user */
841 if (n->which == DATA1N_root)
843 printf("%*s", level * 4, "");
844 printf("Record type: '%s'\n", n->u.root.type);
846 else if (n->which == DATA1N_tag)
850 printf("%*s", level * 4, "");
/* a tag without an element definition is reported as a local tag */
851 if (!(e = n->u.tag.element))
852 printf("Local tag: '%s'\n", n->u.tag.tag);
855 printf("Elm: '%s' ", e->name);
858 data1_tag *t = e->tag;
860 printf("TagNam: '%s' ", t->names->name);
863 printf("%s[%d],", t->tagset->name, t->tagset->type);
866 if (t->which == DATA1T_numeric)
867 printf("%d)", t->value.numeric);
869 printf("'%s')", t->value.string);
876 if (n->which == DATA1N_tag)
878 index_termlist(sp, n, n, p, level, wrd);
879 /* index start tag */
880 if (n->root->u.root.absyn)
881 index_xpath(sp, n, p, level, wrd, 1);
885 if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
889 if (n->which == DATA1N_data)
891 data1_node *par = get_parent_tag(p->dh, n);
893 if (p->flagShowRecords)
895 printf("%*s", level * 4, "");
/* long data is abbreviated to its first 170 and last 70 bytes */
897 if (n->u.data.len > 256)
898 printf("'%.170s ... %.70s'\n", n->u.data.data,
899 n->u.data.data + n->u.data.len-70);
900 else if (n->u.data.len > 0)
901 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
907 index_termlist(sp, par, n, p, level, wrd);
909 index_xpath(sp, n, p, level, wrd, 1016);
912 if (n->which == DATA1N_tag)
915 index_xpath(sp, n, p, level, wrd, 2);
918 if (p->flagShowRecords && n->which == DATA1N_root)
920 printf("%*s-------------\n\n", level * 4, "");
/* dumpkeys: runs dumpkeys_r() over the tree rooted at n at level 0, using
   a source parser that is created for this call and destroyed afterwards.
   NOTE(review): the return statement is on a line this listing omits;
   presumably r is returned. */
926 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
928 struct source_parser *sp = source_parser_create();
929 int r = dumpkeys_r(sp, n, p, 0, wrd);
930 source_parser_destroy(sp);
/* grs_extract_tree: extracts index keys from an already built data1 tree.
   The schema reference of the tree's abstract syntax is converted to an
   OID and, when that succeeds, reported through p->schemaAdd; the keys are
   then produced by dumpkeys(), whose result is returned.
   NOTE(review): declarations and any check of n->u.root.absyn are on lines
   this listing omits. */
934 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
937 int oidtmp[OID_SIZE];
940 oe.proto = PROTO_Z3950;
941 oe.oclass = CLASS_SCHEMA;
944 oe.value = n->u.root.absyn->reference;
946 if ((oid_ent_to_oid (&oe, oidtmp)))
947 (*p->schemaAdd)(p, oidtmp);
951 return dumpkeys(n, p, &wrd);
/* grs_extract_sub: reads one record through the grs_read callback and
   extracts its index keys.  The reader is handed the I/O callbacks, the
   offset and clientData via a grs_read_info.  The record's schema is
   reported through p->schemaAdd, text nodes are concatenated, the tree is
   converted to UTF-8 and dumpkeys() generates the keys.
   Visible return values: RECCTRL_EXTRACT_EOF, RECCTRL_EXTRACT_ERROR when
   the tree has no abstract syntax, RECCTRL_EXTRACT_ERROR_GENERIC when
   dumpkeys() fails, RECCTRL_EXTRACT_OK otherwise.
   NOTE(review): one parameter line and several declarations/conditions are
   on lines this listing omits. */
954 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
956 data1_node *(*grs_read)(struct grs_read_info *))
959 struct grs_read_info gri;
961 int oidtmp[OID_SIZE];
964 gri.readf = p->readf;
965 gri.seekf = p->seekf;
966 gri.tellf = p->tellf;
969 gri.offset = p->offset;
972 gri.clientData = clientData;
974 n = (*grs_read)(&gri);
976 return RECCTRL_EXTRACT_EOF;
977 oe.proto = PROTO_Z3950;
978 oe.oclass = CLASS_SCHEMA;
/* NOTE(review): this path returns without the data1_free_tree() call used
   on the two exits at the end of the function -- confirm that is
   harmless. */
980 if (!n->u.root.absyn)
981 return RECCTRL_EXTRACT_ERROR;
985 oe.value = n->u.root.absyn->reference;
986 if ((oid_ent_to_oid (&oe, oidtmp)))
987 (*p->schemaAdd)(p, oidtmp);
989 data1_concat_text(p->dh, mem, n);
991 /* ensure our data1 tree is UTF-8 */
992 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
995 data1_pr_tree (p->dh, n, stdout);
999 if (dumpkeys(n, p, &wrd) < 0)
1001 data1_free_tree(p->dh, n);
1002 return RECCTRL_EXTRACT_ERROR_GENERIC;
1004 data1_free_tree(p->dh, n);
1005 return RECCTRL_EXTRACT_OK;
/* zebra_grs_extract: entry point for key extraction; creates an NMEM pool
   for the duration of the call and delegates to grs_extract_sub().
   NOTE(review): release of the pool and the return statement are on lines
   this listing omits. */
1008 int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
1009 data1_node *(*grs_read)(struct grs_read_info *))
1012 NMEM mem = nmem_create ();
1013 ret = grs_extract_sub(clientData, p, mem, grs_read);
1019 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/* process_comp: applies the record composition c to tree n.
   Simple composition: only the generic element-set-name form is accepted
   (otherwise 26); an unknown name yields 25 with *addinfo set to the name.
   Complex composition: the generic element spec may name an element set
   (same handling as above) or carry an external Espec-1; other external
   specs yield 25.  When an Espec-1 was selected the result of
   data1_doespec1() is returned.
   *addinfo strings are allocated from ODR o.
   NOTE(review): the switch headers, the handling of a found element set
   and the remaining return statements are on lines this listing omits. */
1021 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
1022 char **addinfo, ODR o)
1024 data1_esetname *eset;
1025 Z_Espec1 *espec = 0;
1030 case Z_RecordComp_simple:
1031 if (c->u.simple->which != Z_ElementSetNames_generic)
1032 return 26; /* only generic form supported. Fix this later */
1033 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1034 c->u.simple->u.generic)))
1036 yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1037 *addinfo = odr_strdup(o, c->u.simple->u.generic);
1038 return 25; /* invalid esetname */
1040 yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
1041 c->u.simple->u.generic);
1044 case Z_RecordComp_complex:
1045 if (c->u.complex->generic)
1047 /* insert check for schema */
1048 if ((p = c->u.complex->generic->elementSpec))
1052 case Z_ElementSpec_elementSetName:
1054 data1_getesetbyname(dh, n->u.root.absyn,
1055 p->u.elementSetName)))
1057 yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
1058 p->u.elementSetName);
1059 *addinfo = odr_strdup(o, p->u.elementSetName);
1060 return 25; /* invalid esetname */
1062 yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
1063 p->u.elementSetName);
1066 case Z_ElementSpec_externalSpec:
1067 if (p->u.externalSpec->which == Z_External_espec1)
1069 yaz_log(YLOG_DEBUG, "Got Espec-1");
1070 espec = p->u.externalSpec-> u.espec1;
1074 yaz_log(YLOG_LOG, "Unknown external espec.");
1075 return 25; /* bad. what is proper diagnostic? */
1082 return 26; /* fix */
1086 yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1087 return data1_doespec1(dh, n, espec);
1091 yaz_log(YLOG_DEBUG, "Element: all match");
1096 /* Add Zebra info in separate namespace ...
1099 <idzebra xmlns="http://www.indexdata.dk/zebra/">
1101 <localnumber>447</localnumber>
1102 <filename>records/genera.xml</filename>
/* zebra_xml_metadata: appends an <idzebra> element, in the namespace
   http://www.indexdata.dk/zebra/, under `top`.  Child elements created in
   the visible lines are size (p->recordSize), score (p->score),
   localnumber (p->localno) and filename (p->fname); i2 and i4 are
   whitespace text nodes inserted between elements for layout.  All nodes
   are allocated from mem.
   NOTE(review): the last parameter line and the conditions guarding the
   individual children are on lines this listing omits. */
1107 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1110 const char *idzebra_ns[3];
1111 const char *i2 = "\n ";
1112 const char *i4 = "\n ";
/* attribute list for the new tag: name, value; the third slot is not set
   in the visible lines (presumably the terminator) */
1115 idzebra_ns[0] = "xmlns";
1116 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1119 data1_mk_text (p->dh, mem, i2, top);
1121 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1123 data1_mk_text (p->dh, mem, "\n", top);
1125 data1_mk_text (p->dh, mem, i4, n);
1127 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1131 data1_mk_text (p->dh, mem, i4, n);
1132 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1134 data1_mk_text (p->dh, mem, i4, n);
1135 data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
1138 data1_mk_text (p->dh, mem, i4, n);
1139 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1141 data1_mk_text (p->dh, mem, i2, n);
/* zebra_grs_retrieve: reads a record through grs_read and renders it in
   the requested syntax.  Visible stages, in order:
     1. read the record and concatenate text nodes;
     2. add system tags below the root tag: size, rank (when p->score >= 0)
        and sysno (when p->localno > 0), each named through
        data1_systag_lookup();
     3. for XML input, add the idzebra metadata element;
     4. if a schema OID was requested, map the record through a matching
        maptab; diagnostic 238 is set when the resulting schema differs;
     5. map through a maptab whose target equals p->input_format;
     6. for GRS-1 output, add a schemaIdentifier tag carrying the dotted
        OID of the schema;
     7. apply the element specification with process_comp();
     8. convert to the output format (p->input_format, or VAL_SUTRS when
        that is VAL_NONE) and store the result in p->rec_buf / p->rec_len,
        setting diagnostic 238 when conversion fails.
   Results that need to outlive the call are copied into p->odr memory.
   NOTE(review): the case labels of the output switch, braces, several
   declarations and the return statements are on lines this listing
   omits. */
1144 int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
1145 data1_node *(*grs_read)(struct grs_read_info *))
1147 data1_node *node = 0, *onode = 0, *top;
1150 int res, selected = 0;
1152 struct grs_read_info gri;
1153 const char *tagname;
1155 int requested_schema = VAL_NONE;
1156 data1_marctab *marctab;
1159 mem = nmem_create();
1160 gri.readf = p->readf;
1161 gri.seekf = p->seekf;
1162 gri.tellf = p->tellf;
1168 gri.clientData = clientData;
1170 yaz_log(YLOG_DEBUG, "grs_retrieve");
1171 node = (*grs_read)(&gri);
1178 data1_concat_text(p->dh, mem, node);
1181 data1_pr_tree (p->dh, node, stdout);
1183 top = data1_get_root_tag (p->dh, node);
1185 yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1186 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1188 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
/* the value is formatted into the node's own lbuf buffer */
1190 dnew->u.data.what = DATA1I_text;
1191 dnew->u.data.data = dnew->lbuf;
1192 sprintf(dnew->u.data.data, "%d", p->recordSize);
1193 dnew->u.data.len = strlen(dnew->u.data.data);
1196 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1197 if (tagname && p->score >= 0 &&
1198 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1200 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1201 dnew->u.data.what = DATA1I_num;
1202 dnew->u.data.data = dnew->lbuf;
1203 sprintf(dnew->u.data.data, "%d", p->score);
1204 dnew->u.data.len = strlen(dnew->u.data.data);
1207 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1208 "localControlNumber");
1209 if (tagname && p->localno > 0 &&
1210 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1212 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1213 dnew->u.data.what = DATA1I_text;
1214 dnew->u.data.data = dnew->lbuf;
1216 sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
1217 dnew->u.data.len = strlen(dnew->u.data.data);
1220 if (p->input_format == VAL_TEXT_XML)
1221 zebra_xml_metadata (p, top, mem);
1224 data1_pr_tree (p->dh, node, stdout);
1226 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1227 p->comp->u.complex->generic &&
1228 p->comp->u.complex->generic->which == Z_Schema_oid &&
1229 p->comp->u.complex->generic->schema.oid)
1231 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1233 requested_schema = oe->value;
1235 /* If schema has been specified, map if possible, then check that
1236 * we got the right one
1238 if (requested_schema != VAL_NONE)
1240 yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1241 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1243 if (map->target_absyn_ref == requested_schema)
1246 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1255 if (node->u.root.absyn &&
1256 requested_schema != node->u.root.absyn->reference)
1258 p->diagnostic = 238;
1264 * Does the requested format match a known syntax-mapping? (this reflects
1265 * the overlap of schema and formatting which is inherent in the MARC
1268 yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1269 if (node->u.root.absyn)
1270 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1272 if (map->target_absyn_ref == p->input_format)
1275 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1284 yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1285 if (node->u.root.absyn &&
1286 node->u.root.absyn->reference != VAL_NONE &&
1287 p->input_format == VAL_GRS1)
1291 int oidtmp[OID_SIZE];
1293 oe.proto = PROTO_Z3950;
1294 oe.oclass = CLASS_SCHEMA;
1295 oe.value = node->u.root.absyn->reference;
1297 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
/* NOTE(review): inside this block `p` is used as a char pointer into tmp
   (see the sprintf and `p - tmp` below), so a local p apparently shadows
   the recRetrieveCtrl parameter; dh is saved first for that reason. */
1300 data1_handle dh = p->dh;
/* the OID array is terminated by a negative element */
1304 for (ii = oid; *ii >= 0; ii++)
1308 sprintf(p, "%d", *ii);
1311 if ((dnew = data1_mk_tag_data_wd(dh, top,
1312 "schemaIdentifier", mem)))
1314 dnew->u.data.what = DATA1I_oid;
1315 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1316 memcpy(dnew->u.data.data, tmp, p - tmp);
1317 dnew->u.data.len = p - tmp;
1322 yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1323 if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
/* element-spec failure: report the diagnostic and release both trees */
1326 p->diagnostic = res;
1328 data1_free_tree(p->dh, onode);
1329 data1_free_tree(p->dh, node);
1333 else if (p->comp && !res)
1337 data1_pr_tree (p->dh, node, stdout);
1339 yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
/* output format defaults to SUTRS when no input format was given */
1340 switch (p->output_format = (p->input_format != VAL_NONE ?
1341 p->input_format : VAL_SUTRS))
1345 data1_pr_tree (p->dh, node, stdout);
1347 /* default output encoding for XML is UTF-8 */
1348 data1_iconv (p->dh, mem, node,
1349 p->encoding ? p->encoding : "UTF-8",
1350 data1_get_encoding(p->dh, node));
1352 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1354 p->diagnostic = 238;
/* copy the rendered record into ODR memory before handing it back */
1357 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1358 memcpy (new_buf, p->rec_buf, p->rec_len);
1359 p->rec_buf = new_buf;
1363 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1365 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1367 p->diagnostic = 238; /* not available in requested syntax */
/* rec_len of (size_t)-1 is stored for the structured (non-buffer) result */
1369 p->rec_len = (size_t) (-1);
1372 /* ensure our data1 tree is UTF-8 */
1373 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1375 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1377 p->diagnostic = 238;
1379 p->rec_len = (size_t) (-1);
1382 /* ensure our data1 tree is UTF-8 */
1383 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1384 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1386 p->diagnostic = 238;
1388 p->rec_len = (size_t) (-1);
1392 data1_iconv (p->dh, mem, node, p->encoding,
1393 data1_get_encoding(p->dh, node));
1394 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1396 p->diagnostic = 238;
1399 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1400 memcpy (new_buf, p->rec_buf, p->rec_len);
1401 p->rec_buf = new_buf;
1406 data1_iconv (p->dh, mem, node, p->encoding,
1407 data1_get_encoding(p->dh, node));
1408 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1410 p->diagnostic = 238;
1413 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1414 memcpy (new_buf, p->rec_buf, p->rec_len);
1415 p->rec_buf = new_buf;
/* remaining formats need a MARC table from the abstract syntax */
1419 if (!node->u.root.absyn)
1421 p->diagnostic = 238;
/* look for the MARC table whose reference equals the requested format */
1424 for (marctab = node->u.root.absyn->marc; marctab;
1425 marctab = marctab->next)
1426 if (marctab->reference == p->input_format)
1430 p->diagnostic = 238;
1434 data1_iconv (p->dh, mem, node, p->encoding,
1435 data1_get_encoding(p->dh, node));
1436 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1437 selected, &p->rec_len)))
1438 p->diagnostic = 238;
1441 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1442 memcpy (new_buf, p->rec_buf, p->rec_len);
1443 p->rec_buf = new_buf;
1447 data1_free_tree(p->dh, node);
1449 data1_free_tree(p->dh, onode);