1 /* $Id: recgrs.c,v 1.104 2005-08-03 07:44:27 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 #include <sys/types.h>
32 #include <idzebra/recgrs.h>
// GRS_MAX_WORD: presumably an upper bound on word length; it is not referenced
// in the code visible here (TODO confirm where it is used).
34 #define GRS_MAX_WORD 512
// Parser state for termlist source expressions. The functions below use the
// members src (input cursor), lookahead (kind of the current token), tok and
// len (text of the current token) and nmem (allocator for computed terms).
// The member declarations themselves are not visible here.
36 struct source_parser {
// Lexer step for the source expression parser: it inspects the text at
// sp->src and sets sp->lookahead. The calls visible in this file ignore the
// int return value.
44 static int sp_lex(struct source_parser *sp)
// Skip blanks in front of the token.
46 while (*sp->src == ' ')
// Scan a run of ordinary characters, stopping at a delimiter or at the end
// of the string.
50 while (*sp->src && !strchr("<>();,-: ", *sp->src))
// Here the lookahead becomes the character under the cursor; the condition
// that selects this assignment is not visible here.
59 sp->lookahead = *sp->src;
// Forward declaration: sp_expr and the function parsers below call each other.
66 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
// Parses the argument list of range: an expression whose term is stored in
// wrd, a start offset and an optional length, comma separated inside
// parentheses. The term in wrd is then narrowed to the requested slice.
// The result is handed back unchanged by sp_expr; presumably non-zero means
// success, matching how sp_expr results are tested here (TODO confirm).
68 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
// The argument list must open with a left parenthesis.
75 if (sp->lookahead != '(')
77 sp_lex(sp); /* skip ( */
// First argument: the term that will be sliced.
80 if (!sp_expr(sp, n, wrd))
83 if (sp->lookahead != ',')
85 sp_lex(sp); /* skip , */
// Second argument: the start offset, converted from its textual form.
88 if (!sp_expr(sp, n, &tmp_w))
90 start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
// Optional third argument: the slice length.
92 if (sp->lookahead == ',')
94 sp_lex(sp); /* skip , */
97 if (!sp_expr(sp, n, &tmp_w))
99 len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
// The argument list must end with a right parenthesis.
105 if (sp->lookahead != ')')
// Apply the slice only when the first argument produced a non-empty term.
109 if (wrd->term_buf && wrd->term_len)
// NOTE(review): no check that start lies within term_len is visible here; a
// larger start would move term_buf past the term and leave term_len
// inconsistent. Confirm whether a guard exists on a line not shown.
111 wrd->term_buf += start;
112 wrd->term_len -= start;
// Presumably clamps the remaining length to len (the body is not visible).
113 if (wrd->term_len > len)
// Parses the argument list of first: a subject expression followed by one or
// more comma separated search expressions. The subject term is scanned for
// each search term and the smallest match offset is kept in min_pos. The
// term in wrd is finally replaced by that offset written as decimal text.
119 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
124 if (sp->lookahead != '(')
126 sp_lex(sp); /* skip ( */
// Subject term.
127 if (!sp_expr(sp, n, wrd))
// One iteration per search term.
129 while (sp->lookahead == ',')
133 sp_lex(sp); /* skip , */
135 if (!sp_expr(sp, n, &search_w))
// Plain substring search: try every start offset i in the subject.
137 for (i = 0; i<wrd->term_len; i++)
// Compare byte by byte without reading past the end of the subject.
140 for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
141 if (wrd->term_buf[i+j] != search_w.term_buf[j])
// j reaches the search length only when every byte matched.
143 if (j == search_w.term_len) /* match ? */
// Keep the earliest match seen over all search terms.
145 if (min_pos == -1 || i < min_pos)
151 if (sp->lookahead != ')')
155 min_pos = 0; /* the default if not found */
// The result term is allocated from the parser pool, so it presumably stays
// valid only until that pool is reset or destroyed (TODO confirm).
156 sprintf(num_str, "%d", min_pos);
157 wrd->term_buf = nmem_strdup(sp->nmem, num_str);
158 wrd->term_len = strlen(wrd->term_buf);
// Evaluates one source expression for node n and stores the resulting term
// in wrd. Recognised forms in the visible code: the keywords data and tag,
// attr with a parenthesised name expression, the functions first and range,
// a literal starting with a digit, and a single-quoted literal.
// Callers test the result with a logical not, so non-zero presumably means
// success (the return statements are not visible here).
162 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
// Every expression starts with a token whose lookahead kind is t.
164 if (sp->lookahead != 't')
// data: the text of a data node.
166 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
168 if (n->which == DATA1N_data)
170 wrd->term_buf = n->u.data.data;
171 wrd->term_len = n->u.data.len;
// tag: the name of a tag node.
175 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
177 if (n->which == DATA1N_tag)
179 wrd->term_buf = n->u.tag.tag;
180 wrd->term_len = strlen(n->u.tag.tag);
// attr: the value of the attribute whose name is given by a nested
// expression.
184 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
188 if (sp->lookahead != '(')
192 if (!sp_expr(sp, n, &tmp_w))
197 if (n->which == DATA1N_tag)
199 data1_xattr *p = n->u.tag.attributes;
// NOTE(review): the two tests are joined with a logical and, so the scan
// stops as soon as either the name length matches or the leading bytes
// match. Looks like an exact name match would need a logical or between
// them; confirm against the intended behaviour.
200 while (p && strlen(p->name) != tmp_w.term_len &&
201 memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
205 wrd->term_buf = p->value;
206 wrd->term_len = strlen(p->value);
209 if (sp->lookahead != ')')
// first and range are handled by their own parsers.
213 else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
215 return sp_first(sp, n, wrd);
217 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
219 return sp_range(sp, n, wrd);
// Literal starting with a digit: copied as is. The copy is length delimited;
// no terminator is added in the visible lines.
221 else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
223 wrd->term_buf = nmem_malloc(sp->nmem, sp->len);
224 memcpy(wrd->term_buf, sp->tok, sp->len);
225 wrd->term_len = sp->len;
// Quoted literal: copied without the surrounding quotes. The length test
// requires at least one character between the quotes, so an empty quoted
// string does not take this branch.
228 else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
230 wrd->term_len = sp->len - 2;
231 wrd->term_buf = nmem_malloc(sp->nmem, wrd->term_len);
232 memcpy(wrd->term_buf, sp->tok+1, wrd->term_len);
// Allocates a parser object with xmalloc and gives it its own NMEM pool.
// The caller releases it with source_parser_destroy.
244 static struct source_parser *source_parser_create()
246 struct source_parser *sp = xmalloc(sizeof(*sp));
248 sp->nmem = nmem_create();
// Releases the parser pool. Whether sp itself is checked for null or freed
// is not visible here.
252 static void source_parser_destroy(struct source_parser *sp)
256 nmem_destroy(sp->nmem);
// Entry point of the source expression parser: evaluates src for node n and
// leaves the resulting term in wrd. Returns the result of sp_expr.
260 static int sp_parse(struct source_parser *sp,
261 data1_node *n, RecWord *wrd, const char *src)
// The pool is reset at the start of every parse, so terms allocated by an
// earlier parse presumably must not be used afterwards (TODO confirm).
267 nmem_reset(sp->nmem);
270 return sp_expr(sp, n, wrd);
// Tests node n against xpath predicate p. Relation predicates are limited to
// attribute names (leading @) compared with =; anything else is logged and
// ignored. Boolean predicates combine both operands with and / or by
// recursion. The attribute list of n is read through n->u.tag, so n is
// assumed to be a tag node (TODO confirm at the call site).
273 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
282 if (p->which == XPATH_PREDICATE_RELATION) {
283 if (p->u.relation.name[0]) {
// Only attribute names are accepted on the left hand side.
284 if (*p->u.relation.name != '@') {
286 " Only attributes (@) are supported in xelm xpath predicates");
287 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
// Skip the leading @ to get the bare attribute name.
290 attname = p->u.relation.name + 1;
292 /* looking for the attribute with a specified name */
293 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
294 yaz_log(YLOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
296 if (!strcmp(attr->name, attname)) {
// An operator is present: only equality is implemented.
297 if (p->u.relation.op[0]) {
298 if (*p->u.relation.op != '=') {
300 "Only '=' relation is supported (%s)",p->u.relation.op);
301 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
304 yaz_log(YLOG_DEBUG," - value %s <-> %s",
305 p->u.relation.value, attr->value );
306 if (!strcmp(attr->value, p->u.relation.value)) {
311 /* attribute exists, no value specified */
316 yaz_log(YLOG_DEBUG, "return %d", res);
322 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
// Both operands are evaluated against the same node.
323 if (!strcmp(p->u.boolean.op,"and")) {
324 return d1_check_xpath_predicate(n, p->u.boolean.left)
325 && d1_check_xpath_predicate(n, p->u.boolean.right);
327 else if (!strcmp(p->u.boolean.op,"or")) {
328 return (d1_check_xpath_predicate(n, p->u.boolean.left)
329 || d1_check_xpath_predicate(n, p->u.boolean.right));
331 yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
// Runs text through the DFA whose state table is dfaar, starting at state
// dfaar[0]. The return statements are not visible here; the caller stores
// the result in a flag named ok, so non-zero presumably means a match.
340 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
342 struct DFA_state *s = dfaar[0]; /* start state */
345 const char *p = text;
// Take the next input character and look for a transition of the current
// state whose character range contains it.
348 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
349 if (c >= t->ch[0] && c <= t->ch[1])
353 /* move to next state and return if we get a match */
// Same range test against the transitions of the state just entered.
359 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
360 if (c >= t->ch[0] && c <= t->ch[1])
369 New function, looking for xpath "element" definitions in abs, by
370 tagpath, using a kind of ugly regexp search. The DFA was built while
371 parsing abs, so here we just go through them and try to match
372 against the given tagpath. The first matching entry is returned.
376 Added support for enhanced xelm. Now [] predicates are considered
377 as well, when selecting indexing rules... (why the hell it's called
// Looks up the termlist of the first xpath element definition in the schema
// of n whose DFA accepts tagpath and whose predicates hold for n and its
// ancestors. Returns the termlists of that definition; the result for the
// no-match case is not visible here (callers test it for null).
384 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
386 data1_absyn *abs = n->root->u.root.absyn;
387 data1_xpelement *xpe = abs->xp_elements;
390 struct xpath_location_step *xp;
// Room for the leading slash, the trailing newline and the terminator.
392 char *pexpr = xmalloc(strlen(tagpath)+5);
// The DFA is matched against the path wrapped as slash, path, newline.
395 sprintf (pexpr, "/%s\n", tagpath);
396 for (; xpe; xpe = xpe->next)
399 ok = dfa_match_first(xpe->dfa->states, pexpr);
403 /* we have to check the predicates up to the root node */
406 /* find the first tag up in the node structure */
407 for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent)
410 /* go from inside out in the node structure, while going
411 backwards through xpath location steps ... */
// The loop stops before index 0, so the step at index 0 is not checked here.
412 for (i = xpe->xpath_len - 1; i>0; i--)
// NOTE(review): nn is dereferenced here; no null check after the upward
// search is visible. Confirm that a tag ancestor always exists.
414 yaz_log(YLOG_DEBUG, "Checking step %d: %s on tag %s",
415 i, xp[i].part, nn->u.tag.tag);
417 if (!d1_check_xpath_predicate(nn, xp[i].predicate))
419 yaz_log(YLOG_DEBUG, " Predicates didn't match");
424 if (nn->which == DATA1N_tag)
436 yaz_log(YLOG_DEBUG, "Got it");
437 return xpe->termlists;
444 1 start element (tag)
446 3 start attr (and attr-exact)
454 Now, if there is a matching xelm described in abs, for the
455 indexed element or the attribute, then the data is handled according
456 to those definitions...
458 modified by pop, 2002-12-13
// Prepares three index words for one attribute: an element-begin marker
// carrying tag_path, the attribute value, and an element-end marker carrying
// tag_path again. The calls that hand each word to the extractor are not
// visible here.
// The index_name and attrSet assignments appear side by side; presumably
// they belong to alternative build configurations whose preprocessor lines
// are not visible (TODO confirm).
461 /* add xpath index for an attribute */
462 static void index_xpath_attr (char *tag_path, char *name, char *value,
463 char *structure, struct recExtractCtrl *p,
// Word 1: begin marker, index type 0, term is the tag path.
467 wrd->index_name = ZEBRA_XPATH_ELM_BEGIN;
469 wrd->attrSet = VAL_IDXPATH;
472 wrd->index_type = '0';
473 wrd->term_buf = tag_path;
474 wrd->term_len = strlen(tag_path);
// Word 2: the attribute value. The visible code uses the fixed index type w;
// any use of the structure parameter is not visible here.
479 wrd->index_name = ZEBRA_XPATH_ATTR;
483 wrd->index_type = 'w';
484 wrd->term_buf = value;
485 wrd->term_len = strlen(value);
// Word 3: end marker, same shape as the begin marker.
489 wrd->index_name = ZEBRA_XPATH_ELM_END;
493 wrd->index_type = '0';
494 wrd->term_buf = tag_path;
495 wrd->term_len = strlen(tag_path);
// Writes the tag path of n into tag_path_full, a buffer of max bytes. Tags
// are appended while walking from n towards the root, so the innermost tag
// comes first, and each tag is followed by a slash. The result is always
// terminated.
500 static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
505 /* we have to fetch the whole path to the data tag */
506 for (nn = n; nn; nn = nn->parent)
508 if (nn->which == DATA1N_tag)
510 size_t tlen = strlen(nn->u.tag.tag);
// Capacity check before adding the next component; the action taken when it
// does not fit is not visible here.
511 if (tlen + flen > (max - 2))
513 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
515 tag_path_full[flen++] = '/';
// The walk ends at the root node.
518 if (nn->which == DATA1N_root)
521 tag_path_full[flen] = 0;
// Emits xpath related index words for node n. When the schema has a termlist
// for the node path, words are produced according to it; default xpath
// indexing is used when enabled and no termlist entry already covered it.
// Data nodes, tags and, for start tags, attributes are handled in turn.
// Only part of the body is visible here; in particular the trailing
// parameter (an index name or a numeric use value, judging by the two log
// calls below) and the switch structure are not shown.
525 static void index_xpath(struct source_parser *sp, data1_node *n,
526 struct recExtractCtrl *p,
527 int level, RecWord *wrd,
537 char tag_path_full[1024];
538 int termlist_only = 1;
543 int xpath_is_start = 0;
549 yaz_log(YLOG_DEBUG, "index_xpath level=%d xpath_index=%s",
552 yaz_log(YLOG_DEBUG, "index_xpath level=%d use=%d", level, use);
// No schema at all, or a schema with xpath indexing enabled.
554 if ((!n->root->u.root.absyn) ||
555 (n->root->u.root.absyn->enable_xpath_indexing)) {
// Data node: the default term is the node text.
562 wrd->term_buf = n->u.data.data;
563 wrd->term_len = n->u.data.len;
566 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
568 /* If we have a matching termlist... */
569 if (n->root->u.root.absyn &&
570 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
572 for (; tl; tl = tl->next)
574 /* need to copy recword because it may be changed */
576 wrd->index_type = *tl->structure;
577 memcpy (&wrd_tl, wrd, sizeof(*wrd));
// Let the source expression of this termlist entry compute the term.
579 sp_parse(sp, n, &wrd_tl, tl->source);
586 /* this is the ! case, so structure is for the xpath index */
588 wrd_tl.index_name = xpath_index;
590 wrd_tl.attrSet = VAL_IDXPATH;
591 wrd_tl.attrUse = use;
// Show mode prints at most the first 40 bytes of the term.
593 if (p->flagShowRecords)
596 printf("%*sXPath index", (level + 1) * 4, "");
597 printf (" XData:\"");
598 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
599 fputc (wrd_tl.term_buf[i], stdout);
601 if (wrd_tl.term_len > 40)
603 fputc ('\n', stdout);
606 (*p->tokenAdd)(&wrd_tl);
609 /* this is just the old fashioned attribute based index */
611 wrd_tl.index_name = tl->index_name;
613 wrd_tl.attrSet = (int) (tl->att->parent->reference);
614 wrd_tl.attrUse = tl->att->locals->local;
616 if (p->flagShowRecords)
619 printf("%*sIdx: [%s]", (level + 1) * 4, "",
622 printf("%s %s", tl->index_name, tl->source);
624 printf("%s:%s [%d] %s",
625 tl->att->parent->name,
626 tl->att->name, tl->att->value,
629 printf (" XData:\"");
630 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
631 fputc (wrd_tl.term_buf[i], stdout);
633 if (wrd_tl.term_len > 40)
635 fputc ('\n', stdout);
638 (*p->tokenAdd)(&wrd_tl);
642 /* xpath indexing is done, if there was no termlist given,
643 or no ! in the termlist, and default indexing is enabled... */
644 if (!p->flagShowRecords && !xpdone && !termlist_only)
647 wrd->index_name = xpath_index;
649 wrd->attrSet = VAL_IDXPATH;
652 wrd->index_type = 'w';
// Tag node: the term is the full tag path, indexed with type 0.
657 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
659 wrd->index_type = '0';
660 wrd->term_buf = tag_path_full;
661 wrd->term_len = strlen(tag_path_full);
663 wrd->index_name = xpath_index;
665 wrd->attrSet = VAL_IDXPATH;
668 if (p->flagShowRecords)
670 printf("%*s tag=", (level + 1) * 4, "");
671 for (i = 0; i<wrd->term_len && i < 40; i++)
672 fputc (wrd->term_buf[i], stdout);
683 /* Add tag start/end xpath index, only when there is a ! in
684 the appropriate xelm directive, or default xpath indexing
687 if (!(do_xpindex = 1 - termlist_only))
689 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n)))
691 for (; tl; tl = tl->next)
703 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
706 if (xpath_is_start == 1) /* only for the starting tag... */
708 #define MAX_ATTR_COUNT 50
709 data1_termlist *tll[MAX_ATTR_COUNT];
713 /* get termlists for attributes, and find out, if we have to do xpath indexing */
714 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
// First pass over the attributes: attribute name and name plus value words.
719 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
721 int do_xpindex = 1 - termlist_only;
723 char attr_tag_path_full[1024];
725 /* this could be cached as well */
// NOTE(review): tag_path_full may itself use up to 1024 bytes, so adding the
// at-sign, the attribute name and a slash can exceed this buffer; no bound
// check is visible. Confirm and consider a bounded format call.
726 sprintf (attr_tag_path_full, "@%s/%s",
727 xp->name, tag_path_full);
// NOTE(review): no check of i against MAX_ATTR_COUNT is visible here;
// confirm that tags with more attributes cannot overrun tll.
729 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
731 /* if there is a ! in the xelm termlist, or default indexing is on,
732 proceed with xpath idx */
735 for (; tl; tl = tl->next)
749 /* attribute (no value) */
750 wrd->index_type = '0';
752 wrd->index_name = ZEBRA_XPATH_ATTR;
756 wrd->term_buf = xp->name;
757 wrd->term_len = strlen(xp->name);
// The combined form is built only when name and value fit into comb.
763 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
765 /* attribute value exact */
766 strcpy (comb, xp->name);
768 strcat (comb, xp->value);
771 wrd->index_name = ZEBRA_XPATH_ATTR;
775 wrd->index_type = '0';
776 wrd->term_buf = comb;
777 wrd->term_len = strlen(comb);
// Second pass over the attributes: words driven by the attribute termlists.
787 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
789 char attr_tag_path_full[1024];
792 sprintf (attr_tag_path_full, "@%s/%s",
793 xp->name, tag_path_full);
797 /* If there is a termlist given (=xelm directive) */
798 for (; tl; tl = tl->next)
806 /* add xpath index for the attribute */
807 index_xpath_attr (attr_tag_path_full, xp->name,
808 xp->value, tl->structure,
812 /* add attribute based index for the attribute */
816 wrd->index_name = tl->index_name;
819 (tl->att->parent->reference);
820 wrd->attrUse = tl->att->locals->local;
822 wrd->index_type = *tl->structure;
823 wrd->term_buf = xp->value;
824 wrd->term_len = strlen(xp->value);
830 /* if there was no termlist for the given path,
831 or the termlist didn't have a ! element, index
832 the attribute as "w" */
833 if ((!xpdone) && (!termlist_only))
835 index_xpath_attr (attr_tag_path_full, xp->name,
836 xp->value, "w", p, wrd);
// Indexes node n with the termlists of the nearest element definition found
// at par or one of its tag ancestors. For every termlist entry the source
// expression is evaluated on n, and non-empty terms are either printed
// (show mode) or prepared as index words. The call that submits the word is
// not visible here.
845 static void index_termlist (struct source_parser *sp, data1_node *par,
847 struct recExtractCtrl *p, int level, RecWord *wrd)
849 data1_termlist *tlist = 0;
850 data1_datatype dtype = DATA1K_string;
853 * cycle up towards the root until we find a tag with an att..
854 * this has the effect of indexing locally defined tags with
855 * the attribute of their ancestor in the record.
858 while (!par->u.tag.element)
859 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
// Nothing to do without an element definition that carries termlists.
861 if (!par || !(tlist = par->u.tag.element->termlists))
// The data type comes from the tag definition when there is one; how dtype
// is used afterwards is not visible here.
863 if (par->u.tag.element->tag)
864 dtype = par->u.tag.element->tag->kind;
866 for (; tlist; tlist = tlist->next)
868 /* consider source */
870 assert(tlist->source);
871 sp_parse(sp, n, wrd, tlist->source);
873 if (wrd->term_buf && wrd->term_len)
875 if (p->flagShowRecords)
878 printf("%*sIdx: [%s]", (level + 1) * 4, "",
881 printf("%s %s", tlist->index_name, tlist->source);
883 printf("%s:%s [%d] %s",
884 tlist->att->parent->name,
885 tlist->att->name, tlist->att->value,
888 printf (" XData:\"");
// At most the first 40 bytes of the term are shown.
889 for (i = 0; i<wrd->term_len && i < 40; i++)
890 fputc (wrd->term_buf[i], stdout);
892 if (wrd->term_len > 40)
894 fputc ('\n', stdout);
// Index mode: take the index type and the index identification from the
// termlist entry.
898 wrd->index_type = *tlist->structure;
900 wrd->index_name = tlist->index_name;
902 wrd->attrSet = (int) (tlist->att->parent->reference);
903 wrd->attrUse = tlist->att->locals->local;
// Recursive extraction walk: visits n and its following siblings, descending
// into children. Tags are indexed through their termlists and get xpath
// begin and end words; data nodes are indexed with the termlists of their
// parent tag and get an xpath data word. In show mode a description of each
// node is printed. A negative result from a recursive call is tested by the
// caller; the return statements themselves are not visible here.
911 static int dumpkeys_r(struct source_parser *sp,
912 data1_node *n, struct recExtractCtrl *p, int level,
915 for (; n; n = n->next)
917 if (p->flagShowRecords) /* display element description to user */
919 if (n->which == DATA1N_root)
921 printf("%*s", level * 4, "");
922 printf("Record type: '%s'\n", n->u.root.type);
924 else if (n->which == DATA1N_tag)
928 printf("%*s", level * 4, "");
// Tags without an element definition are reported as local tags.
929 if (!(e = n->u.tag.element))
930 printf("Local tag: '%s'\n", n->u.tag.tag);
933 printf("Elm: '%s' ", e->name);
936 data1_tag *t = e->tag;
938 printf("TagNam: '%s' ", t->names->name);
941 printf("%s[%d],", t->tagset->name, t->tagset->type);
944 if (t->which == DATA1T_numeric)
945 printf("%d)", t->value.numeric);
947 printf("'%s')", t->value.string);
954 if (n->which == DATA1N_tag)
956 index_termlist(sp, n, n, p, level, wrd);
957 /* index start tag */
// Two call forms appear next to each other here and below; presumably they
// belong to alternative build configurations (TODO confirm).
959 if (n->root->u.root.absyn)
960 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN,
963 if (n->root->u.root.absyn)
964 index_xpath(sp, n, p, level, wrd, 1);
// Descend into the children one level deeper.
969 if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
973 if (n->which == DATA1N_data)
975 data1_node *par = get_parent_tag(p->dh, n);
977 if (p->flagShowRecords)
979 printf("%*s", level * 4, "");
// Long data is abbreviated to its first 170 and last 70 bytes.
981 if (n->u.data.len > 256)
982 printf("'%.170s ... %.70s'\n", n->u.data.data,
983 n->u.data.data + n->u.data.len-70);
984 else if (n->u.data.len > 0)
985 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
991 index_termlist(sp, par, n, p, level, wrd);
994 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA,
997 index_xpath(sp, n, p, level, wrd, 1016);
// Tag again, after its children: xpath end word.
1001 if (n->which == DATA1N_tag)
1005 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END,
1008 index_xpath(sp, n, p, level, wrd, 2);
1012 if (p->flagShowRecords && n->which == DATA1N_root)
1014 printf("%*s-------------\n\n", level * 4, "");
// Runs the extraction walk over the tree rooted at n with a parser object
// that lives only for the duration of the call.
1020 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
1022 struct source_parser *sp = source_parser_create();
1023 int r = dumpkeys_r(sp, n, p, 0, wrd);
1024 source_parser_destroy(sp);
// Extracts index words from an already built data1 tree. When the root has
// a schema, its reference is turned into an OID and reported through
// schemaAdd. Returns the result of dumpkeys.
1028 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
1031 int oidtmp[OID_SIZE];
1034 oe.proto = PROTO_Z3950;
1035 oe.oclass = CLASS_SCHEMA;
1036 if (n->u.root.absyn)
1038 oe.value = n->u.root.absyn->reference;
// The schema is reported only when it maps to a known OID.
1040 if ((oid_ent_to_oid (&oe, oidtmp)))
1041 (*p->schemaAdd)(p, oidtmp);
// Let the extractor initialise the word before the walk starts.
1043 (*p->init)(p, &wrd);
1045 return dumpkeys(n, p, &wrd);
// Reads one record through grs_read, normalises the resulting tree and
// extracts its index words. Returns one of the RECCTRL_EXTRACT codes seen
// below: EOF when no record was read, ERROR or ERROR_GENERIC on failure,
// OK on success.
1048 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
1050 data1_node *(*grs_read)(struct grs_read_info *))
1053 struct grs_read_info gri;
1055 int oidtmp[OID_SIZE];
// Hand the reader the stream callbacks and position of the extract control.
1058 gri.readf = p->readf;
1059 gri.seekf = p->seekf;
1060 gri.tellf = p->tellf;
1063 gri.offset = p->offset;
1066 gri.clientData = clientData;
1068 n = (*grs_read)(&gri);
1070 return RECCTRL_EXTRACT_EOF;
1071 oe.proto = PROTO_Z3950;
1072 oe.oclass = CLASS_SCHEMA;
// The two schema tests below look contradictory when read together;
// presumably one of them sits in a disabled preprocessor branch that is not
// visible here (TODO confirm).
1074 if (!n->u.root.absyn)
1075 return RECCTRL_EXTRACT_ERROR;
1077 if (n->u.root.absyn)
1079 oe.value = n->u.root.absyn->reference;
1080 if ((oid_ent_to_oid (&oe, oidtmp)))
1081 (*p->schemaAdd)(p, oidtmp);
1083 data1_concat_text(p->dh, mem, n);
1085 /* ensure our data1 tree is UTF-8 */
1086 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1089 data1_pr_tree (p->dh, n, stdout);
1092 (*p->init)(p, &wrd);
// The tree is released on both the failure and the success path.
1093 if (dumpkeys(n, p, &wrd) < 0)
1095 data1_free_tree(p->dh, n);
1096 return RECCTRL_EXTRACT_ERROR_GENERIC;
1098 data1_free_tree(p->dh, n);
1099 return RECCTRL_EXTRACT_OK;
// Public extract entry point: creates a memory pool for the record and
// delegates to grs_extract_sub. The release of the pool and the return
// statement are not visible here.
1102 int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
1103 data1_node *(*grs_read)(struct grs_read_info *))
1106 NMEM mem = nmem_create ();
1107 ret = grs_extract_sub(clientData, p, mem, grs_read);
1113 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
// Applies the record composition c to the tree n. Simple compositions and
// complex ones with an element set name are resolved through
// data1_getesetbyname; complex ones may instead carry an Espec-1 directly.
// On failure a diagnostic code is returned (25 and 26 are visible below) and
// addinfo may receive the offending element set name, allocated on o. When
// an Espec-1 applies, the result of data1_doespec1 is returned.
1115 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
1116 char **addinfo, ODR o)
1118 data1_esetname *eset;
1119 Z_Espec1 *espec = 0;
1124 case Z_RecordComp_simple:
1125 if (c->u.simple->which != Z_ElementSetNames_generic)
1126 return 26; /* only generic form supported. Fix this later */
1127 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1128 c->u.simple->u.generic)))
1130 yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1131 *addinfo = odr_strdup(o, c->u.simple->u.generic);
1132 return 25; /* invalid esetname */
1134 yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
1135 c->u.simple->u.generic);
1138 case Z_RecordComp_complex:
1139 if (c->u.complex->generic)
1141 /* insert check for schema */
// Inside this branch p is a local that names the element specification.
1142 if ((p = c->u.complex->generic->elementSpec))
1146 case Z_ElementSpec_elementSetName:
1148 data1_getesetbyname(dh, n->u.root.absyn,
1149 p->u.elementSetName)))
1151 yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
1152 p->u.elementSetName);
1153 *addinfo = odr_strdup(o, p->u.elementSetName);
1154 return 25; /* invalid esetname */
1156 yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
1157 p->u.elementSetName);
1160 case Z_ElementSpec_externalSpec:
// Only Espec-1 is accepted as an external specification.
1161 if (p->u.externalSpec->which == Z_External_espec1)
1163 yaz_log(YLOG_DEBUG, "Got Espec-1");
1164 espec = p->u.externalSpec-> u.espec1;
1168 yaz_log(YLOG_LOG, "Unknown external espec.");
1169 return 25; /* bad. what is proper diagnostic? */
1176 return 26; /* fix */
1180 yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1181 return data1_doespec1(dh, n, espec);
1185 yaz_log(YLOG_DEBUG, "Element: all match");
1190 /* Add Zebra info in separate namespace ...
1193 <metadata xmlns="http://www.indexdata.dk/zebra/">
1195 <localnumber>447</localnumber>
1196 <filename>records/genera.xml</filename>
// Appends a metadata element named idzebra, with an xmlns attribute for the
// Zebra namespace, under top. Its children size, score, localnumber and
// filename are filled from the retrieve control p. The text nodes i2 and i4
// only provide line breaks and indentation in the serialised output. The
// conditions guarding the individual children are not visible here.
1201 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1204 const char *idzebra_ns[3];
1205 const char *i2 = "\n ";
1206 const char *i4 = "\n ";
// Attribute list for the new tag: name and value pairs.
1209 idzebra_ns[0] = "xmlns";
1210 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1213 data1_mk_text (p->dh, mem, i2, top);
1215 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1217 data1_mk_text (p->dh, mem, "\n", top);
1219 data1_mk_text (p->dh, mem, i4, n);
1221 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1225 data1_mk_text (p->dh, mem, i4, n);
1226 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1228 data1_mk_text (p->dh, mem, i4, n);
1229 data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
1232 data1_mk_text (p->dh, mem, i4, n);
1233 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1235 data1_mk_text (p->dh, mem, i2, n);
// Public retrieve entry point. Reads one record through grs_read, adds
// system tags (size, rank, sysno) and, for XML, a metadata element, maps the
// tree to the requested schema or syntax, applies the element
// specification, and serialises the tree into p->rec_buf in the requested
// format. Failures are reported through p->diagnostic; 238 is used whenever
// the record cannot be delivered in the requested form. Only part of the
// body is visible here; the case labels of the output switch and the return
// statements are not shown.
1238 int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
1239 data1_node *(*grs_read)(struct grs_read_info *))
1241 data1_node *node = 0, *onode = 0, *top;
1244 int res, selected = 0;
1246 struct grs_read_info gri;
1247 const char *tagname;
1249 int requested_schema = VAL_NONE;
1250 data1_marctab *marctab;
// Stage 1: read the record into a data1 tree.
1253 mem = nmem_create();
1254 gri.readf = p->readf;
1255 gri.seekf = p->seekf;
1256 gri.tellf = p->tellf;
1262 gri.clientData = clientData;
1264 yaz_log(YLOG_DEBUG, "grs_retrieve");
1265 node = (*grs_read)(&gri);
1272 data1_concat_text(p->dh, mem, node);
1275 data1_pr_tree (p->dh, node, stdout);
1277 top = data1_get_root_tag (p->dh, node);
// Stage 2: system tags. Each value is formatted into the local buffer of the
// new data node.
1279 yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1280 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1282 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1284 dnew->u.data.what = DATA1I_text;
1285 dnew->u.data.data = dnew->lbuf;
1286 sprintf(dnew->u.data.data, "%d", p->recordSize);
1287 dnew->u.data.len = strlen(dnew->u.data.data);
// The rank tag is added only for a non-negative score.
1290 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1291 if (tagname && p->score >= 0 &&
1292 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1294 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1295 dnew->u.data.what = DATA1I_num;
1296 dnew->u.data.data = dnew->lbuf;
1297 sprintf(dnew->u.data.data, "%d", p->score);
1298 dnew->u.data.len = strlen(dnew->u.data.data);
// The sysno tag is added only for a positive local number.
1301 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1302 "localControlNumber");
1303 if (tagname && p->localno > 0 &&
1304 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1306 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1307 dnew->u.data.what = DATA1I_text;
1308 dnew->u.data.data = dnew->lbuf;
1310 sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
1311 dnew->u.data.len = strlen(dnew->u.data.data);
1314 if (p->input_format == VAL_TEXT_XML)
1315 zebra_xml_metadata (p, top, mem);
1318 data1_pr_tree (p->dh, node, stdout);
// Stage 3: determine the schema requested in a complex composition.
1320 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1321 p->comp->u.complex->generic &&
1322 p->comp->u.complex->generic->which == Z_Schema_oid &&
1323 p->comp->u.complex->generic->schema.oid)
1325 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1327 requested_schema = oe->value;
1329 /* If schema has been specified, map if possible, then check that
1330 * we got the right one
1332 if (requested_schema != VAL_NONE)
1334 yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1335 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1337 if (map->target_absyn_ref == requested_schema)
// The mapped tree replaces node; the source tree is passed as onode and is
// released separately further down.
1340 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1349 if (node->u.root.absyn &&
1350 requested_schema != node->u.root.absyn->reference)
1352 p->diagnostic = 238;
1358 * Does the requested format match a known syntax-mapping? (this reflects
1359 * the overlap of schema and formatting which is inherent in the MARC
1362 yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1363 if (node->u.root.absyn)
1364 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1366 if (map->target_absyn_ref == p->input_format)
1369 if (!(node = data1_map_record(p->dh, onode, map, mem)))
// Stage 4: for GRS-1 output, add a schemaIdentifier tag holding the schema
// OID.
1378 yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1379 if (node->u.root.absyn &&
1380 node->u.root.absyn->reference != VAL_NONE &&
1381 p->input_format == VAL_GRS1)
1385 int oidtmp[OID_SIZE];
1387 oe.proto = PROTO_Z3950;
1388 oe.oclass = CLASS_SCHEMA;
1389 oe.value = node->u.root.absyn->reference;
1391 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
// Inside this block p is used as a character cursor into tmp (see the
// pointer differences below), which hides the control parameter; the data1
// handle is therefore copied into dh first.
1394 data1_handle dh = p->dh;
// The OID array is terminated by a negative component.
1398 for (ii = oid; *ii >= 0; ii++)
1402 sprintf(p, "%d", *ii);
1405 if ((dnew = data1_mk_tag_data_wd(dh, top,
1406 "schemaIdentifier", mem)))
1408 dnew->u.data.what = DATA1I_oid;
1409 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1410 memcpy(dnew->u.data.data, tmp, p - tmp);
1411 dnew->u.data.len = p - tmp;
// Stage 5: element specification. A positive result is a diagnostic and ends
// the retrieval after both trees are released.
1416 yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1417 if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
1420 p->diagnostic = res;
1422 data1_free_tree(p->dh, onode);
1423 data1_free_tree(p->dh, node);
1427 else if (p->comp && !res)
1431 data1_pr_tree (p->dh, node, stdout);
// Stage 6: serialise. The output format equals the input format, or SUTRS
// when none was given.
1433 yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1434 switch (p->output_format = (p->input_format != VAL_NONE ?
1435 p->input_format : VAL_SUTRS))
1439 data1_pr_tree (p->dh, node, stdout);
1441 /* default output encoding for XML is UTF-8 */
1442 data1_iconv (p->dh, mem, node,
1443 p->encoding ? p->encoding : "UTF-8",
1444 data1_get_encoding(p->dh, node));
1446 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1448 p->diagnostic = 238;
// Text results are copied into ODR memory so that they do not depend on the
// buffer returned by the data1 serialiser.
1451 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1452 memcpy (new_buf, p->rec_buf, p->rec_len);
1453 p->rec_buf = new_buf;
1457 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1459 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1461 p->diagnostic = 238; /* not available in requested syntax */
// A rec_len of all ones presumably marks rec_buf as a decoded structure
// rather than a byte buffer (TODO confirm against the caller).
1463 p->rec_len = (size_t) (-1);
1466 /* ensure our data1 tree is UTF-8 */
1467 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1469 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1471 p->diagnostic = 238;
1473 p->rec_len = (size_t) (-1);
1476 /* ensure our data1 tree is UTF-8 */
1477 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1478 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1480 p->diagnostic = 238;
1482 p->rec_len = (size_t) (-1);
1486 data1_iconv (p->dh, mem, node, p->encoding,
1487 data1_get_encoding(p->dh, node));
1488 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1490 p->diagnostic = 238;
1493 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1494 memcpy (new_buf, p->rec_buf, p->rec_len);
1495 p->rec_buf = new_buf;
1500 data1_iconv (p->dh, mem, node, p->encoding,
1501 data1_get_encoding(p->dh, node));
1502 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1504 p->diagnostic = 238;
1507 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1508 memcpy (new_buf, p->rec_buf, p->rec_len);
1509 p->rec_buf = new_buf;
// Remaining formats need a schema with a MARC table whose reference equals
// the requested format.
1513 if (!node->u.root.absyn)
1515 p->diagnostic = 238;
1518 for (marctab = node->u.root.absyn->marc; marctab;
1519 marctab = marctab->next)
1520 if (marctab->reference == p->input_format)
1524 p->diagnostic = 238;
1528 data1_iconv (p->dh, mem, node, p->encoding,
1529 data1_get_encoding(p->dh, node));
1530 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1531 selected, &p->rec_len)))
1532 p->diagnostic = 238;
1535 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1536 memcpy (new_buf, p->rec_buf, p->rec_len);
1537 p->rec_buf = new_buf;
// Release the working tree and, when a mapping took place, the source tree.
1541 data1_free_tree(p->dh, node);
1543 data1_free_tree(p->dh, onode);