1 /* $Id: recgrs.c,v 1.106 2006-02-06 13:34:13 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 #include <sys/types.h>
32 #include <idzebra/recgrs.h>
/* Size constant; nothing in the visible part of this file refers to it. */
34 #define GRS_MAX_WORD 512
/*
 * Parser state for termlist "source" expressions.  The functions below
 * use the members src, tok, len, lookahead and nmem; the member
 * declarations themselves are not visible in this excerpt.
 */
36 struct source_parser {
/*
 * Lexer step for source expressions.  Skips blanks at sp->src, then scans
 * up to the next delimiter character (one of "<>();,-: ") or the end of
 * the string.  The visible code copies the current source character into
 * sp->lookahead; the statements that record the token (sp->tok / sp->len,
 * which sp_expr reads) and the return value are not visible in this
 * excerpt.
 */
44 static int sp_lex(struct source_parser *sp)
/* skip leading blanks */
46 while (*sp->src == ' ')
/* scan until a delimiter character or the terminating NUL */
50 while (*sp->src && !strchr("<>();,-: ", *sp->src))
59 sp->lookahead = *sp->src;
/*
 * Forward declaration: sp_range and sp_first call sp_expr, which is
 * defined after them.
 */
66 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
/*
 * Parses the argument list of a range(...) source expression:
 *   '(' expr ',' expr [ ',' expr ] ')'
 * The first expression is evaluated into wrd, the second is converted
 * with atoi_n() into the start offset and the optional third into the
 * length.  The term in wrd is then narrowed by advancing term_buf by
 * start and reducing term_len by the same amount; term_len is finally
 * compared against len.  The statement applying that limit, the error
 * returns and the default value of len are not visible in this excerpt.
 * NOTE(review): no check that start lies within the term is visible
 * here; confirm against the full source.
 */
68 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
75 if (sp->lookahead != '(')
77 sp_lex(sp); /* skip ( */
/* first argument: the term that will be cut */
80 if (!sp_expr(sp, n, wrd))
83 if (sp->lookahead != ',')
85 sp_lex(sp); /* skip , */
/* second argument: start offset, evaluated into a scratch word */
88 if (!sp_expr(sp, n, &tmp_w))
90 start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
/* optional third argument: length */
92 if (sp->lookahead == ',')
94 sp_lex(sp); /* skip , */
97 if (!sp_expr(sp, n, &tmp_w))
99 len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
105 if (sp->lookahead != ')')
/* only narrow a non-empty term */
109 if (wrd->term_buf && wrd->term_len)
111 wrd->term_buf += start;
112 wrd->term_len -= start;
113 if (wrd->term_len > len)
/*
 * Parses the argument list of a first(...) source expression:
 *   '(' expr { ',' expr } ')'
 * The first expression yields the term to search in; every following
 * expression is a search string.  min_pos keeps the smallest offset at
 * which any of the search strings occurs in the term.  The result
 * replaces the term in wrd with that offset formatted as a decimal
 * string allocated from sp->nmem; 0 is used when nothing was found.
 * Error returns are not visible in this excerpt.
 */
119 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
124 if (sp->lookahead != '(')
126 sp_lex(sp); /* skip ( */
127 if (!sp_expr(sp, n, wrd))
/* one iteration per search string */
129 while (sp->lookahead == ',')
133 sp_lex(sp); /* skip , */
135 if (!sp_expr(sp, n, &search_w))
/* try every start offset i in the term */
137 for (i = 0; i<wrd->term_len; i++)
/* compare the search string against the term at offset i */
140 for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
141 if (wrd->term_buf[i+j] != search_w.term_buf[j])
143 if (j == search_w.term_len) /* match ? */
/* keep the smallest matching offset seen so far */
145 if (min_pos == -1 || i < min_pos)
151 if (sp->lookahead != ')')
155 min_pos = 0; /* the default if not found */
/* the numeric position becomes the new term text */
156 sprintf(num_str, "%d", min_pos);
157 wrd->term_buf = nmem_strdup(sp->nmem, num_str);
158 wrd->term_len = strlen(wrd->term_buf);
/*
 * Evaluates one source expression for node n and stores the resulting
 * term in wrd (term_buf / term_len).  It first checks that the lookahead
 * is 't' (presumably the marker for a plain token; where it is assigned
 * is not visible here) and then dispatches on the token text:
 *   data        - the data of n, when n is a data node
 *   tag         - the tag name of n, when n is a tag node
 *   attr(expr)  - value of the attribute of n whose name is given by expr
 *   first(...)  - handled by sp_first
 *   range(...)  - handled by sp_range
 *   digits      - a token starting with a digit, copied as a literal
 *   'text'      - a single-quoted token longer than two characters,
 *                 copied without the quotes
 * Literal copies are allocated from sp->nmem.  The plain return
 * statements and the handling of unknown tokens are not visible in this
 * excerpt.
 */
162 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
164 if (sp->lookahead != 't')
166 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
168 if (n->which == DATA1N_data)
170 wrd->term_buf = n->u.data.data;
171 wrd->term_len = n->u.data.len;
175 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
177 if (n->which == DATA1N_tag)
179 wrd->term_buf = n->u.tag.tag;
180 wrd->term_len = strlen(n->u.tag.tag);
184 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
188 if (sp->lookahead != '(')
/* the attribute name is itself an expression, evaluated into tmp_w */
192 if (!sp_expr(sp, n, &tmp_w))
197 if (n->which == DATA1N_tag)
199 data1_xattr *p = n->u.tag.attributes;
/*
 * NOTE(review): this loop keeps going only while BOTH the name length
 * differs AND the leading bytes differ.  It therefore stops at the first
 * attribute whose name merely has the same length as the requested name,
 * or whose first term_len bytes equal it.  An exact-name match would need
 * the two tests joined with || instead of &&.  Confirm the intent before
 * changing it.
 */
200 while (p && strlen(p->name) != tmp_w.term_len &&
201 memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
205 wrd->term_buf = p->value;
206 wrd->term_len = strlen(p->value);
209 if (sp->lookahead != ')')
213 else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
215 return sp_first(sp, n, wrd);
217 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
219 return sp_range(sp, n, wrd);
/* numeric literal: copy the token text as the term */
221 else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
224 wrd->term_len = sp->len;
225 b = nmem_malloc(sp->nmem, sp->len);
226 memcpy(b, sp->tok, sp->len);
/* quoted literal: copy the text between the two quote characters */
230 else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
233 wrd->term_len = sp->len - 2;
234 b = nmem_malloc(sp->nmem, wrd->term_len);
235 memcpy(b, sp->tok+1, wrd->term_len);
/*
 * Allocates a source_parser with xmalloc() and gives it a fresh NMEM
 * pool.  Initialisation of the other members and the return statement
 * are not visible in this excerpt.
 */
248 static struct source_parser *source_parser_create()
250 struct source_parser *sp = xmalloc(sizeof(*sp));
252 sp->nmem = nmem_create();
/*
 * Destroys the NMEM pool owned by the parser.  Releasing the parser
 * structure itself is not visible in this excerpt.
 */
256 static void source_parser_destroy(struct source_parser *sp)
260 nmem_destroy(sp->nmem);
/*
 * Entry point of the source expression parser: resets the parser's NMEM
 * pool and evaluates an expression for node n into wrd via sp_expr(),
 * whose result is returned.  How src is handed to the lexer is not
 * visible in this excerpt.  Because the pool is reset on every call,
 * term buffers allocated from sp->nmem by an earlier call must not be
 * used afterwards.
 */
264 static int sp_parse(struct source_parser *sp,
265 data1_node *n, RecWord *wrd, const char *src)
271 nmem_reset(sp->nmem);
274 return sp_expr(sp, n, wrd);
/*
 * Checks an xpath predicate p against node n.
 *
 * Relation predicates: the name must start with '@' (attributes only),
 * otherwise a warning is logged saying the predicate is ignored.  The
 * attributes of n are searched for the name following the '@'.  When an
 * operator is given it must be '=', and the attribute value is compared
 * with the predicate value using strcmp(); with no operator the mere
 * existence of the attribute is what is tested.
 *
 * Boolean predicates: "and" / "or" combine the results of the left and
 * right sub-predicates recursively; any other operator is logged as
 * unknown and ignored.
 *
 * The statements that set and return the result are not visible in this
 * excerpt.
 */
277 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
286 if (p->which == XPATH_PREDICATE_RELATION) {
287 if (p->u.relation.name[0]) {
288 if (*p->u.relation.name != '@') {
290 " Only attributes (@) are supported in xelm xpath predicates");
291 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
/* attribute name without the leading '@' */
294 attname = p->u.relation.name + 1;
296 /* looking for the attribute with a specified name */
297 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
298 yaz_log(YLOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
300 if (!strcmp(attr->name, attname)) {
301 if (p->u.relation.op[0]) {
302 if (*p->u.relation.op != '=') {
304 "Only '=' relation is supported (%s)",p->u.relation.op);
305 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
308 yaz_log(YLOG_DEBUG," - value %s <-> %s",
309 p->u.relation.value, attr->value );
310 if (!strcmp(attr->value, p->u.relation.value)) {
315 /* attribute exists, no value specified */
320 yaz_log(YLOG_DEBUG, "return %d", res);
326 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
327 if (!strcmp(p->u.boolean.op,"and")) {
328 return d1_check_xpath_predicate(n, p->u.boolean.left)
329 && d1_check_xpath_predicate(n, p->u.boolean.right);
331 else if (!strcmp(p->u.boolean.op,"or")) {
332 return (d1_check_xpath_predicate(n, p->u.boolean.left)
333 || d1_check_xpath_predicate(n, p->u.boolean.right));
335 yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
/*
 * Runs text through the DFA whose start state is dfaar[0].  For each
 * input character the transitions of the current state are scanned for
 * a character range [ch[0], ch[1]] that contains it.  According to the
 * inline comment the function returns as soon as a match is reached;
 * the state updates and the return statements are not visible in this
 * excerpt.
 */
344 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
346 struct DFA_state *s = dfaar[0]; /* start state */
349 const char *p = text;
/* fetch the next character and look for a transition covering it */
352 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
353 if (c >= t->ch[0] && c <= t->ch[1])
357 /* move to next state and return if we get a match */
363 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
364 if (c >= t->ch[0] && c <= t->ch[1])
373 New function, looking for xpath "element" definitions in abs, by
374 tagpath, using a kind of ugly regexp search. The DFA was built while
375 parsing abs, so here we just go through them and try to match
376 against the given tagpath. The first matching entry is returned.
380 Added support for enhanced xelm. Now [] predicates are considered
381 as well, when selecting indexing rules... (why the hell it's called
/*
 * Looks up the termlists of the first xpath element definition of the
 * record's abstract syntax that matches tagpath for node n.
 *
 * The tag path is wrapped as "/<tagpath>\n" and matched against the DFA
 * of each entry in abs->xp_elements with dfa_match_first().  For a
 * candidate entry the predicates of its location steps are then checked
 * with d1_check_xpath_predicate(), walking from the innermost step
 * backwards while moving from the nearest tag node upwards.
 *
 * abs is dereferenced without a NULL check, so the root of n must carry
 * an abstract syntax.  The release of pexpr and the not-found return are
 * not visible in this excerpt.
 */
388 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
390 data1_absyn *abs = n->root->u.root.absyn;
391 data1_xpelement *xpe = abs->xp_elements;
394 struct xpath_location_step *xp;
/* room for the leading '/', the trailing newline and the terminator */
396 char *pexpr = xmalloc(strlen(tagpath)+5);
399 sprintf (pexpr, "/%s\n", tagpath);
400 for (; xpe; xpe = xpe->next)
403 ok = dfa_match_first(xpe->dfa->states, pexpr);
407 /* we have to check the perdicates up to the root node */
410 /* find the first tag up in the node structure */
411 for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent)
414 /* go from inside out in the node structure, while going
415 backwards trough xpath location steps ... */
416 for (i = xpe->xpath_len - 1; i>0; i--)
418 yaz_log(YLOG_DEBUG, "Checking step %d: %s on tag %s",
419 i, xp[i].part, nn->u.tag.tag);
421 if (!d1_check_xpath_predicate(nn, xp[i].predicate))
423 yaz_log(YLOG_DEBUG, " Predicates didn't match");
428 if (nn->which == DATA1N_tag)
440 yaz_log(YLOG_DEBUG, "Got it");
441 return xpe->termlists;
448 1 start element (tag)
450 3 start attr (and attr-exact)
458 Now, if there is a matching xelm described in abs, for the
459 indexed element or the attribute, then the data is handled according
460 to those definitions...
462 modified by pop, 2002-12-13
/*
 * Prepares wrd three times in a row for one attribute:
 *   1. ZEBRA_XPATH_ELM_BEGIN, type '0', term = tag_path
 *   2. ZEBRA_XPATH_ATTR,      type 'w', term = value
 *   3. ZEBRA_XPATH_ELM_END,   type '0', term = tag_path
 * The calls that hand each prepared word to the extractor are not
 * visible in this excerpt.  Both index_name and attrSet assignments
 * appear; presumably they belong to alternative conditional-compilation
 * branches whose directives are missing from this listing.
 */
465 /* add xpath index for an attribute */
466 static void index_xpath_attr (char *tag_path, char *name, char *value,
467 char *structure, struct recExtractCtrl *p,
/* 1: element begin, keyed by the attribute's tag path */
471 wrd->index_name = ZEBRA_XPATH_ELM_BEGIN;
473 wrd->attrSet = VAL_IDXPATH;
476 wrd->index_type = '0';
477 wrd->term_buf = tag_path;
478 wrd->term_len = strlen(tag_path);
/* 2: the attribute value itself */
483 wrd->index_name = ZEBRA_XPATH_ATTR;
487 wrd->index_type = 'w';
488 wrd->term_buf = value;
489 wrd->term_len = strlen(value);
/* 3: element end, same tag path as the begin entry */
493 wrd->index_name = ZEBRA_XPATH_ELM_END;
497 wrd->index_type = '0';
498 wrd->term_buf = tag_path;
499 wrd->term_len = strlen(tag_path);
/*
 * Builds the tag path of node n in tag_path_full (capacity max bytes).
 * Walking from n towards the root, the name of every tag node is
 * appended followed by '/', so the innermost tag comes first.  The walk
 * is cut short when the next name would not fit (tlen + flen > max - 2)
 * and when the root node is reached.  The result is NUL terminated.
 */
504 static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
509 /* we have to fetch the whole path to the data tag */
510 for (nn = n; nn; nn = nn->parent)
512 if (nn->which == DATA1N_tag)
514 size_t tlen = strlen(nn->u.tag.tag);
/* leave room for the '/' separator and the terminator */
515 if (tlen + flen > (max - 2))
517 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
519 tag_path_full[flen++] = '/';
522 if (nn->which == DATA1N_root)
525 tag_path_full[flen] = 0;
/*
 * Produces the xpath related index entries for node n.
 *
 * Data part: the term is the node's data and tag_path_full is built with
 * mk_tag_path_full().  When the abstract syntax has a termlist matching
 * that path, every termlist entry is processed on a copy of wrd
 * (wrd_tl): the source expression is evaluated with sp_parse() and the
 * word is emitted either as an xpath index entry (the "!" case) or as a
 * classic attribute based entry.  With flagShowRecords set the entry is
 * printed instead of being passed to p->tokenAdd.  If no termlist
 * handled the data and default xpath indexing is on, the data is indexed
 * with type 'w'.
 *
 * Tag part: the tag path is indexed with type '0'.  For a start tag the
 * attributes of n are handled as well: each attribute name, the combined
 * name/value string ("comb") and, through index_xpath_attr(), the
 * attribute value.
 *
 * The end of the parameter list, several local declarations, the
 * branches selecting between these parts and the code that changes
 * termlist_only / xpdone are not visible in this excerpt.  Two variants
 * of the debug log call and of the index_name / attrSet assignments
 * appear; presumably they belong to alternative conditional-compilation
 * branches.
 *
 * NOTE(review): attr_tag_path_full has 1024 bytes but is filled with
 * sprintf() from the attribute name plus tag_path_full, which may itself
 * be up to 1024 bytes long; no bound is visible.
 * NOTE(review): tll has MAX_ATTR_COUNT entries; a bound check on the
 * index used to fill it is not visible here.
 */
529 static void index_xpath(struct source_parser *sp, data1_node *n,
530 struct recExtractCtrl *p,
531 int level, RecWord *wrd,
541 char tag_path_full[1024];
542 int termlist_only = 1;
547 int xpath_is_start = 0;
553 yaz_log(YLOG_DEBUG, "index_xpath level=%d xpath_index=%s",
556 yaz_log(YLOG_DEBUG, "index_xpath level=%d use=%d", level, use);
/* true when there is no abstract syntax or it enables xpath indexing */
558 if ((!n->root->u.root.absyn) ||
559 (n->root->u.root.absyn->enable_xpath_indexing)) {
566 wrd->term_buf = n->u.data.data;
567 wrd->term_len = n->u.data.len;
570 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
572 /* If we have a matching termlist... */
573 if (n->root->u.root.absyn &&
574 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
576 for (; tl; tl = tl->next)
578 /* need to copy recword because it may be changed */
580 wrd->index_type = *tl->structure;
581 memcpy (&wrd_tl, wrd, sizeof(*wrd));
583 sp_parse(sp, n, &wrd_tl, tl->source);
590 /* this is the ! case, so structure is for the xpath index */
592 wrd_tl.index_name = xpath_index;
594 wrd_tl.attrSet = VAL_IDXPATH;
595 wrd_tl.attrUse = use;
597 if (p->flagShowRecords)
600 printf("%*sXPath index", (level + 1) * 4, "");
601 printf (" XData:\"");
/* show at most the first 40 bytes of the term */
602 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
603 fputc (wrd_tl.term_buf[i], stdout);
605 if (wrd_tl.term_len > 40)
607 fputc ('\n', stdout);
610 (*p->tokenAdd)(&wrd_tl);
613 /* this is just the old fashioned attribute based index */
615 wrd_tl.index_name = tl->index_name;
617 wrd_tl.attrSet = (int) (tl->att->parent->reference);
618 wrd_tl.attrUse = tl->att->locals->local;
620 if (p->flagShowRecords)
623 printf("%*sIdx: [%s]", (level + 1) * 4, "",
626 printf("%s %s", tl->index_name, tl->source);
628 printf("%s:%s [%d] %s",
629 tl->att->parent->name,
630 tl->att->name, tl->att->value,
633 printf (" XData:\"");
634 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
635 fputc (wrd_tl.term_buf[i], stdout);
637 if (wrd_tl.term_len > 40)
639 fputc ('\n', stdout);
642 (*p->tokenAdd)(&wrd_tl);
646 /* xpath indexing is done, if there was no termlist given,
647 or no ! in the termlist, and default indexing is enabled... */
648 if (!p->flagShowRecords && !xpdone && !termlist_only)
651 wrd->index_name = xpath_index;
653 wrd->attrSet = VAL_IDXPATH;
656 wrd->index_type = 'w';
/* tag part: the term is the tag path itself */
661 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
663 wrd->index_type = '0';
664 wrd->term_buf = tag_path_full;
665 wrd->term_len = strlen(tag_path_full);
667 wrd->index_name = xpath_index;
669 wrd->attrSet = VAL_IDXPATH;
672 if (p->flagShowRecords)
674 printf("%*s tag=", (level + 1) * 4, "");
675 for (i = 0; i<wrd->term_len && i < 40; i++)
676 fputc (wrd->term_buf[i], stdout);
687 /* Add tag start/end xpath index, only when there is a ! in
688 the apropriate xelm directive, or default xpath indexing
691 if (!(do_xpindex = 1 - termlist_only))
693 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n)))
695 for (; tl; tl = tl->next)
707 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
710 if (xpath_is_start == 1) /* only for the starting tag... */
712 #define MAX_ATTR_COUNT 50
713 data1_termlist *tll[MAX_ATTR_COUNT];
717 /* get termlists for attributes, and find out, if we have to do xpath indexing */
718 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
723 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
725 int do_xpindex = 1 - termlist_only;
727 char attr_tag_path_full[1024];
729 /* this could be cached as well */
/* NOTE(review): unbounded sprintf into a 1024 byte buffer, see header */
730 sprintf (attr_tag_path_full, "@%s/%s",
731 xp->name, tag_path_full);
733 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
735 /* if there is a ! in the xelm termlist, or default indexing is on,
736 proceed with xpath idx */
739 for (; tl; tl = tl->next)
753 /* attribute (no value) */
754 wrd->index_type = '0';
756 wrd->index_name = ZEBRA_XPATH_ATTR;
760 wrd->term_buf = xp->name;
761 wrd->term_len = strlen(xp->name);
/* the combined string is only built when name and value fit into comb */
767 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
769 /* attribute value exact */
770 strcpy (comb, xp->name);
772 strcat (comb, xp->value);
775 wrd->index_name = ZEBRA_XPATH_ATTR;
779 wrd->index_type = '0';
780 wrd->term_buf = comb;
781 wrd->term_len = strlen(comb);
/* second pass over the attributes: index the attribute values */
791 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
793 char attr_tag_path_full[1024];
/* NOTE(review): same unbounded sprintf as in the first pass */
796 sprintf (attr_tag_path_full, "@%s/%s",
797 xp->name, tag_path_full);
801 /* If there is a termlist given (=xelm directive) */
802 for (; tl; tl = tl->next)
810 /* add xpath index for the attribute */
811 index_xpath_attr (attr_tag_path_full, xp->name,
812 xp->value, tl->structure,
816 /* if this fragment is enabled, we index
817 attribute values as well. See bug #460 */
821 wrd->index_name = tl->index_name;
824 (tl->att->parent->reference);
825 wrd->attrUse = tl->att->locals->local;
827 wrd->index_type = *tl->structure;
828 wrd->term_buf = xp->value;
829 wrd->term_len = strlen(xp->value);
835 /* if there was no termlist for the given path,
836 or the termlist didn't have a ! element, index
837 the attribute as "w" */
838 if ((!xpdone) && (!termlist_only))
840 index_xpath_attr (attr_tag_path_full, xp->name,
841 xp->value, "w", p, wrd);
/*
 * Indexes node n according to the termlists of tag node par.
 *
 * Starting at par, the tree is climbed with get_parent_tag() until a tag
 * with an element definition is found; its termlists are then used.  For
 * every termlist entry the source expression is evaluated with
 * sp_parse().  When that yields a non-empty term, the word is either
 * printed (flagShowRecords) or set up with the entry's structure, index
 * name and attribute set / use.  The call that hands the word to the
 * extractor, and the early returns, are not visible in this excerpt.
 * One parameter line (n, judging by the call sites and its use below) is
 * missing from this listing.
 */
850 static void index_termlist (struct source_parser *sp, data1_node *par,
852 struct recExtractCtrl *p, int level, RecWord *wrd)
854 data1_termlist *tlist = 0;
855 data1_datatype dtype = DATA1K_string;
858 * cycle up towards the root until we find a tag with an att..
859 * this has the effect of indexing locally defined tags with
860 * the attribute of their ancestor in the record.
863 while (!par->u.tag.element)
864 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
866 if (!par || !(tlist = par->u.tag.element->termlists))
868 if (par->u.tag.element->tag)
869 dtype = par->u.tag.element->tag->kind;
871 for (; tlist; tlist = tlist->next)
873 /* consider source */
875 assert(tlist->source);
876 sp_parse(sp, n, wrd, tlist->source);
/* skip entries whose source expression produced no term */
878 if (wrd->term_buf && wrd->term_len)
880 if (p->flagShowRecords)
883 printf("%*sIdx: [%s]", (level + 1) * 4, "",
886 printf("%s %s", tlist->index_name, tlist->source);
888 printf("%s:%s [%d] %s",
889 tlist->att->parent->name,
890 tlist->att->name, tlist->att->value,
893 printf (" XData:\"");
/* show at most the first 40 bytes of the term */
894 for (i = 0; i<wrd->term_len && i < 40; i++)
895 fputc (wrd->term_buf[i], stdout);
897 if (wrd->term_len > 40)
899 fputc ('\n', stdout);
903 wrd->index_type = *tlist->structure;
905 wrd->index_name = tlist->index_name;
907 wrd->attrSet = (int) (tlist->att->parent->reference);
908 wrd->attrUse = tlist->att->locals->local;
/*
 * Recursive worker of dumpkeys(): walks n and its following siblings.
 *
 * With flagShowRecords set, a description of each root, tag and data
 * node is printed, indented by four columns per level.  Tag nodes are
 * indexed through index_termlist() and get a start entry from
 * index_xpath(); the children are processed recursively; data nodes are
 * indexed through index_termlist() using their parent tag and get a data
 * entry from index_xpath(); finally tag nodes get an end entry.
 *
 * The index_xpath() calls appear in two variants (index name constants
 * versus the numeric use values 1, 1016 and 2); presumably these are
 * alternative conditional-compilation branches.  A negative result of a
 * recursive call is tested; the return statements are not visible in
 * this excerpt.
 */
916 static int dumpkeys_r(struct source_parser *sp,
917 data1_node *n, struct recExtractCtrl *p, int level,
920 for (; n; n = n->next)
922 if (p->flagShowRecords) /* display element description to user */
924 if (n->which == DATA1N_root)
926 printf("%*s", level * 4, "");
927 printf("Record type: '%s'\n", n->u.root.type);
929 else if (n->which == DATA1N_tag)
933 printf("%*s", level * 4, "");
/* tags without an element definition are shown as local tags */
934 if (!(e = n->u.tag.element))
935 printf("Local tag: '%s'\n", n->u.tag.tag);
938 printf("Elm: '%s' ", e->name);
941 data1_tag *t = e->tag;
943 printf("TagNam: '%s' ", t->names->name);
946 printf("%s[%d],", t->tagset->name, t->tagset->type);
949 if (t->which == DATA1T_numeric)
950 printf("%d)", t->value.numeric);
952 printf("'%s')", t->value.string);
959 if (n->which == DATA1N_tag)
961 index_termlist(sp, n, n, p, level, wrd);
962 /* index start tag */
964 if (n->root->u.root.absyn)
965 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN,
968 if (n->root->u.root.absyn)
969 index_xpath(sp, n, p, level, wrd, 1);
/* descend into the children one level deeper */
974 if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
978 if (n->which == DATA1N_data)
980 data1_node *par = get_parent_tag(p->dh, n);
982 if (p->flagShowRecords)
984 printf("%*s", level * 4, "");
/* long data is abbreviated to its first 170 and last 70 bytes */
986 if (n->u.data.len > 256)
987 printf("'%.170s ... %.70s'\n", n->u.data.data,
988 n->u.data.data + n->u.data.len-70);
989 else if (n->u.data.len > 0)
990 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
996 index_termlist(sp, par, n, p, level, wrd);
999 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA,
1002 index_xpath(sp, n, p, level, wrd, 1016);
1006 if (n->which == DATA1N_tag)
1010 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END,
1013 index_xpath(sp, n, p, level, wrd, 2);
1017 if (p->flagShowRecords && n->which == DATA1N_root)
1019 printf("%*s-------------\n\n", level * 4, "");
/*
 * Indexes the tree rooted at n: creates a source parser, runs
 * dumpkeys_r() from level 0 and destroys the parser again.  The result
 * of dumpkeys_r() is kept in r; the return statement is not visible in
 * this excerpt.
 */
1025 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
1027 struct source_parser *sp = source_parser_create();
1028 int r = dumpkeys_r(sp, n, p, 0, wrd);
1029 source_parser_destroy(sp);
/*
 * Extracts index keys from an already built data1 tree n.  When the root
 * has an abstract syntax, its reference is converted to a schema OID and
 * reported through p->schemaAdd.  The word structure is then initialised
 * via p->init and the tree is indexed with dumpkeys(), whose result is
 * returned.
 */
1033 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
1036 int oidtmp[OID_SIZE];
1039 oe.proto = PROTO_Z3950;
1040 oe.oclass = CLASS_SCHEMA;
1041 if (n->u.root.absyn)
1043 oe.value = n->u.root.absyn->reference;
/* only report the schema when the reference maps to an OID */
1045 if ((oid_ent_to_oid (&oe, oidtmp)))
1046 (*p->schemaAdd)(p, oidtmp);
1048 (*p->init)(p, &wrd);
1050 return dumpkeys(n, p, &wrd);
/*
 * Reads one record with grs_read and extracts its index keys.
 *
 * The reader callbacks, offset and client data are copied from p into a
 * grs_read_info structure before grs_read is called.  Visible results:
 *   RECCTRL_EXTRACT_EOF            - presumably when no record was read
 *                                    (the test itself is not visible)
 *   RECCTRL_EXTRACT_ERROR          - root has no abstract syntax
 *   RECCTRL_EXTRACT_ERROR_GENERIC  - dumpkeys() failed
 *   RECCTRL_EXTRACT_OK             - keys extracted
 * Before indexing, adjacent text is concatenated and the tree is
 * converted to UTF-8.  The tree is freed on both the failure and the
 * success path after dumpkeys().
 *
 * One parameter line is missing from this listing; the call site and the
 * uses below show it is the NMEM handle mem.
 * NOTE(review): the absyn test that returns an error is directly
 * followed by a test for the opposite condition; presumably one of them
 * sits in a conditional-compilation branch that is not visible here.
 */
1053 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
1055 data1_node *(*grs_read)(struct grs_read_info *))
1058 struct grs_read_info gri;
1060 int oidtmp[OID_SIZE];
1063 gri.readf = p->readf;
1064 gri.seekf = p->seekf;
1065 gri.tellf = p->tellf;
1068 gri.offset = p->offset;
1071 gri.clientData = clientData;
1073 n = (*grs_read)(&gri);
1075 return RECCTRL_EXTRACT_EOF;
1076 oe.proto = PROTO_Z3950;
1077 oe.oclass = CLASS_SCHEMA;
1079 if (!n->u.root.absyn)
1080 return RECCTRL_EXTRACT_ERROR;
1082 if (n->u.root.absyn)
1084 oe.value = n->u.root.absyn->reference;
1085 if ((oid_ent_to_oid (&oe, oidtmp)))
1086 (*p->schemaAdd)(p, oidtmp);
1088 data1_concat_text(p->dh, mem, n);
1090 /* ensure our data1 tree is UTF-8 */
1091 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1094 data1_pr_tree (p->dh, n, stdout);
1097 (*p->init)(p, &wrd);
1098 if (dumpkeys(n, p, &wrd) < 0)
1100 data1_free_tree(p->dh, n);
1101 return RECCTRL_EXTRACT_ERROR_GENERIC;
1103 data1_free_tree(p->dh, n);
1104 return RECCTRL_EXTRACT_OK;
/*
 * Public extraction entry point: creates an NMEM pool and delegates to
 * grs_extract_sub() with it.  Destruction of the pool and the return of
 * ret are not visible in this excerpt.
 */
1107 int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
1108 data1_node *(*grs_read)(struct grs_read_info *))
1111 NMEM mem = nmem_create ();
1112 ret = grs_extract_sub(clientData, p, mem, grs_read);
1118 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/*
 * Applies the record composition c to the tree n.
 *
 * Simple composition: only the generic element set name form is
 * accepted (otherwise 26); the name is looked up with
 * data1_getesetbyname() and an unknown name gives 25 with the name
 * copied into *addinfo from ODR memory.
 * Complex composition: the generic element spec is either an element
 * set name (handled like the simple case) or an external Espec-1; any
 * other external spec gives 25.  A further return of 26 is visible, but
 * the condition guarding it is not.
 *
 * When an Espec-1 was found, the result of data1_doespec1() is
 * returned.  The remaining return statements, including the "all match"
 * case, are not visible in this excerpt.
 */
1120 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
1121 char **addinfo, ODR o)
1123 data1_esetname *eset;
1124 Z_Espec1 *espec = 0;
1129 case Z_RecordComp_simple:
1130 if (c->u.simple->which != Z_ElementSetNames_generic)
1131 return 26; /* only generic form supported. Fix this later */
1132 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1133 c->u.simple->u.generic)))
1135 yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1136 *addinfo = odr_strdup(o, c->u.simple->u.generic);
1137 return 25; /* invalid esetname */
1139 yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
1140 c->u.simple->u.generic);
1143 case Z_RecordComp_complex:
1144 if (c->u.complex->generic)
1146 /* insert check for schema */
1147 if ((p = c->u.complex->generic->elementSpec))
1151 case Z_ElementSpec_elementSetName:
1153 data1_getesetbyname(dh, n->u.root.absyn,
1154 p->u.elementSetName)))
1156 yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
1157 p->u.elementSetName);
1158 *addinfo = odr_strdup(o, p->u.elementSetName);
1159 return 25; /* invalid esetname */
1161 yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
1162 p->u.elementSetName);
1165 case Z_ElementSpec_externalSpec:
1166 if (p->u.externalSpec->which == Z_External_espec1)
1168 yaz_log(YLOG_DEBUG, "Got Espec-1");
1169 espec = p->u.externalSpec-> u.espec1;
1173 yaz_log(YLOG_LOG, "Unknown external espec.");
1174 return 25; /* bad. what is proper diagnostic? */
1181 return 26; /* fix */
1185 yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1186 return data1_doespec1(dh, n, espec);
1190 yaz_log(YLOG_DEBUG, "Element: all match");
1195 /* Add Zebra info in separate namespace ...
1198 <metadata xmlns="http://www.indexdata.dk/zebra/">
1200 <localnumber>447</localnumber>
1201 <filename>records/genera.xml</filename>
/*
 * Appends Zebra record metadata to an XML record.  An element named
 * "idzebra" with an xmlns attribute of http://www.indexdata.dk/zebra/ is
 * created under top, and child elements size, score, localnumber and
 * filename are filled from p->recordSize, p->score, p->localno and
 * p->fname.  Whitespace text nodes (i2 / i4) are inserted around the
 * elements.  The conditions guarding the individual children and the
 * rest of the parameter list are not visible in this excerpt.
 */
1206 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1209 const char *idzebra_ns[3];
1210 const char *i2 = "\n ";
1211 const char *i4 = "\n ";
/* attribute list for the new element: name followed by value */
1214 idzebra_ns[0] = "xmlns";
1215 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1218 data1_mk_text (p->dh, mem, i2, top);
1220 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1222 data1_mk_text (p->dh, mem, "\n", top);
1224 data1_mk_text (p->dh, mem, i4, n);
1226 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1230 data1_mk_text (p->dh, mem, i4, n);
1231 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1233 data1_mk_text (p->dh, mem, i4, n);
1234 data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
1237 data1_mk_text (p->dh, mem, i4, n);
1238 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1240 data1_mk_text (p->dh, mem, i2, n);
/*
 * Retrieves one record and renders it in the requested syntax.
 *
 * Visible steps, in order:
 *  - read the record with grs_read and concatenate adjacent text nodes;
 *  - add the system tags looked up with data1_systag_lookup() ("size",
 *    "rank", "sysno") below the root tag, filled from p->recordSize,
 *    p->score (when >= 0) and p->localno (when > 0);
 *  - for XML input format, add the metadata element through
 *    zebra_xml_metadata();
 *  - when a complex composition names a schema OID, map the record with
 *    the matching entry of the abstract syntax's maptabs and set
 *    diagnostic 238 if the resulting schema differs from the request;
 *  - otherwise try a map table whose target equals p->input_format;
 *  - for GRS-1 output with a known schema, add a "schemaIdentifier" tag
 *    holding the schema OID formatted with sprintf;
 *  - apply the element specification with process_comp(); a positive
 *    result becomes p->diagnostic and both trees are freed;
 *  - convert the tree to the output encoding and serialise it according
 *    to the output format (p->input_format, or VAL_SUTRS when that is
 *    VAL_NONE); serialisation failure sets diagnostic 238; several
 *    branches copy the buffer into ODR memory, others set rec_len to
 *    (size_t) -1;
 *  - free node and onode.
 *
 * The case labels, NULL checks, returns and several declarations are
 * not visible in this excerpt.  In the schemaIdentifier part the name p
 * evidently denotes a local character pointer into tmp (it is used in
 * p - tmp), which shadows the parameter p; its declaration is not
 * visible.
 * NOTE(review): the result of oid_getentbyoid() is dereferenced and
 * p->encoding is passed to data1_iconv() in some branches; any guards
 * for NULL are not visible here.
 */
1243 int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
1244 data1_node *(*grs_read)(struct grs_read_info *))
1246 data1_node *node = 0, *onode = 0, *top;
1249 int res, selected = 0;
1251 struct grs_read_info gri;
1252 const char *tagname;
1254 int requested_schema = VAL_NONE;
1255 data1_marctab *marctab;
1258 mem = nmem_create();
1259 gri.readf = p->readf;
1260 gri.seekf = p->seekf;
1261 gri.tellf = p->tellf;
1267 gri.clientData = clientData;
1269 yaz_log(YLOG_DEBUG, "grs_retrieve");
1270 node = (*grs_read)(&gri);
1277 data1_concat_text(p->dh, mem, node);
1280 data1_pr_tree (p->dh, node, stdout);
1282 top = data1_get_root_tag (p->dh, node);
/* system tag: record size, written into the new node's local buffer */
1284 yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1285 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1287 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1289 dnew->u.data.what = DATA1I_text;
1290 dnew->u.data.data = dnew->lbuf;
1291 sprintf(dnew->u.data.data, "%d", p->recordSize);
1292 dnew->u.data.len = strlen(dnew->u.data.data);
/* system tag: rank, only when a score is available */
1295 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1296 if (tagname && p->score >= 0 &&
1297 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1299 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1300 dnew->u.data.what = DATA1I_num;
1301 dnew->u.data.data = dnew->lbuf;
1302 sprintf(dnew->u.data.data, "%d", p->score);
1303 dnew->u.data.len = strlen(dnew->u.data.data);
/* system tag: local record number */
1306 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1307 "localControlNumber");
1308 if (tagname && p->localno > 0 &&
1309 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1311 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1312 dnew->u.data.what = DATA1I_text;
1313 dnew->u.data.data = dnew->lbuf;
1315 sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
1316 dnew->u.data.len = strlen(dnew->u.data.data);
1319 if (p->input_format == VAL_TEXT_XML)
1320 zebra_xml_metadata (p, top, mem);
1323 data1_pr_tree (p->dh, node, stdout);
/* a complex composition may name the requested schema by OID */
1325 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1326 p->comp->u.complex->generic &&
1327 p->comp->u.complex->generic->which == Z_Schema_oid &&
1328 p->comp->u.complex->generic->schema.oid)
1330 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1332 requested_schema = oe->value;
1334 /* If schema has been specified, map if possible, then check that
1335 * we got the right one
1337 if (requested_schema != VAL_NONE)
1339 yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1340 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1342 if (map->target_absyn_ref == requested_schema)
1345 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1354 if (node->u.root.absyn &&
1355 requested_schema != node->u.root.absyn->reference)
1357 p->diagnostic = 238;
1363 * Does the requested format match a known syntax-mapping? (this reflects
1364 * the overlap of schema and formatting which is inherent in the MARC
1367 yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1368 if (node->u.root.absyn)
1369 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1371 if (map->target_absyn_ref == p->input_format)
1374 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1383 yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1384 if (node->u.root.absyn &&
1385 node->u.root.absyn->reference != VAL_NONE &&
1386 p->input_format == VAL_GRS1)
1390 int oidtmp[OID_SIZE];
1392 oe.proto = PROTO_Z3950;
1393 oe.oclass = CLASS_SCHEMA;
1394 oe.value = node->u.root.absyn->reference;
1396 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1399 data1_handle dh = p->dh;
1403 for (ii = oid; *ii >= 0; ii++)
1407 sprintf(p, "%d", *ii);
1410 if ((dnew = data1_mk_tag_data_wd(dh, top,
1411 "schemaIdentifier", mem)))
1413 dnew->u.data.what = DATA1I_oid;
1414 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1415 memcpy(dnew->u.data.data, tmp, p - tmp);
1416 dnew->u.data.len = p - tmp;
1421 yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1422 if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
1425 p->diagnostic = res;
1427 data1_free_tree(p->dh, onode);
1428 data1_free_tree(p->dh, node);
1432 else if (p->comp && !res)
1436 data1_pr_tree (p->dh, node, stdout);
1438 yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1439 switch (p->output_format = (p->input_format != VAL_NONE ?
1440 p->input_format : VAL_SUTRS))
1444 data1_pr_tree (p->dh, node, stdout);
1446 /* default output encoding for XML is UTF-8 */
1447 data1_iconv (p->dh, mem, node,
1448 p->encoding ? p->encoding : "UTF-8",
1449 data1_get_encoding(p->dh, node));
1451 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1453 p->diagnostic = 238;
/* hand the caller a copy of the record allocated from ODR memory */
1456 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1457 memcpy (new_buf, p->rec_buf, p->rec_len);
1458 p->rec_buf = new_buf;
1462 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1464 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1466 p->diagnostic = 238; /* not available in requested syntax */
1468 p->rec_len = (size_t) (-1);
1471 /* ensure our data1 tree is UTF-8 */
1472 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1474 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1476 p->diagnostic = 238;
1478 p->rec_len = (size_t) (-1);
1481 /* ensure our data1 tree is UTF-8 */
1482 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1483 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1485 p->diagnostic = 238;
1487 p->rec_len = (size_t) (-1);
1491 data1_iconv (p->dh, mem, node, p->encoding,
1492 data1_get_encoding(p->dh, node));
1493 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1495 p->diagnostic = 238;
1498 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1499 memcpy (new_buf, p->rec_buf, p->rec_len);
1500 p->rec_buf = new_buf;
1505 data1_iconv (p->dh, mem, node, p->encoding,
1506 data1_get_encoding(p->dh, node));
1507 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1509 p->diagnostic = 238;
1512 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1513 memcpy (new_buf, p->rec_buf, p->rec_len);
1514 p->rec_buf = new_buf;
/* MARC output needs an abstract syntax with a matching MARC table */
1518 if (!node->u.root.absyn)
1520 p->diagnostic = 238;
1523 for (marctab = node->u.root.absyn->marc; marctab;
1524 marctab = marctab->next)
1525 if (marctab->reference == p->input_format)
1529 p->diagnostic = 238;
1533 data1_iconv (p->dh, mem, node, p->encoding,
1534 data1_get_encoding(p->dh, node));
1535 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1536 selected, &p->rec_len)))
1537 p->diagnostic = 238;
1540 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1541 memcpy (new_buf, p->rec_buf, p->rec_len);
1542 p->rec_buf = new_buf;
1546 data1_free_tree(p->dh, node);
1548 data1_free_tree(p->dh, onode);