1 /* $Id: recgrs.c,v 1.110 2006-05-19 13:49:35 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 #include <sys/types.h>
32 #include <idzebra/recgrs.h>
34 #define GRS_MAX_WORD 512
/* Parser state for termlist source expressions. The member list is not
   visible in this extract; the functions below use the fields src, tok,
   len, lookahead and nmem. */
36 struct source_parser {
/* Lexer step for a source expression. Skips blanks at sp->src, then scans
   forward until a NUL byte or one of the delimiter characters handed to
   strchr(). sp->lookahead is set to the current source character on at
   least one path; the remaining statements (token bookkeeping and the
   return) are not visible in this extract. */
44 static int sp_lex(struct source_parser *sp)
46 while (*sp->src == ' ')
50 while (*sp->src && !strchr("<>();,-: ", *sp->src))
59 sp->lookahead = *sp->src;
/* Forward declaration: sp_range() and sp_first() call sp_expr(), and
   sp_expr() dispatches back to both of them. */
66 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
/* Parses the argument list of a range expression:
       ( expr , start [ , len ] )
   The first expression is evaluated into wrd. start and (optionally) len
   are evaluated into a temporary word and converted with atoi_n().
   When wrd holds a non-empty term, term_buf is advanced by start and
   term_len is reduced by start; term_len is then compared against len
   (presumably to clamp it -- the assignment is not visible here).
   NOTE(review): no visible check that start is within 0..term_len before
   the pointer and length are adjusted -- confirm against the full file. */
68 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
75 if (sp->lookahead != '(')
77 sp_lex(sp); /* skip ( */
80 if (!sp_expr(sp, n, wrd))
83 if (sp->lookahead != ',')
85 sp_lex(sp); /* skip , */
88 if (!sp_expr(sp, n, &tmp_w))
90 start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
92 if (sp->lookahead == ',')
94 sp_lex(sp); /* skip , */
97 if (!sp_expr(sp, n, &tmp_w))
99 len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
105 if (sp->lookahead != ')')
109 if (wrd->term_buf && wrd->term_len)
111 wrd->term_buf += start;
112 wrd->term_len -= start;
113 if (wrd->term_len > len)
/* Parses the argument list of a first expression:
       ( expr , pattern [ , pattern ... ] )
   The first expression is evaluated into wrd. Each following expression is
   evaluated into search_w and searched for inside the term of wrd by a
   byte-wise comparison at every offset i; the smallest matching offset is
   tracked in min_pos. Finally the term of wrd is replaced by the decimal
   text of min_pos (0 being the default when nothing was found), duplicated
   into memory owned by sp->nmem. */
119 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
124 if (sp->lookahead != '(')
126 sp_lex(sp); /* skip ( */
127 if (!sp_expr(sp, n, wrd))
129 while (sp->lookahead == ',')
133 sp_lex(sp); /* skip , */
135 if (!sp_expr(sp, n, &search_w))
137 for (i = 0; i<wrd->term_len; i++)
140 for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
141 if (wrd->term_buf[i+j] != search_w.term_buf[j])
143 if (j == search_w.term_len) /* match ? */
145 if (min_pos == -1 || i < min_pos)
151 if (sp->lookahead != ')')
155 min_pos = 0; /* the default if not found */
156 sprintf(num_str, "%d", min_pos);
157 wrd->term_buf = nmem_strdup(sp->nmem, num_str);
158 wrd->term_len = strlen(wrd->term_buf);
/* Evaluates one source expression for node n and stores the result in wrd.
   Token forms recognised in the visible code:
     data         term is the data buffer and length of a DATA1N_data node
     tag          term is the tag name of a DATA1N_tag node
     attr(expr)   term is the value of an attribute of a DATA1N_tag node,
                  selected by the name that expr evaluates to
     first(...)   delegated to sp_first()
     range(...)   delegated to sp_range()
     digits       a token starting with a digit, copied into sp->nmem memory
     quoted text  a token enclosed in single quotes (length above 2), copied
                  without the quotes into sp->nmem memory
   NOTE(review): the attribute search loop joins the length test and the
   memcmp() test with &&, so it stops as soon as either one matches --
   confirm that this is the intended name comparison.
   NOTE(review): p is dereferenced for p->value after that loop; whether a
   null check sits on one of the lines missing from this extract cannot be
   told from here. */
162 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
164 if (sp->lookahead != 't')
166 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
168 if (n->which == DATA1N_data)
170 wrd->term_buf = n->u.data.data;
171 wrd->term_len = n->u.data.len;
175 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
177 if (n->which == DATA1N_tag)
179 wrd->term_buf = n->u.tag.tag;
180 wrd->term_len = strlen(n->u.tag.tag);
184 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
188 if (sp->lookahead != '(')
192 if (!sp_expr(sp, n, &tmp_w))
197 if (n->which == DATA1N_tag)
199 data1_xattr *p = n->u.tag.attributes;
200 while (p && strlen(p->name) != tmp_w.term_len &&
201 memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
205 wrd->term_buf = p->value;
206 wrd->term_len = strlen(p->value);
209 if (sp->lookahead != ')')
213 else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
215 return sp_first(sp, n, wrd);
217 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
219 return sp_range(sp, n, wrd);
221 else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
224 wrd->term_len = sp->len;
225 b = nmem_malloc(sp->nmem, sp->len);
226 memcpy(b, sp->tok, sp->len);
230 else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
233 wrd->term_len = sp->len - 2;
234 b = nmem_malloc(sp->nmem, wrd->term_len);
235 memcpy(b, sp->tok+1, wrd->term_len);
/* Allocates a source_parser with xmalloc() and gives it a fresh NMEM pool
   from nmem_create(). Any further initialisation and the return statement
   are not visible in this extract. */
248 static struct source_parser *source_parser_create()
250 struct source_parser *sp = xmalloc(sizeof(*sp));
252 sp->nmem = nmem_create();
/* Releases the NMEM pool of the parser with nmem_destroy(). Release of the
   parser structure itself is not visible in this extract. */
256 static void source_parser_destroy(struct source_parser *sp)
260 nmem_destroy(sp->nmem);
/* Evaluates the source expression src for node n into wrd.
   The NMEM pool of the parser is reset first, so memory handed out by a
   previous evaluation (for example term buffers allocated in sp_expr() or
   sp_first()) is recycled. Returns the result of sp_expr(). The lines that
   point the lexer at src are not visible in this extract. */
264 static int sp_parse(struct source_parser *sp,
265 data1_node *n, RecWord *wrd, const char *src)
271 nmem_reset(sp->nmem);
274 return sp_expr(sp, n, wrd);
/* Tests the XPath predicate p against node n.
   XPATH_PREDICATE_RELATION: only attribute names (leading @) and the =
   operator are supported; anything else is logged with YLOG_WARN and the
   predicate is reported as ignored. The attributes of n are scanned for
   one whose name equals the predicate name; when an operator is present
   the attribute value is compared with the predicate value using strcmp(),
   otherwise the mere existence of the attribute is what is tested.
   XPATH_PREDICATE_BOOLEAN: the and / or operators recurse on the left and
   right operands; any other operator is logged as unknown and ignored.
   The concrete return values of the non-recursive paths are on lines that
   are not visible in this extract. */
277 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
286 if (p->which == XPATH_PREDICATE_RELATION) {
287 if (p->u.relation.name[0]) {
288 if (*p->u.relation.name != '@') {
290 " Only attributes (@) are supported in xelm xpath predicates");
291 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
294 attname = p->u.relation.name + 1;
296 /* looking for the attribute with a specified name */
297 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
298 yaz_log(YLOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
300 if (!strcmp(attr->name, attname)) {
301 if (p->u.relation.op[0]) {
302 if (*p->u.relation.op != '=') {
304 "Only '=' relation is supported (%s)",p->u.relation.op);
305 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
308 yaz_log(YLOG_DEBUG," - value %s <-> %s",
309 p->u.relation.value, attr->value );
310 if (!strcmp(attr->value, p->u.relation.value)) {
315 /* attribute exists, no value specified */
320 yaz_log(YLOG_DEBUG, "return %d", res);
326 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
327 if (!strcmp(p->u.boolean.op,"and")) {
328 return d1_check_xpath_predicate(n, p->u.boolean.left)
329 && d1_check_xpath_predicate(n, p->u.boolean.right);
331 else if (!strcmp(p->u.boolean.op,"or")) {
332 return (d1_check_xpath_predicate(n, p->u.boolean.left)
333 || d1_check_xpath_predicate(n, p->u.boolean.right));
335 yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
/* Runs the DFA given by dfaar over text, starting in state dfaar[0].
   For each input character c the transitions of the current state are
   scanned for one whose range ch[0]..ch[1] contains c. According to the
   inline comment the function moves to the next state and returns once a
   match is reached; the state update and the return statements are not
   visible in this extract. */
344 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
346 struct DFA_state *s = dfaar[0]; /* start state */
349 const char *p = text;
352 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
354 if (c >= t->ch[0] && c <= t->ch[1])
358 /* move to next state and return if we get a match */
366 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
367 if (c >= t->ch[0] && c <= t->ch[1])
377 New function, looking for xpath "element" definitions in abs, by
378 tagpath, using a kind of ugly regexp search. The DFA was built while
379 parsing abs, so here we just go through them and try to match
380 against the given tagpath. The first matching entry is returned.
384 Added support for enhanced xelm. Now [] predicates are considered
385 as well, when selecting indexing rules... (why the hell it's called
/* Finds the termlists of the first xelm (xpath element) definition in the
   abstract syntax of n that matches tagpath.
   tagpath is formatted with a leading slash and a trailing newline into a
   buffer obtained from xmalloc() and run through the DFA of each xpath
   element with dfa_match_first(). For a candidate, the predicates of its
   location steps are checked from the last step down to step 1 while
   walking from n towards the root through the parent links. On success
   xpe->termlists is returned. The no-match return value and the release
   of pexpr are on lines that are not visible in this extract. */
392 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
394 data1_absyn *abs = n->root->u.root.absyn;
395 data1_xpelement *xpe = abs->xp_elements;
398 struct xpath_location_step *xp;
400 char *pexpr = xmalloc(strlen(tagpath)+5);
403 sprintf (pexpr, "/%s\n", tagpath);
404 for (; xpe; xpe = xpe->next)
407 ok = dfa_match_first(xpe->dfa->states, pexpr);
411 /* we have to check the predicates up to the root node */
414 /* find the first tag up in the node structure */
415 for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent)
418 /* go from inside out in the node structure, while going
419 backwards trough xpath location steps ... */
420 for (i = xpe->xpath_len - 1; i>0; i--)
422 yaz_log(YLOG_DEBUG, "Checking step %d: %s on tag %s",
423 i, xp[i].part, nn->u.tag.tag);
425 if (!d1_check_xpath_predicate(nn, xp[i].predicate))
427 yaz_log(YLOG_DEBUG, " Predicates didn't match");
432 if (nn->which == DATA1N_tag)
444 yaz_log(YLOG_DEBUG, "Got it");
445 return xpe->termlists;
452 1 start element (tag)
454 3 start attr (and attr-exact)
462 Now, if there is a matching xelm described in abs, for the
463 indexed element or the attribute, then the data is handled according
464 to those definitions...
466 modified by pop, 2002-12-13
469 /* Add xpath index entries for one attribute. The visible assignments
   prepare three words in this order:
     1. ZEBRA_XPATH_ELM_BEGIN, type 0, term = tag_path
     2. ZEBRA_XPATH_ATTR_CDATA, type w, term = value
     3. ZEBRA_XPATH_ELM_END, type 0, term = tag_path
   The calls that submit each word, and any use of the name and structure
   parameters, are on lines not visible in this extract. */
470 static void index_xpath_attr (char *tag_path, char *name, char *value,
471 char *structure, struct recExtractCtrl *p,
474 wrd->index_name = ZEBRA_XPATH_ELM_BEGIN;
475 wrd->index_type = '0';
476 wrd->term_buf = tag_path;
477 wrd->term_len = strlen(tag_path);
481 wrd->index_name = ZEBRA_XPATH_ATTR_CDATA;
482 wrd->index_type = 'w';
483 wrd->term_buf = value;
484 wrd->term_len = strlen(value);
487 wrd->index_name = ZEBRA_XPATH_ELM_END;
488 wrd->index_type = '0';
489 wrd->term_buf = tag_path;
490 wrd->term_len = strlen(tag_path);
/* Builds the tag path of n into tag_path_full (capacity max bytes).
   The walk goes from n up through the parent links, so the innermost tag
   comes first; each DATA1N_tag name is copied in and followed by a slash.
   A name that would not fit (tlen + flen above max - 2) and a DATA1N_root
   node are both tested for -- presumably each ends the walk, but the
   statements under those tests are not visible in this extract. The result
   is NUL-terminated. */
495 static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
500 /* we have to fetch the whole path to the data tag */
501 for (nn = n; nn; nn = nn->parent)
503 if (nn->which == DATA1N_tag)
505 size_t tlen = strlen(nn->u.tag.tag);
506 if (tlen + flen > (max - 2))
508 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
510 tag_path_full[flen++] = '/';
513 if (nn->which == DATA1N_root)
516 tag_path_full[flen] = 0;
/* XPath-style indexing of node n. What the visible code shows:
   Data handling: the term is the data of the node; the full tag path is
   built with mk_tag_path_full() and, when the abstract syntax has xelm
   termlists matching that path, each termlist source expression is
   evaluated with sp_parse() on a copy of wrd (wrd_tl) and passed to
   p->tokenAdd, either under xpath_index (the ! case) or under the index
   name of the termlist. Default xpath indexing with type w is set up only
   when p->flagShowRecords is off, no termlist did the xpath indexing
   (xpdone) and termlist_only is off.
   Tag handling: the tag path itself is indexed with type 0 under
   xpath_index. For a start tag (xpath_is_start == 1) the attributes are
   processed: termlists are looked up under a key built from the attribute
   name and the tag path, attribute names and name/value combinations are
   indexed under ZEBRA_XPATH_ATTR_NAME, and attribute values are indexed
   through index_xpath_attr() or under the termlist index name.
   With p->flagShowRecords set, up to 40 bytes of each term are printed to
   stdout.
   termlist_only is initialised to 1; where it is cleared is on lines not
   visible in this extract.
   NOTE(review): attr_tag_path_full is 1024 bytes and is filled by sprintf
   from an attribute name plus tag_path_full (also up to 1024 bytes); no
   length check is visible -- confirm against the full file.
   NOTE(review): tll has MAX_ATTR_COUNT (50) slots and is indexed per
   attribute; no bound check on the attribute count is visible here. */
520 static void index_xpath(struct source_parser *sp, data1_node *n,
521 struct recExtractCtrl *p,
522 int level, RecWord *wrd,
528 char tag_path_full[1024];
529 int termlist_only = 1;
532 yaz_log(YLOG_DEBUG, "index_xpath level=%d xpath_index=%s",
534 if ((!n->root->u.root.absyn) ||
535 (n->root->u.root.absyn->enable_xpath_indexing)) {
542 wrd->term_buf = n->u.data.data;
543 wrd->term_len = n->u.data.len;
546 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
548 /* If we have a matching termlist... */
549 if (n->root->u.root.absyn &&
550 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
552 for (; tl; tl = tl->next)
554 /* need to copy recword because it may be changed */
556 wrd->index_type = *tl->structure;
557 memcpy (&wrd_tl, wrd, sizeof(*wrd));
559 sp_parse(sp, n, &wrd_tl, tl->source);
562 /* this is the ! case, so structure is for the xpath index */
563 wrd_tl.index_name = xpath_index;
564 if (p->flagShowRecords)
567 printf("%*sXPath index", (level + 1) * 4, "");
568 printf (" XData:\"");
569 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
570 fputc (wrd_tl.term_buf[i], stdout);
572 if (wrd_tl.term_len > 40)
574 fputc ('\n', stdout);
577 (*p->tokenAdd)(&wrd_tl);
580 /* this is just the old fashioned attribute based index */
581 wrd_tl.index_name = tl->index_name;
582 if (p->flagShowRecords)
585 printf("%*sIdx: [%s]", (level + 1) * 4, "",
587 printf("%s %s", tl->index_name, tl->source);
588 printf (" XData:\"");
589 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
590 fputc (wrd_tl.term_buf[i], stdout);
592 if (wrd_tl.term_len > 40)
594 fputc ('\n', stdout);
597 (*p->tokenAdd)(&wrd_tl);
601 /* xpath indexing is done, if there was no termlist given,
602 or no ! in the termlist, and default indexing is enabled... */
603 if (!p->flagShowRecords && !xpdone && !termlist_only)
605 wrd->index_name = xpath_index;
606 wrd->index_type = 'w';
611 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
613 wrd->index_type = '0';
614 wrd->term_buf = tag_path_full;
615 wrd->term_len = strlen(tag_path_full);
616 wrd->index_name = xpath_index;
617 if (p->flagShowRecords)
619 printf("%*s tag=", (level + 1) * 4, "");
620 for (i = 0; i<wrd->term_len && i < 40; i++)
621 fputc (wrd->term_buf[i], stdout);
632 /* Add tag start/end xpath index, only when there is a ! in
633 the apropriate xelm directive, or default xpath indexing
636 if (!(do_xpindex = 1 - termlist_only))
638 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n)))
640 for (; tl; tl = tl->next)
648 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
651 if (xpath_is_start == 1) /* only for the starting tag... */
653 #define MAX_ATTR_COUNT 50
654 data1_termlist *tll[MAX_ATTR_COUNT];
658 /* get termlists for attributes, and find out, if we have to do xpath indexing */
659 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
664 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
666 int do_xpindex = 1 - termlist_only;
668 char attr_tag_path_full[1024];
670 /* this could be cached as well */
671 sprintf (attr_tag_path_full, "@%s/%s",
672 xp->name, tag_path_full);
674 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
676 /* if there is a ! in the xelm termlist, or default indexing is on,
677 proceed with xpath idx */
680 for (; tl; tl = tl->next)
689 /* attribute (no value) */
690 wrd->index_type = '0';
691 wrd->index_name = ZEBRA_XPATH_ATTR_NAME;
692 wrd->term_buf = xp->name;
693 wrd->term_len = strlen(xp->name);
699 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
701 /* attribute value exact */
702 strcpy (comb, xp->name);
704 strcat (comb, xp->value);
706 wrd->index_name = ZEBRA_XPATH_ATTR_NAME;
707 wrd->index_type = '0';
708 wrd->term_buf = comb;
709 wrd->term_len = strlen(comb);
719 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
721 char attr_tag_path_full[1024];
724 sprintf (attr_tag_path_full, "@%s/%s",
725 xp->name, tag_path_full);
729 /* If there is a termlist given (=xelm directive) */
730 for (; tl; tl = tl->next)
734 /* add xpath index for the attribute */
735 index_xpath_attr (attr_tag_path_full, xp->name,
736 xp->value, tl->structure,
740 /* index attribute value (only path/@attr) */
743 wrd->index_name = tl->index_name;
744 wrd->index_type = *tl->structure;
745 wrd->term_buf = xp->value;
746 wrd->term_len = strlen(xp->value);
752 /* if there was no termlist for the given path,
753 or the termlist didn't have a ! element, index
754 the attribute as "w" */
755 if ((!xpdone) && (!termlist_only))
757 index_xpath_attr (attr_tag_path_full, xp->name,
758 xp->value, "w", p, wrd);
/* Indexes node n using the termlists of the nearest enclosing element.
   Starting at par, the loop climbs with get_parent_tag() until it reaches
   a tag that has an element definition; the termlists come from that
   element and, when the element has a tag, dtype is taken from the tag
   kind. For every termlist the source expression (asserted to be present)
   is evaluated with sp_parse() into wrd. For a non-empty term the visible
   code prints up to 40 bytes of it when p->flagShowRecords is set, and
   assigns index_type and index_name from the termlist. How those two parts
   are branched and the call that submits the word are not visible in this
   extract. */
767 static void index_termlist (struct source_parser *sp, data1_node *par,
769 struct recExtractCtrl *p, int level, RecWord *wrd)
771 data1_termlist *tlist = 0;
772 data1_datatype dtype = DATA1K_string;
775 * cycle up towards the root until we find a tag with an att..
776 * this has the effect of indexing locally defined tags with
777 * the attribute of their ancestor in the record.
780 while (!par->u.tag.element)
781 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
783 if (!par || !(tlist = par->u.tag.element->termlists))
785 if (par->u.tag.element->tag)
786 dtype = par->u.tag.element->tag->kind;
788 for (; tlist; tlist = tlist->next)
790 /* consider source */
792 assert(tlist->source);
793 sp_parse(sp, n, wrd, tlist->source);
795 if (wrd->term_buf && wrd->term_len)
797 if (p->flagShowRecords)
800 printf("%*sIdx: [%s]", (level + 1) * 4, "",
802 printf("%s %s", tlist->index_name, tlist->source);
803 printf (" XData:\"");
804 for (i = 0; i<wrd->term_len && i < 40; i++)
805 fputc (wrd->term_buf[i], stdout);
807 if (wrd->term_len > 40)
809 fputc ('\n', stdout);
813 wrd->index_type = *tlist->structure;
814 wrd->index_name = tlist->index_name;
/* Recursive key extraction over n and its following siblings.
   For each node:
   - with p->flagShowRecords set, a description of root and tag nodes is
     printed, indented by level;
   - a tag node is indexed with index_termlist() and, if the root has an
     abstract syntax, with index_xpath() under ZEBRA_XPATH_ELM_BEGIN;
   - the children are processed recursively at level + 1, and a negative
     result from that call is tested for;
   - a data node is indexed against its parent tag with index_termlist()
     and with index_xpath() under ZEBRA_XPATH_CDATA; when shown, data
     longer than 256 bytes is abbreviated to its first 170 and last 70
     bytes;
   - a tag node gets a closing index_xpath() under ZEBRA_XPATH_ELM_END.
   The return statements are not visible in this extract. */
821 static int dumpkeys_r(struct source_parser *sp,
822 data1_node *n, struct recExtractCtrl *p, int level,
825 for (; n; n = n->next)
827 if (p->flagShowRecords) /* display element description to user */
829 if (n->which == DATA1N_root)
831 printf("%*s", level * 4, "");
832 printf("Record type: '%s'\n", n->u.root.type);
834 else if (n->which == DATA1N_tag)
838 printf("%*s", level * 4, "");
839 if (!(e = n->u.tag.element))
840 printf("Local tag: '%s'\n", n->u.tag.tag);
843 printf("Elm: '%s' ", e->name);
846 data1_tag *t = e->tag;
848 printf("TagNam: '%s' ", t->names->name);
851 printf("%s[%d],", t->tagset->name, t->tagset->type);
854 if (t->which == DATA1T_numeric)
855 printf("%d)", t->value.numeric);
857 printf("'%s')", t->value.string);
864 if (n->which == DATA1N_tag)
866 index_termlist(sp, n, n, p, level, wrd);
867 /* index start tag */
868 if (n->root->u.root.absyn)
869 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN,
874 if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
878 if (n->which == DATA1N_data)
880 data1_node *par = get_parent_tag(p->dh, n);
882 if (p->flagShowRecords)
884 printf("%*s", level * 4, "");
886 if (n->u.data.len > 256)
887 printf("'%.170s ... %.70s'\n", n->u.data.data,
888 n->u.data.data + n->u.data.len-70);
889 else if (n->u.data.len > 0)
890 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
896 index_termlist(sp, par, n, p, level, wrd);
898 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA,
902 if (n->which == DATA1N_tag)
905 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END,
909 if (p->flagShowRecords && n->which == DATA1N_root)
911 printf("%*s-------------\n\n", level * 4, "");
/* Extracts keys for the tree rooted at n: creates a source parser, runs
   dumpkeys_r() from level 0 and destroys the parser again. r holds the
   dumpkeys_r() result; the return statement is not visible here. */
917 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
919 struct source_parser *sp = source_parser_create();
920 int r = dumpkeys_r(sp, n, p, 0, wrd);
921 source_parser_destroy(sp);
/* Extracts index keys from an already built data1 tree n.
   An oident is filled with PROTO_Z3950, CLASS_SCHEMA and the reference of
   the abstract syntax of the root; when oid_ent_to_oid() yields an OID it
   is reported through p->schemaAdd. Returns the result of dumpkeys().
   Initialisation of wrd and any guard around the absyn access are on lines
   not visible in this extract. */
925 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
928 int oidtmp[OID_SIZE];
931 oe.proto = PROTO_Z3950;
932 oe.oclass = CLASS_SCHEMA;
935 oe.value = n->u.root.absyn->reference;
937 if ((oid_ent_to_oid (&oe, oidtmp)))
938 (*p->schemaAdd)(p, oidtmp);
942 return dumpkeys(n, p, &wrd);
/* Reads one record through grs_read and extracts its index keys.
   The read callbacks, the offset and clientData are copied from p into a
   grs_read_info before grs_read is called. Visible return codes:
     RECCTRL_EXTRACT_EOF            (presumably when no tree was read; the
                                     guarding test is not visible)
     RECCTRL_EXTRACT_ERROR          the root has no abstract syntax
     RECCTRL_EXTRACT_ERROR_GENERIC  dumpkeys() returned a negative value
     RECCTRL_EXTRACT_OK             otherwise
   Before the keys are dumped the schema OID is reported through
   p->schemaAdd, adjacent text is merged with data1_concat_text() and the
   tree is converted to UTF-8. The tree is freed with data1_free_tree() on
   both exits that follow dumpkeys(). */
945 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
947 data1_node *(*grs_read)(struct grs_read_info *))
950 struct grs_read_info gri;
952 int oidtmp[OID_SIZE];
955 gri.readf = p->readf;
956 gri.seekf = p->seekf;
957 gri.tellf = p->tellf;
960 gri.offset = p->offset;
963 gri.clientData = clientData;
965 n = (*grs_read)(&gri);
967 return RECCTRL_EXTRACT_EOF;
968 oe.proto = PROTO_Z3950;
969 oe.oclass = CLASS_SCHEMA;
971 if (!n->u.root.absyn)
972 return RECCTRL_EXTRACT_ERROR;
976 oe.value = n->u.root.absyn->reference;
977 if ((oid_ent_to_oid (&oe, oidtmp)))
978 (*p->schemaAdd)(p, oidtmp);
980 data1_concat_text(p->dh, mem, n);
982 /* ensure our data1 tree is UTF-8 */
983 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
986 data1_pr_tree (p->dh, n, stdout);
990 if (dumpkeys(n, p, &wrd) < 0)
992 data1_free_tree(p->dh, n);
993 return RECCTRL_EXTRACT_ERROR_GENERIC;
995 data1_free_tree(p->dh, n);
996 return RECCTRL_EXTRACT_OK;
/* Extraction entry point: creates an NMEM pool and delegates to
   grs_extract_sub() with it. Destruction of the pool and the return of
   ret are on lines not visible in this extract. */
999 int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
1000 data1_node *(*grs_read)(struct grs_read_info *))
1003 NMEM mem = nmem_create ();
1004 ret = grs_extract_sub(clientData, p, mem, grs_read);
1010 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/* Applies the record composition c to the tree n.
   Simple composition: only the generic element set name form is accepted
   (26 otherwise); a name unknown to data1_getesetbyname() gives 25 with
   *addinfo set to a copy of the name allocated from o.
   Complex composition: the generic element spec may be an element set name
   (unknown name gives 25, with *addinfo set) or an external spec, of which
   only Espec-1 is accepted (25 otherwise); 26 is returned on a further
   path marked fix.
   When an Espec-1 was obtained the result of data1_doespec1() is returned.
   How a found element set is turned into an Espec-1 and the value returned
   on the all match path are on lines not visible in this extract. */
1012 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
1013 char **addinfo, ODR o)
1015 data1_esetname *eset;
1016 Z_Espec1 *espec = 0;
1021 case Z_RecordComp_simple:
1022 if (c->u.simple->which != Z_ElementSetNames_generic)
1023 return 26; /* only generic form supported. Fix this later */
1024 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1025 c->u.simple->u.generic)))
1027 yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1028 *addinfo = odr_strdup(o, c->u.simple->u.generic);
1029 return 25; /* invalid esetname */
1031 yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
1032 c->u.simple->u.generic);
1035 case Z_RecordComp_complex:
1036 if (c->u.complex->generic)
1038 /* insert check for schema */
1039 if ((p = c->u.complex->generic->elementSpec))
1043 case Z_ElementSpec_elementSetName:
1045 data1_getesetbyname(dh, n->u.root.absyn,
1046 p->u.elementSetName)))
1048 yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
1049 p->u.elementSetName);
1050 *addinfo = odr_strdup(o, p->u.elementSetName);
1051 return 25; /* invalid esetname */
1053 yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
1054 p->u.elementSetName);
1057 case Z_ElementSpec_externalSpec:
1058 if (p->u.externalSpec->which == Z_External_espec1)
1060 yaz_log(YLOG_DEBUG, "Got Espec-1");
1061 espec = p->u.externalSpec-> u.espec1;
1065 yaz_log(YLOG_LOG, "Unknown external espec.");
1066 return 25; /* bad. what is proper diagnostic? */
1073 return 26; /* fix */
1077 yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1078 return data1_doespec1(dh, n, espec);
1082 yaz_log(YLOG_DEBUG, "Element: all match");
1087 /* Add Zebra info in separate namespace ...
1090 <metadata xmlns="http://www.indexdata.dk/zebra/">
1092 <localnumber>447</localnumber>
1093 <filename>records/genera.xml</filename>
/* Appends Zebra metadata to an XML record: creates an idzebra element
   under top, carrying an xmlns attribute with the Zebra namespace URI, and
   adds size, score, localnumber and filename children filled from
   p->recordSize, p->score, p->localno and p->fname. Whitespace text nodes
   (i2, i4) are inserted between the elements for layout. Conditions that
   may guard individual children are not visible in this extract. */
1098 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1101 const char *idzebra_ns[3];
1102 const char *i2 = "\n ";
1103 const char *i4 = "\n ";
1106 idzebra_ns[0] = "xmlns";
1107 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1110 data1_mk_text (p->dh, mem, i2, top);
1112 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1114 data1_mk_text (p->dh, mem, "\n", top);
1116 data1_mk_text (p->dh, mem, i4, n);
1118 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1122 data1_mk_text (p->dh, mem, i4, n);
1123 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1125 data1_mk_text (p->dh, mem, i4, n);
1126 data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
1129 data1_mk_text (p->dh, mem, i4, n);
1130 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1132 data1_mk_text (p->dh, mem, i2, n);
/* Retrieves one record: reads it through grs_read, decorates and maps the
   data1 tree, and serialises it in the requested syntax.
   Steps shown by the visible code, in order:
   1. The read callbacks and clientData are copied into a grs_read_info and
      the tree is read; adjacent text is merged with data1_concat_text().
   2. System tags are added under the root tag when data1_systag_lookup()
      supplies a tag name: size, rank (only when p->score >= 0) and sysno
      (only when p->localno > 0).
   3. For VAL_TEXT_XML input format zebra_xml_metadata() is called.
   4. If a complex composition carries a schema OID, the matching maptab of
      the abstract syntax is applied with data1_map_record(); diagnostic
      238 is set when the resulting schema is not the requested one.
   5. A maptab whose target equals p->input_format is applied likewise.
   6. For VAL_GRS1 with a known schema reference a schemaIdentifier tag is
      added whose data is built from the numeric OID components.
   7. process_comp() selects elements; its result is stored in
      p->diagnostic on the path where both trees are freed.
   8. p->output_format is p->input_format, or VAL_SUTRS when that is
      VAL_NONE. Per format the tree is converted with data1_iconv() and
      serialised by one of the data1_nodeto... functions; a failed
      serialisation sets diagnostic 238. Several formats copy the result
      into ODR memory, others set rec_len to (size_t) -1. The MARC path
      needs an abstract syntax and a marctab matching p->input_format.
   9. node and onode are released with data1_free_tree().
   Case labels, early exits and the final return are on lines that are not
   visible in this extract. */
1135 int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
1136 data1_node *(*grs_read)(struct grs_read_info *))
1138 data1_node *node = 0, *onode = 0, *top;
1141 int res, selected = 0;
1143 struct grs_read_info gri;
1144 const char *tagname;
1146 int requested_schema = VAL_NONE;
1147 data1_marctab *marctab;
1150 mem = nmem_create();
1151 gri.readf = p->readf;
1152 gri.seekf = p->seekf;
1153 gri.tellf = p->tellf;
1159 gri.clientData = clientData;
1161 yaz_log(YLOG_DEBUG, "grs_retrieve");
1162 node = (*grs_read)(&gri);
1169 data1_concat_text(p->dh, mem, node);
1172 data1_pr_tree (p->dh, node, stdout);
1174 top = data1_get_root_tag (p->dh, node);
1176 yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1177 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1179 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1181 dnew->u.data.what = DATA1I_text;
1182 dnew->u.data.data = dnew->lbuf;
1183 sprintf(dnew->u.data.data, "%d", p->recordSize);
1184 dnew->u.data.len = strlen(dnew->u.data.data);
1187 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1188 if (tagname && p->score >= 0 &&
1189 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1191 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1192 dnew->u.data.what = DATA1I_num;
1193 dnew->u.data.data = dnew->lbuf;
1194 sprintf(dnew->u.data.data, "%d", p->score);
1195 dnew->u.data.len = strlen(dnew->u.data.data);
1198 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1199 "localControlNumber");
1200 if (tagname && p->localno > 0 &&
1201 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1203 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1204 dnew->u.data.what = DATA1I_text;
1205 dnew->u.data.data = dnew->lbuf;
1207 sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
1208 dnew->u.data.len = strlen(dnew->u.data.data);
1211 if (p->input_format == VAL_TEXT_XML)
1212 zebra_xml_metadata (p, top, mem);
1215 data1_pr_tree (p->dh, node, stdout);
1217 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1218 p->comp->u.complex->generic &&
1219 p->comp->u.complex->generic->which == Z_Schema_oid &&
1220 p->comp->u.complex->generic->schema.oid)
1222 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1224 requested_schema = oe->value;
1226 /* If schema has been specified, map if possible, then check that
1227 * we got the right one
1229 if (requested_schema != VAL_NONE)
1231 yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1232 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1234 if (map->target_absyn_ref == requested_schema)
1237 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1246 if (node->u.root.absyn &&
1247 requested_schema != node->u.root.absyn->reference)
1249 p->diagnostic = 238;
1255 * Does the requested format match a known syntax-mapping? (this reflects
1256 * the overlap of schema and formatting which is inherent in the MARC
1259 yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1260 if (node->u.root.absyn)
1261 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1263 if (map->target_absyn_ref == p->input_format)
1266 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1275 yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1276 if (node->u.root.absyn &&
1277 node->u.root.absyn->reference != VAL_NONE &&
1278 p->input_format == VAL_GRS1)
1282 int oidtmp[OID_SIZE];
1284 oe.proto = PROTO_Z3950;
1285 oe.oclass = CLASS_SCHEMA;
1286 oe.value = node->u.root.absyn->reference;
1288 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1291 data1_handle dh = p->dh;
1295 for (ii = oid; *ii >= 0; ii++)
1299 sprintf(p, "%d", *ii);
1302 if ((dnew = data1_mk_tag_data_wd(dh, top,
1303 "schemaIdentifier", mem)))
1305 dnew->u.data.what = DATA1I_oid;
1306 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1307 memcpy(dnew->u.data.data, tmp, p - tmp);
1308 dnew->u.data.len = p - tmp;
1313 yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1314 if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
1317 p->diagnostic = res;
1319 data1_free_tree(p->dh, onode);
1320 data1_free_tree(p->dh, node);
1324 else if (p->comp && !res)
1328 data1_pr_tree (p->dh, node, stdout);
1330 yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1331 switch (p->output_format = (p->input_format != VAL_NONE ?
1332 p->input_format : VAL_SUTRS))
1336 data1_pr_tree (p->dh, node, stdout);
1338 /* default output encoding for XML is UTF-8 */
1339 data1_iconv (p->dh, mem, node,
1340 p->encoding ? p->encoding : "UTF-8",
1341 data1_get_encoding(p->dh, node));
1343 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1345 p->diagnostic = 238;
1348 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1349 memcpy (new_buf, p->rec_buf, p->rec_len);
1350 p->rec_buf = new_buf;
1354 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1356 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1358 p->diagnostic = 238; /* not available in requested syntax */
1360 p->rec_len = (size_t) (-1);
1363 /* ensure our data1 tree is UTF-8 */
1364 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1366 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1368 p->diagnostic = 238;
1370 p->rec_len = (size_t) (-1);
1373 /* ensure our data1 tree is UTF-8 */
1374 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1375 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1377 p->diagnostic = 238;
1379 p->rec_len = (size_t) (-1);
1383 data1_iconv (p->dh, mem, node, p->encoding,
1384 data1_get_encoding(p->dh, node));
1385 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1387 p->diagnostic = 238;
1390 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1391 memcpy (new_buf, p->rec_buf, p->rec_len);
1392 p->rec_buf = new_buf;
1397 data1_iconv (p->dh, mem, node, p->encoding,
1398 data1_get_encoding(p->dh, node));
1399 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1401 p->diagnostic = 238;
1404 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1405 memcpy (new_buf, p->rec_buf, p->rec_len);
1406 p->rec_buf = new_buf;
1410 if (!node->u.root.absyn)
1412 p->diagnostic = 238;
1415 for (marctab = node->u.root.absyn->marc; marctab;
1416 marctab = marctab->next)
1417 if (marctab->reference == p->input_format)
1421 p->diagnostic = 238;
1425 data1_iconv (p->dh, mem, node, p->encoding,
1426 data1_get_encoding(p->dh, node));
1427 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1428 selected, &p->rec_len)))
1429 p->diagnostic = 238;
1432 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1433 memcpy (new_buf, p->rec_buf, p->rec_len);
1434 p->rec_buf = new_buf;
1438 data1_free_tree(p->dh, node);
1440 data1_free_tree(p->dh, onode);
1448 * indent-tabs-mode: nil
1450 * vim: shiftwidth=4 tabstop=8 expandtab