1 /* $Id: recgrs.c,v 1.3 2006-07-06 12:42:22 marc Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
25 #include <sys/types.h>
32 #include <idzebra/recgrs.h>
34 #define GRS_MAX_WORD 512
/* Parser state for termlist source expressions.  Code in this file reads
   and writes its src, tok, len, lookahead and nmem members; the member
   declarations themselves are not part of this excerpt. */
36 struct source_parser {
/* Lexer step for source expressions.
 * Visible behaviour: skips blanks at sp->src, scans forward until one of
 * the delimiter characters passed to strchr (or the end of the string)
 * is reached, and on another path stores the current source character
 * in sp->lookahead.
 * NOTE(review): several lines of this function are not part of this
 * excerpt; the return value is presumably the new lookahead - confirm. */
44 static int sp_lex(struct source_parser *sp)
/* skip leading blanks */
46 while (*sp->src == ' ')
/* advance to the next delimiter or to the terminating NUL */
50 while (*sp->src && !strchr("<>();,-: ", *sp->src))
59 sp->lookahead = *sp->src;
66 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
/* Handles the range function of a source expression:
 *   range ( expr , start [ , len ] )
 * The first argument is evaluated into wrd; the second and the optional
 * third argument are evaluated into a temporary word and converted with
 * atoi_n to start and len.  When wrd holds a non-empty term, its buffer
 * pointer is advanced by start and its length reduced by start, then
 * compared against len.
 * NOTE(review): no visible check that start is within
 * [0, wrd->term_len]; a larger start would move term_buf past the data
 * and make term_len negative - confirm against the full source. */
68 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
75 if (sp->lookahead != '(')
77 sp_lex(sp); /* skip ( */
/* first argument: the term to take a sub-range of */
80 if (!sp_expr(sp, n, wrd))
83 if (sp->lookahead != ',')
85 sp_lex(sp); /* skip , */
/* second argument: start offset */
88 if (!sp_expr(sp, n, &tmp_w))
90 start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
/* optional third argument: length */
92 if (sp->lookahead == ',')
94 sp_lex(sp); /* skip , */
97 if (!sp_expr(sp, n, &tmp_w))
99 len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
105 if (sp->lookahead != ')')
/* apply the range to a non-empty term */
109 if (wrd->term_buf && wrd->term_len)
111 wrd->term_buf += start;
112 wrd->term_len -= start;
113 if (wrd->term_len > len)
/* Handles the first function of a source expression:
 *   first ( expr , needle [ , needle ... ] )
 * The first argument is evaluated into wrd.  Each further argument is
 * evaluated into search_w and searched for inside the wrd buffer; the
 * smallest matching offset is tracked in min_pos.  The result replaces
 * wrd with the decimal text of min_pos (0 when nothing matched),
 * duplicated into sp->nmem. */
119 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
124 if (sp->lookahead != '(')
126 sp_lex(sp); /* skip ( */
127 if (!sp_expr(sp, n, wrd))
/* one iteration per needle argument */
129 while (sp->lookahead == ',')
133 sp_lex(sp); /* skip , */
135 if (!sp_expr(sp, n, &search_w))
/* naive substring search of search_w inside wrd */
137 for (i = 0; i<wrd->term_len; i++)
140 for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
141 if (wrd->term_buf[i+j] != search_w.term_buf[j])
143 if (j == search_w.term_len) /* match ? */
/* keep the leftmost match over all needles */
145 if (min_pos == -1 || i < min_pos)
151 if (sp->lookahead != ')')
155 min_pos = 0; /* the default if not found */
/* NOTE(review): the size of num_str is declared outside this excerpt */
156 sprintf(num_str, "%d", min_pos);
157 wrd->term_buf = nmem_strdup(sp->nmem, num_str);
158 wrd->term_len = strlen(wrd->term_buf);
/* Evaluates one source expression for node n and stores the resulting
 * term in wrd.  The current token (sp->tok, sp->len) selects the form:
 *   data        - text of a data node
 *   tag         - name of a tag node
 *   attr(expr)  - value of the attribute of a tag node named by expr
 *   first(...)  - delegated to sp_first
 *   range(...)  - delegated to sp_range
 *   digits      - numeric literal, copied into sp->nmem
 *   quoted text - single-quoted literal with the quotes removed, copied
 *                 into sp->nmem; at least one character is required
 *                 between the quotes (len > 2)
 * NOTE(review): return statements for most branches are not part of
 * this excerpt; callers treat a zero result as failure. */
162 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
164 if (sp->lookahead != 't')
166 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
168 if (n->which == DATA1N_data)
170 wrd->term_buf = n->u.data.data;
171 wrd->term_len = n->u.data.len;
175 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
177 if (n->which == DATA1N_tag)
179 wrd->term_buf = n->u.tag.tag;
180 wrd->term_len = strlen(n->u.tag.tag);
184 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
188 if (sp->lookahead != '(')
/* the attribute name is itself an expression */
192 if (!sp_expr(sp, n, &tmp_w))
197 if (n->which == DATA1N_tag)
199 data1_xattr *p = n->u.tag.attributes;
/* NOTE(review): as written the loop continues only while the length
   differs AND memcmp is non-zero, so it stops at the first attribute
   whose name merely has the same length or merely shares the compared
   bytes.  A full name match would need both conditions to hold before
   stopping - confirm the intended operator. */
200 while (p && strlen(p->name) != tmp_w.term_len &&
201 memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
205 wrd->term_buf = p->value;
206 wrd->term_len = strlen(p->value);
209 if (sp->lookahead != ')')
213 else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
215 return sp_first(sp, n, wrd);
217 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
219 return sp_range(sp, n, wrd);
/* numeric literal: the cast keeps isdigit away from negative char values */
221 else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
224 wrd->term_len = sp->len;
225 b = nmem_malloc(sp->nmem, sp->len);
226 memcpy(b, sp->tok, sp->len);
/* quoted literal: copy the text between the two quote characters */
230 else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
233 wrd->term_len = sp->len - 2;
234 b = nmem_malloc(sp->nmem, wrd->term_len);
235 memcpy(b, sp->tok+1, wrd->term_len);
/* Allocates a source_parser with xmalloc and gives it a fresh NMEM
 * handle.  Initialisation of the other members and the return statement
 * are not part of this excerpt. */
248 static struct source_parser *source_parser_create()
250 struct source_parser *sp = xmalloc(sizeof(*sp));
252 sp->nmem = nmem_create();
/* Releases the NMEM handle owned by sp.
 * NOTE(review): release of sp itself is presumably on a line outside
 * this excerpt - confirm. */
256 static void source_parser_destroy(struct source_parser *sp)
260 nmem_destroy(sp->nmem);
/* Entry point of the source expression parser: resets the parser NMEM
 * (discarding buffers handed out by earlier parses) and returns the
 * result of sp_expr for node n.  Setup of the parser from src happens
 * on lines that are not part of this excerpt. */
264 static int sp_parse(struct source_parser *sp,
265 data1_node *n, RecWord *wrd, const char *src)
271 nmem_reset(sp->nmem);
274 return sp_expr(sp, n, wrd);
/* Checks XPath predicate p against node n.
 * Relation predicates: only attribute names (leading @) and the =
 * operator are supported; anything else is logged as a warning and the
 * predicate is ignored.  The attribute list of n is searched for the
 * name; with an operator the attribute value is compared by strcmp,
 * without one the mere existence of the attribute is tested.
 * Boolean predicates: and / or combine the results of the left and
 * right sub-predicates recursively; other operators are logged and
 * ignored.
 * NOTE(review): the values returned on the ignored paths are on lines
 * outside this excerpt. */
277 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
286 if (p->which == XPATH_PREDICATE_RELATION) {
287 if (p->u.relation.name[0]) {
288 if (*p->u.relation.name != '@') {
290 " Only attributes (@) are supported in xelm xpath predicates");
291 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
/* skip the leading @ */
294 attname = p->u.relation.name + 1;
296 /* looking for the attribute with a specified name */
297 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
298 yaz_log(YLOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
300 if (!strcmp(attr->name, attname)) {
301 if (p->u.relation.op[0]) {
302 if (*p->u.relation.op != '=') {
304 "Only '=' relation is supported (%s)",p->u.relation.op);
305 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
308 yaz_log(YLOG_DEBUG," - value %s <-> %s",
309 p->u.relation.value, attr->value );
310 if (!strcmp(attr->value, p->u.relation.value)) {
315 /* attribute exists, no value specified */
320 yaz_log(YLOG_DEBUG, "return %d", res);
/* boolean combination of two sub-predicates */
326 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
327 if (!strcmp(p->u.boolean.op,"and")) {
328 return d1_check_xpath_predicate(n, p->u.boolean.left)
329 && d1_check_xpath_predicate(n, p->u.boolean.right);
331 else if (!strcmp(p->u.boolean.op,"or")) {
332 return (d1_check_xpath_predicate(n, p->u.boolean.left)
333 || d1_check_xpath_predicate(n, p->u.boolean.right));
335 yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
/* Runs text through the DFA whose state table is dfaar, starting in
 * dfaar[0].  For each input character the transitions of the current
 * state are scanned for one whose inclusive range ch[0]..ch[1] contains
 * the character.
 * NOTE(review): the state change, the accept test and the return values
 * are on lines outside this excerpt. */
344 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
346 struct DFA_state *s = dfaar[0]; /* start state */
349 const char *p = text;
/* first character: look for a transition out of the start state */
352 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
354 if (c >= t->ch[0] && c <= t->ch[1])
358 /* move to next state and return if we get a match */
366 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
367 if (c >= t->ch[0] && c <= t->ch[1])
377 New function, looking for xpath "element" definitions in abs, by
378 tagpath, using a kind of ugly regexp search. The DFA was built while
379 parsing abs, so here we just go through them and try to match
380 against the given tagpath. The first matching entry is returned.
384 Added support for enhanced xelm. Now [] predicates are considered
385 as well, when selecting indexing rules... (why the hell it's called
/* Finds the termlist of the first xpath element definition in the
 * abstract syntax of n that matches tagpath.
 * The path is wrapped as a slash, the tagpath and a newline, and run
 * through the DFA of each xp_element.  For a DFA match the predicates of
 * the xpath location steps are checked from the innermost step outwards
 * while walking up the tag ancestors of n.  The termlists of the first
 * element that passes are returned.
 * NOTE(review): pexpr comes from xmalloc; its release is not visible in
 * this excerpt, including on the early return path - confirm.
 * NOTE(review): abs is dereferenced without a visible NULL check, so
 * callers must ensure the root has an absyn. */
392 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
394 data1_absyn *abs = n->root->u.root.absyn;
395 data1_xpelement *xpe = abs->xp_elements;
398 struct xpath_location_step *xp;
/* room for the slash, the newline and the NUL, plus two spare bytes */
400 char *pexpr = xmalloc(strlen(tagpath)+5);
403 sprintf (pexpr, "/%s\n", tagpath);
404 for (; xpe; xpe = xpe->next)
407 ok = dfa_match_first(xpe->dfa->states, pexpr);
411 /* we have to check the predicates up to the root node */
414 /* find the first tag up in the node structure */
/* NOTE(review): nn is NULL after this loop when no tag ancestor exists;
   no NULL check is visible before nn->u.tag.tag is read below - confirm */
415 for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent)
418 /* go from inside out in the node structure, while going
419 backwards through xpath location steps ... */
420 for (i = xpe->xpath_len - 1; i>0; i--)
422 yaz_log(YLOG_DEBUG, "Checking step %d: %s on tag %s",
423 i, xp[i].part, nn->u.tag.tag);
425 if (!d1_check_xpath_predicate(nn, xp[i].predicate))
427 yaz_log(YLOG_DEBUG, " Predicates didn't match");
432 if (nn->which == DATA1N_tag)
444 yaz_log(YLOG_DEBUG, "Got it");
445 return xpe->termlists;
452 1 start element (tag)
454 3 start attr (and attr-exact)
462 Now, if there is a matching xelm described in abs, for the
463 indexed element or the attribute, then the data is handled according
464 to those definitions...
466 modified by pop, 2002-12-13
469 /* add xpath index for an attribute */
/* Fills wrd three times in sequence: an element-begin entry holding
 * tag_path (type 0), an attribute character data entry holding value
 * (type w), and an element-end entry holding tag_path (type 0).
 * NOTE(review): the calls that hand each entry to the indexer, and any
 * use of the name and structure parameters, are on lines outside this
 * excerpt. */
470 static void index_xpath_attr (char *tag_path, char *name, char *value,
471 char *structure, struct recExtractCtrl *p,
/* 1: element begin, carrying the attribute tag path */
474 wrd->index_name = ZEBRA_XPATH_ELM_BEGIN;
475 wrd->index_type = '0';
476 wrd->term_buf = tag_path;
477 wrd->term_len = strlen(tag_path);
/* 2: the attribute value itself */
481 wrd->index_name = ZEBRA_XPATH_ATTR_CDATA;
482 wrd->index_type = 'w';
483 wrd->term_buf = value;
484 wrd->term_len = strlen(value);
/* 3: element end, same tag path */
487 wrd->index_name = ZEBRA_XPATH_ELM_END;
488 wrd->index_type = '0';
489 wrd->term_buf = tag_path;
490 wrd->term_len = strlen(tag_path);
/* Builds the tag path of node n in tag_path_full (capacity max bytes).
 * The walk goes from n up through the parents, so tag names are
 * appended innermost first, each followed by a slash.  The walk ends at
 * the root node or when the next name would not fit; the result is
 * always NUL terminated. */
495 static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
500 /* we have to fetch the whole path to the data tag */
501 for (nn = n; nn; nn = nn->parent)
503 if (nn->which == DATA1N_tag)
505 size_t tlen = strlen(nn->u.tag.tag);
/* keep room for the slash and the terminating NUL */
506 if (tlen + flen > (max - 2))
508 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
510 tag_path_full[flen++] = '/';
513 if (nn->which == DATA1N_root)
516 tag_path_full[flen] = 0;
/* Produces the XPath related index entries for node n.
 * Visible structure:
 *  - termlist_only starts at 1; the test on the absyn (none present, or
 *    xpath indexing enabled) guards lines outside this excerpt that
 *    presumably clear it - confirm.
 *  - Data handling: the tag path is built with mk_tag_path_full and a
 *    matching xelm termlist is looked up.  For each termlist entry a
 *    copy of wrd is evaluated with sp_parse and either shown (when
 *    p->flagShowRecords is set) or passed to p->tokenAdd, under the
 *    xpath index name or under the index name of the termlist entry.
 *    Without a termlist hit and with default indexing on, the data is
 *    indexed as type w.
 *  - Tag handling: the tag path itself is indexed as type 0; for a start
 *    tag (xpath_is_start == 1) the attributes are processed as well,
 *    both as name / name=value entries and through index_xpath_attr.
 * NOTE(review): many lines (switch or case labels, braces, several
 * declarations) are not part of this excerpt; the comments below only
 * describe what the visible statements do. */
520 static void index_xpath(struct source_parser *sp, data1_node *n,
521 struct recExtractCtrl *p,
522 int level, RecWord *wrd,
528 char tag_path_full[1024];
529 int termlist_only = 1;
532 if ((!n->root->u.root.absyn) ||
533 (n->root->u.root.absyn->xpath_indexing == DATA1_XPATH_INDEXING_ENABLE)) {
540 wrd->term_buf = n->u.data.data;
541 wrd->term_len = n->u.data.len;
544 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
546 /* If we have a matching termlist... */
547 if (n->root->u.root.absyn &&
548 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
550 for (; tl; tl = tl->next)
552 /* need to copy recword because it may be changed */
554 wrd->index_type = *tl->structure;
555 memcpy (&wrd_tl, wrd, sizeof(*wrd));
/* evaluate the source expression of this termlist entry on the copy */
557 sp_parse(sp, n, &wrd_tl, tl->source);
560 /* this is the ! case, so structure is for the xpath index */
561 wrd_tl.index_name = xpath_index;
562 if (p->flagShowRecords)
565 printf("%*sXPath index", (level + 1) * 4, "");
566 printf (" XData:\"");
/* show at most the first 40 bytes of the term */
567 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
568 fputc (wrd_tl.term_buf[i], stdout);
570 if (wrd_tl.term_len > 40)
572 fputc ('\n', stdout);
575 (*p->tokenAdd)(&wrd_tl);
578 /* this is just the old fashioned attribute based index */
579 wrd_tl.index_name = tl->index_name;
580 if (p->flagShowRecords)
583 printf("%*sIdx: [%s]", (level + 1) * 4, "",
585 printf("%s %s", tl->index_name, tl->source);
586 printf (" XData:\"");
587 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
588 fputc (wrd_tl.term_buf[i], stdout);
590 if (wrd_tl.term_len > 40)
592 fputc ('\n', stdout);
595 (*p->tokenAdd)(&wrd_tl);
599 /* xpath indexing is done, if there was no termlist given,
600 or no ! in the termlist, and default indexing is enabled... */
601 if (!p->flagShowRecords && !xpdone && !termlist_only)
603 wrd->index_name = xpath_index;
604 wrd->index_type = 'w';
/* tag handling: the term is the tag path itself */
609 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
611 wrd->index_type = '0';
612 wrd->term_buf = tag_path_full;
613 wrd->term_len = strlen(tag_path_full);
614 wrd->index_name = xpath_index;
615 if (p->flagShowRecords)
617 printf("%*s tag=", (level + 1) * 4, "");
618 for (i = 0; i<wrd->term_len && i < 40; i++)
619 fputc (wrd->term_buf[i], stdout);
630 /* Add tag start/end xpath index, only when there is a ! in
631 the apropriate xelm directive, or default xpath indexing
634 if (!(do_xpindex = 1 - termlist_only))
636 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n)))
638 for (; tl; tl = tl->next)
646 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
649 if (xpath_is_start == 1) /* only for the starting tag... */
651 #define MAX_ATTR_COUNT 50
/* NOTE(review): tll is indexed by i in the loop below; no bound check
   against MAX_ATTR_COUNT is visible in this excerpt - confirm */
652 data1_termlist *tll[MAX_ATTR_COUNT];
656 /* get termlists for attributes, and find out, if we have to do xpath indexing */
657 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
662 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
664 int do_xpindex = 1 - termlist_only;
666 char attr_tag_path_full[1024];
668 /* this could be cached as well */
/* NOTE(review): tag_path_full may already hold up to about 1022 bytes,
   so adding the attribute name and two separators can exceed the
   1024 byte destination; sprintf does not bound the write - confirm */
669 sprintf (attr_tag_path_full, "@%s/%s",
670 xp->name, tag_path_full);
672 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
674 /* if there is a ! in the xelm termlist, or default indexing is on,
675 proceed with xpath idx */
678 for (; tl; tl = tl->next)
687 /* attribute (no value) */
688 wrd->index_type = '0';
689 wrd->index_name = ZEBRA_XPATH_ATTR_NAME;
690 wrd->term_buf = xp->name;
691 wrd->term_len = strlen(xp->name);
/* only build name plus value when it fits into comb */
697 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
699 /* attribute value exact */
700 strcpy (comb, xp->name);
702 strcat (comb, xp->value);
704 wrd->index_name = ZEBRA_XPATH_ATTR_NAME;
705 wrd->index_type = '0';
706 wrd->term_buf = comb;
707 wrd->term_len = strlen(comb);
/* second pass over the attributes: termlist driven indexing */
717 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
719 char attr_tag_path_full[1024];
/* NOTE(review): same unbounded sprintf as in the loop above */
722 sprintf (attr_tag_path_full, "@%s/%s",
723 xp->name, tag_path_full);
727 /* If there is a termlist given (=xelm directive) */
728 for (; tl; tl = tl->next)
732 /* add xpath index for the attribute */
733 index_xpath_attr (attr_tag_path_full, xp->name,
734 xp->value, tl->structure,
738 /* index attribute value (only path/@attr) */
741 wrd->index_name = tl->index_name;
742 wrd->index_type = *tl->structure;
743 wrd->term_buf = xp->value;
744 wrd->term_len = strlen(xp->value);
750 /* if there was no termlist for the given path,
751 or the termlist didn't have a ! element, index
752 the attribute as "w" */
753 if ((!xpdone) && (!termlist_only))
755 index_xpath_attr (attr_tag_path_full, xp->name,
756 xp->value, "w", p, wrd);
/* Indexes node n according to the termlists of the nearest ancestor tag
 * (starting at par) that has an element definition.  Each termlist
 * entry has its source expression evaluated with sp_parse; a non-empty
 * result is either displayed (p->flagShowRecords) or given the index
 * type and index name of the entry.
 * NOTE(review): the final hand-over of wrd to the indexer and the use
 * of dtype are on lines outside this excerpt. */
765 static void index_termlist (struct source_parser *sp, data1_node *par,
767 struct recExtractCtrl *p, int level, RecWord *wrd)
769 data1_termlist *tlist = 0;
770 data1_datatype dtype = DATA1K_string;
773 * cycle up towards the root until we find a tag with an att..
774 * this has the effect of indexing locally defined tags with
775 * the attribute of their ancestor in the record.
778 while (!par->u.tag.element)
779 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
781 if (!par || !(tlist = par->u.tag.element->termlists))
783 if (par->u.tag.element->tag)
784 dtype = par->u.tag.element->tag->kind;
786 for (; tlist; tlist = tlist->next)
788 /* consider source */
790 assert(tlist->source);
791 sp_parse(sp, n, wrd, tlist->source);
/* skip entries whose source expression produced no term */
793 if (wrd->term_buf && wrd->term_len)
795 if (p->flagShowRecords)
798 printf("%*sIdx: [%s]", (level + 1) * 4, "",
800 printf("%s %s", tlist->index_name, tlist->source);
801 printf (" XData:\"");
/* show at most the first 40 bytes of the term */
802 for (i = 0; i<wrd->term_len && i < 40; i++)
803 fputc (wrd->term_buf[i], stdout);
805 if (wrd->term_len > 40)
807 fputc ('\n', stdout);
811 wrd->index_type = *tlist->structure;
812 wrd->index_name = tlist->index_name;
/* Walks the sibling list starting at n and recurses into children.
 * Per node:
 *  - with p->flagShowRecords set, prints a description (record type,
 *    local tag or element name with tag name, tagset and value);
 *  - tag nodes are indexed with index_termlist and, when the root has an
 *    absyn, with index_xpath for the start tag;
 *  - children are processed one level deeper; a negative result from the
 *    recursion is tested;
 *  - data nodes are indexed against their parent tag and with
 *    index_xpath as character data;
 *  - tag nodes get an index_xpath call for the end tag.
 * NOTE(review): the return statements are outside this excerpt. */
819 static int dumpkeys_r(struct source_parser *sp,
820 data1_node *n, struct recExtractCtrl *p, int level,
823 for (; n; n = n->next)
825 if (p->flagShowRecords) /* display element description to user */
827 if (n->which == DATA1N_root)
829 printf("%*s", level * 4, "");
830 printf("Record type: '%s'\n", n->u.root.type);
832 else if (n->which == DATA1N_tag)
836 printf("%*s", level * 4, "");
837 if (!(e = n->u.tag.element))
838 printf("Local tag: '%s'\n", n->u.tag.tag);
841 printf("Elm: '%s' ", e->name);
844 data1_tag *t = e->tag;
846 printf("TagNam: '%s' ", t->names->name);
849 printf("%s[%d],", t->tagset->name, t->tagset->type);
852 if (t->which == DATA1T_numeric)
853 printf("%d)", t->value.numeric);
855 printf("'%s')", t->value.string);
862 if (n->which == DATA1N_tag)
864 index_termlist(sp, n, n, p, level, wrd);
865 /* index start tag */
866 if (n->root->u.root.absyn)
867 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN,
/* descend into the children of this node */
872 if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
876 if (n->which == DATA1N_data)
878 data1_node *par = get_parent_tag(p->dh, n);
880 if (p->flagShowRecords)
882 printf("%*s", level * 4, "");
/* long data is shown as its first 170 and last 70 bytes */
884 if (n->u.data.len > 256)
885 printf("'%.170s ... %.70s'\n", n->u.data.data,
886 n->u.data.data + n->u.data.len-70);
887 else if (n->u.data.len > 0)
888 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
894 index_termlist(sp, par, n, p, level, wrd);
896 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA,
/* index end tag */
900 if (n->which == DATA1N_tag)
903 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END,
907 if (p->flagShowRecords && n->which == DATA1N_root)
909 printf("%*s-------------\n\n", level * 4, "");
/* Runs dumpkeys_r over the tree at n with a temporary source parser
 * that is created before and destroyed after the walk.  The result of
 * dumpkeys_r is kept in r; the return statement is outside this
 * excerpt. */
915 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
917 struct source_parser *sp = source_parser_create();
918 int r = dumpkeys_r(sp, n, p, 0, wrd);
919 source_parser_destroy(sp);
/* Extracts index keys from an already built data1 tree n.
 * Registers the schema OID of the abstract syntax through p->schemaAdd
 * when it can be resolved, then returns the result of dumpkeys.
 * NOTE(review): n->u.root.absyn is dereferenced here; a guarding test
 * may be on a line outside this excerpt - confirm. */
923 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
926 int oidtmp[OID_SIZE];
929 oe.proto = PROTO_Z3950;
930 oe.oclass = CLASS_SCHEMA;
933 oe.value = n->u.root.absyn->reference;
935 if ((oid_ent_to_oid (&oe, oidtmp)))
936 (*p->schemaAdd)(p, oidtmp);
940 /* data1_pr_tree(p->dh, n, stdout); */
942 return dumpkeys(n, p, &wrd);
/* Reads one record through grs_read and extracts its index keys.
 * Steps visible here: fill a grs_read_info from p, read the tree,
 * require an abstract syntax on the root, register the schema OID,
 * merge adjacent text nodes, convert the tree to UTF-8, remove the
 * idzebra subtree, and run dumpkeys.
 * Returns RECCTRL_EXTRACT_EOF, RECCTRL_EXTRACT_ERROR (no absyn),
 * RECCTRL_EXTRACT_ERROR_GENERIC (dumpkeys failed) or
 * RECCTRL_EXTRACT_OK. */
945 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
947 data1_node *(*grs_read)(struct grs_read_info *))
950 struct grs_read_info gri;
952 int oidtmp[OID_SIZE];
/* hand the I/O callbacks and position of the caller to the reader */
955 gri.readf = p->readf;
956 gri.seekf = p->seekf;
957 gri.tellf = p->tellf;
960 gri.offset = p->offset;
963 gri.clientData = clientData;
965 n = (*grs_read)(&gri);
/* presumably taken when the reader returned no tree - confirm */
967 return RECCTRL_EXTRACT_EOF;
968 oe.proto = PROTO_Z3950;
969 oe.oclass = CLASS_SCHEMA;
971 if (!n->u.root.absyn)
972 return RECCTRL_EXTRACT_ERROR;
976 oe.value = n->u.root.absyn->reference;
977 if ((oid_ent_to_oid (&oe, oidtmp)))
978 (*p->schemaAdd)(p, oidtmp);
980 data1_concat_text(p->dh, mem, n);
982 /* ensure our data1 tree is UTF-8 */
983 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
986 data1_remove_idzebra_subtree (p->dh, n);
989 data1_pr_tree (p->dh, n, stdout);
993 if (dumpkeys(n, p, &wrd) < 0)
995 return RECCTRL_EXTRACT_ERROR_GENERIC;
997 return RECCTRL_EXTRACT_OK;
/* Public extract entry point: creates an NMEM handle for the duration
 * of the call and delegates to grs_extract_sub.
 * NOTE(review): release of mem and the return of ret are on lines
 * outside this excerpt. */
1000 int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
1001 data1_node *(*grs_read)(struct grs_read_info *))
1004 NMEM mem = nmem_create ();
1005 ret = grs_extract_sub(clientData, p, mem, grs_read);
1011 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/* Applies the record composition c to the tree n.
 * Simple composition: only the generic element set name form is
 * accepted (otherwise 26); an unknown name yields 25 with the name
 * copied into *addinfo.
 * Complex composition: the generic element spec is either an element
 * set name (looked up the same way) or an external Espec-1; other
 * external specs yield 25.
 * With an Espec-1 in hand the result of data1_doespec1 is returned. */
1013 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
1014 char **addinfo, ODR o)
1016 data1_esetname *eset;
1017 Z_Espec1 *espec = 0;
1022 case Z_RecordComp_simple:
1023 if (c->u.simple->which != Z_ElementSetNames_generic)
1024 return 26; /* only generic form supported. Fix this later */
1025 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1026 c->u.simple->u.generic)))
1028 yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1029 *addinfo = odr_strdup(o, c->u.simple->u.generic);
1030 return 25; /* invalid esetname */
1032 yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
1033 c->u.simple->u.generic);
1036 case Z_RecordComp_complex:
1037 if (c->u.complex->generic)
1039 /* insert check for schema */
1040 if ((p = c->u.complex->generic->elementSpec))
1044 case Z_ElementSpec_elementSetName:
1046 data1_getesetbyname(dh, n->u.root.absyn,
1047 p->u.elementSetName)))
1049 yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
1050 p->u.elementSetName);
1051 *addinfo = odr_strdup(o, p->u.elementSetName);
1052 return 25; /* invalid esetname */
1054 yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
1055 p->u.elementSetName);
1058 case Z_ElementSpec_externalSpec:
1059 if (p->u.externalSpec->which == Z_External_espec1)
1061 yaz_log(YLOG_DEBUG, "Got Espec-1");
1062 espec = p->u.externalSpec-> u.espec1;
1066 yaz_log(YLOG_LOG, "Unknown external espec.");
1067 return 25; /* bad. what is proper diagnostic? */
1074 return 26; /* fix */
/* an Espec-1 was found above: let data1 apply it to the tree */
1078 yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1079 return data1_doespec1(dh, n, espec);
1083 yaz_log(YLOG_DEBUG, "Element: all match");
1088 /* Add Zebra info in separate namespace ...
1091 <metadata xmlns="http://www.indexdata.dk/zebra/">
1093 <localnumber>447</localnumber>
1094 <filename>records/genera.xml</filename>
/* Appends a metadata element in the Zebra namespace under top.
 * An idzebra tag carrying the xmlns attribute is created, then filled
 * with size, score, localnumber and filename children taken from p,
 * with whitespace text nodes in between for indentation.
 * NOTE(review): conditions guarding the individual children, if any,
 * are on lines outside this excerpt. */
1099 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1102 const char *idzebra_ns[3];
1103 const char *i2 = "\n ";
1104 const char *i4 = "\n ";
/* attribute list for the new tag: name followed by value */
1107 idzebra_ns[0] = "xmlns";
1108 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1111 data1_mk_text (p->dh, mem, i2, top);
1113 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1115 data1_mk_text (p->dh, mem, "\n", top);
1117 data1_mk_text (p->dh, mem, i4, n);
1119 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1123 data1_mk_text (p->dh, mem, i4, n);
1124 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1126 data1_mk_text (p->dh, mem, i4, n);
1127 data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
1130 data1_mk_text (p->dh, mem, i4, n);
1131 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1133 data1_mk_text (p->dh, mem, i2, n);
/* Retrieves one record: reads it through grs_read, decorates the tree
 * and renders it in the requested syntax.
 * Stages, in the order the log messages name them:
 *  1. read the tree, merge text nodes, remove the idzebra subtree;
 *  2. add system tags (size, rank, sysno) when the absyn maps them, and
 *     XML metadata when the input format is VAL_TEXT_XML;
 *  3. schema mapping: when a schema OID was requested, map through a
 *     matching maptab and set diagnostic 238 on mismatch;
 *  4. syntax mapping through a maptab that targets p->input_format;
 *  5. schemaIdentifier: for GRS-1 output the schema OID is added as a
 *     dotted text tag;
 *  6. element spec: process_comp selects elements, a positive result
 *     becomes p->diagnostic;
 *  7. transfer syntax: the tree is converted with data1_iconv and
 *     rendered by the data1_nodeto* function of the output format;
 *     failure sets diagnostic 238, and for several formats the buffer
 *     is copied into ODR memory.
 * NOTE(review): in stage 5 a local named p is used as a char cursor
 * (sprintf(p, ...), p - tmp) after dh = p->dh was saved, which appears
 * to shadow the function parameter p - confirm against the full source.
 * NOTE(review): case labels, braces and the clean-up or return code are
 * not part of this excerpt. */
1136 int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
1137 data1_node *(*grs_read)(struct grs_read_info *))
1139 data1_node *node = 0, *onode = 0, *top;
1142 int res, selected = 0;
1144 struct grs_read_info gri;
1145 const char *tagname;
1147 int requested_schema = VAL_NONE;
1148 data1_marctab *marctab;
1151 mem = nmem_create();
1152 gri.readf = p->readf;
1153 gri.seekf = p->seekf;
1154 gri.tellf = p->tellf;
1160 gri.clientData = clientData;
1162 yaz_log(YLOG_DEBUG, "grs_retrieve");
1163 node = (*grs_read)(&gri);
1170 data1_concat_text(p->dh, mem, node);
1172 data1_remove_idzebra_subtree (p->dh, node);
1175 data1_pr_tree (p->dh, node, stdout);
1177 top = data1_get_root_tag (p->dh, node);
/* system tag: record size, written into the local buffer of the node */
1179 yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1180 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1182 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1184 dnew->u.data.what = DATA1I_text;
1185 dnew->u.data.data = dnew->lbuf;
1186 sprintf(dnew->u.data.data, "%d", p->recordSize);
1187 dnew->u.data.len = strlen(dnew->u.data.data);
/* system tag: rank, only for a non-negative score */
1190 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1191 if (tagname && p->score >= 0 &&
1192 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1194 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1195 dnew->u.data.what = DATA1I_num;
1196 dnew->u.data.data = dnew->lbuf;
1197 sprintf(dnew->u.data.data, "%d", p->score);
1198 dnew->u.data.len = strlen(dnew->u.data.data);
/* system tag: local record number, only when positive */
1201 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1202 "localControlNumber");
1203 if (tagname && p->localno > 0 &&
1204 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1206 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1207 dnew->u.data.what = DATA1I_text;
1208 dnew->u.data.data = dnew->lbuf;
1210 sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
1211 dnew->u.data.len = strlen(dnew->u.data.data);
1214 if (p->input_format == VAL_TEXT_XML)
1215 zebra_xml_metadata (p, top, mem);
1218 data1_pr_tree (p->dh, node, stdout);
/* pick up the schema OID from a complex composition, if one was sent */
1220 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1221 p->comp->u.complex->generic &&
1222 p->comp->u.complex->generic->which == Z_Schema_oid &&
1223 p->comp->u.complex->generic->schema.oid)
1225 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1227 requested_schema = oe->value;
1229 /* If schema has been specified, map if possible, then check that
1230 * we got the right one
1232 if (requested_schema != VAL_NONE)
1234 yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1235 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1237 if (map->target_absyn_ref == requested_schema)
1240 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1249 if (node->u.root.absyn &&
1250 requested_schema != node->u.root.absyn->reference)
1252 p->diagnostic = 238;
1258 * Does the requested format match a known syntax-mapping? (this reflects
1259 * the overlap of schema and formatting which is inherent in the MARC
1262 yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1263 if (node->u.root.absyn)
1264 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1266 if (map->target_absyn_ref == p->input_format)
1269 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1278 yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1279 if (node->u.root.absyn &&
1280 node->u.root.absyn->reference != VAL_NONE &&
1281 p->input_format == VAL_GRS1)
1285 int oidtmp[OID_SIZE];
1287 oe.proto = PROTO_Z3950;
1288 oe.oclass = CLASS_SCHEMA;
1289 oe.value = node->u.root.absyn->reference;
1291 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1294 data1_handle dh = p->dh;
1298 for (ii = oid; *ii >= 0; ii++)
1302 sprintf(p, "%d", *ii);
1305 if ((dnew = data1_mk_tag_data_wd(dh, top,
1306 "schemaIdentifier", mem)))
1308 dnew->u.data.what = DATA1I_oid;
1309 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1310 memcpy(dnew->u.data.data, tmp, p - tmp);
1311 dnew->u.data.len = p - tmp;
1316 yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1317 if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
1320 p->diagnostic = res;
1324 else if (p->comp && !res)
1328 data1_pr_tree (p->dh, node, stdout);
1330 yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1331 switch (p->output_format = (p->input_format != VAL_NONE ?
1332 p->input_format : VAL_SUTRS))
1336 data1_pr_tree (p->dh, node, stdout);
1338 /* default output encoding for XML is UTF-8 */
1339 data1_iconv (p->dh, mem, node,
1340 p->encoding ? p->encoding : "UTF-8",
1341 data1_get_encoding(p->dh, node));
1343 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1345 p->diagnostic = 238;
/* copy the rendered record into ODR memory owned by the caller */
1348 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1349 memcpy (new_buf, p->rec_buf, p->rec_len);
1350 p->rec_buf = new_buf;
1354 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1356 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1358 p->diagnostic = 238; /* not available in requested syntax */
1363 /* ensure our data1 tree is UTF-8 */
1364 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1366 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1368 p->diagnostic = 238;
1373 /* ensure our data1 tree is UTF-8 */
1374 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1375 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1377 p->diagnostic = 238;
/* plain buffer output in the encoding requested by the client */
1383 data1_iconv (p->dh, mem, node, p->encoding,
1384 data1_get_encoding(p->dh, node));
1385 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1387 p->diagnostic = 238;
1390 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1391 memcpy (new_buf, p->rec_buf, p->rec_len);
1392 p->rec_buf = new_buf;
1397 data1_iconv (p->dh, mem, node, p->encoding,
1398 data1_get_encoding(p->dh, node));
1399 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1401 p->diagnostic = 238;
1404 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1405 memcpy (new_buf, p->rec_buf, p->rec_len);
1406 p->rec_buf = new_buf;
/* remaining formats need a MARC table from the abstract syntax */
1410 if (!node->u.root.absyn)
1412 p->diagnostic = 238;
1415 for (marctab = node->u.root.absyn->marc; marctab;
1416 marctab = marctab->next)
1417 if (marctab->reference == p->input_format)
1421 p->diagnostic = 238;
1425 data1_iconv (p->dh, mem, node, p->encoding,
1426 data1_get_encoding(p->dh, node));
1427 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1428 selected, &p->rec_len)))
1429 p->diagnostic = 238;
1432 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1433 memcpy (new_buf, p->rec_buf, p->rec_len);
1434 p->rec_buf = new_buf;
1444 * indent-tabs-mode: nil
1446 * vim: shiftwidth=4 tabstop=8 expandtab