1 /* $Id: recgrs.c,v 1.7 2006-09-29 10:02:47 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include <sys/types.h>
32 #include <idzebra/recgrs.h>
/* Presumably the maximum word length; it is not referenced anywhere in the
 * portion of the file shown here - TODO confirm where it is used. */
34 #define GRS_MAX_WORD 512
/*
 * State of the small expression parser applied to termlist "source"
 * specifications.  The member declarations are on lines not shown here; the
 * functions below access sp->src (read cursor into the expression),
 * sp->tok and sp->len (start and length of the current token),
 * sp->lookahead (compared against 't' and against delimiter characters)
 * and sp->nmem (NMEM handle used for strings created while parsing).
 */
36 struct source_parser {
/*
 * Lexer step of the source-expression parser: moves sp->src past the next
 * token and publishes the result through sp->lookahead.
 *
 * Visible behaviour: blanks at sp->src are skipped, then characters are
 * scanned until one of the delimiters "<>();,-: " or the terminating NUL is
 * met, and the character at sp->src is stored in sp->lookahead.  Callers
 * compare sp->lookahead with 't' for word tokens and with literal delimiter
 * characters such as '(' ',' ')'.
 *
 * NOTE(review): several lines of this function are not shown, so the
 * conditions around the lookahead assignment, the updates of sp->tok and
 * sp->len, and the value returned cannot be confirmed from here.
 */
44 static int sp_lex(struct source_parser *sp)
46 while (*sp->src == ' ')
50 while (*sp->src && !strchr("<>();,-: ", *sp->src))
59 sp->lookahead = *sp->src;
/* Forward declaration: sp_range and sp_first evaluate their arguments by
 * calling sp_expr, which in turn dispatches back to them. */
66 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
/*
 * Handle the argument list "(expr, start [, len])" of a range expression.
 *
 * The first argument is evaluated into wrd; the second and the optional
 * third are evaluated into a temporary RecWord and converted with atoi_n to
 * the integers start and len.  After the closing ')' has been checked, a
 * non-empty wrd is narrowed: term_buf is advanced by start, term_len is
 * reduced by start, and term_len is then compared against len.
 *
 * sp   parser state (positioned at the '(' on entry)
 * n    node the expressions are evaluated against
 * wrd  receives the resulting sub-term
 *
 * NOTE(review): the statements executed when a check fails, the default
 * for len, and the return values are on lines not shown here.
 */
68 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
75 if (sp->lookahead != '(')
77 sp_lex(sp); /* skip ( */
80 if (!sp_expr(sp, n, wrd))
83 if (sp->lookahead != ',')
85 sp_lex(sp); /* skip , */
88 if (!sp_expr(sp, n, &tmp_w))
90 start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
92 if (sp->lookahead == ',')
94 sp_lex(sp); /* skip , */
97 if (!sp_expr(sp, n, &tmp_w))
99 len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
105 if (sp->lookahead != ')')
109 if (wrd->term_buf && wrd->term_len)
/* NOTE(review): start is applied without being compared to wrd->term_len;
 * a start beyond the end of the term would move term_buf past the data and
 * leave term_len negative - confirm whether callers rule this out. */
111 wrd->term_buf += start;
112 wrd->term_len -= start;
113 if (wrd->term_len > len)
/*
 * Handle the argument list "(expr, needle [, needle ...])" of a first
 * expression.
 *
 * The first argument is evaluated into wrd.  Each further argument is
 * evaluated into search_w and wrd is scanned byte by byte for an occurrence
 * of it; min_pos keeps the smallest matching offset over all needles.  The
 * offset (0 being used as the default when nothing was found) is formatted
 * as a decimal string, duplicated into sp->nmem, and becomes the new
 * content of wrd.
 *
 * sp   parser state (positioned at the '(' on entry)
 * n    node the expressions are evaluated against
 * wrd  on return holds the decimal offset as text
 *
 * NOTE(review): the error paths and return values are on lines not shown.
 */
119 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
124 if (sp->lookahead != '(')
126 sp_lex(sp); /* skip ( */
127 if (!sp_expr(sp, n, wrd))
129 while (sp->lookahead == ',')
133 sp_lex(sp); /* skip , */
135 if (!sp_expr(sp, n, &search_w))
/* naive substring search: try every start offset i in wrd */
137 for (i = 0; i<wrd->term_len; i++)
140 for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
141 if (wrd->term_buf[i+j] != search_w.term_buf[j])
143 if (j == search_w.term_len) /* match ? */
145 if (min_pos == -1 || i < min_pos)
151 if (sp->lookahead != ')')
155 min_pos = 0; /* the default if not found */
/* the numeric result is handed back as text allocated from sp->nmem */
156 sprintf(num_str, "%d", min_pos);
157 wrd->term_buf = nmem_strdup(sp->nmem, num_str);
158 wrd->term_len = strlen(wrd->term_buf);
/*
 * Evaluate one source expression against node n and leave the resulting
 * term in wrd (term_buf / term_len).
 *
 * Forms recognised in the visible code, selected by the current token:
 *   data        the text of n, when n is a data node
 *   tag         the tag name of n, when n is a tag node
 *   attr(expr)  the value of an attribute of n; expr yields the name
 *   first(...)  delegated to sp_first
 *   range(...)  delegated to sp_range
 *   digits      a token starting with a digit, copied into sp->nmem memory
 *   'text'      a quoted token (more than two characters long), copied
 *               into sp->nmem memory without the surrounding quotes
 *
 * sp   parser state; sp->tok / sp->len describe the current token
 * n    node supplying data, tag name or attributes
 * wrd  receives the term
 *
 * NOTE(review): the fallback branches, the token advances and the return
 * statements are on lines not shown here.
 */
162 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
164 if (sp->lookahead != 't')
166 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
168 if (n->which == DATA1N_data)
170 wrd->term_buf = n->u.data.data;
171 wrd->term_len = n->u.data.len;
175 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
177 if (n->which == DATA1N_tag)
179 wrd->term_buf = n->u.tag.tag;
180 wrd->term_len = strlen(n->u.tag.tag);
184 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
188 if (sp->lookahead != '(')
192 if (!sp_expr(sp, n, &tmp_w))
197 if (n->which == DATA1N_tag)
199 data1_xattr *p = n->u.tag.attributes;
/* NOTE(review): the two tests are joined with &&, so the scan stops as soon
 * as EITHER the name length equals term_len OR the first term_len bytes
 * compare equal.  An attribute of the same length with a different name, or
 * one that merely begins with the requested name, would be accepted, and
 * memcmp may read past the end of a shorter p->name.  This looks like it
 * should be || - confirm against the full source before changing. */
200 while (p && strlen(p->name) != tmp_w.term_len &&
201 memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
205 wrd->term_buf = p->value;
206 wrd->term_len = strlen(p->value);
209 if (sp->lookahead != ')')
213 else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
215 return sp_first(sp, n, wrd);
217 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
219 return sp_range(sp, n, wrd);
/* the cast keeps isdigit() away from negative char values */
221 else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
224 wrd->term_len = sp->len;
225 b = nmem_malloc(sp->nmem, sp->len);
226 memcpy(b, sp->tok, sp->len);
230 else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
233 wrd->term_len = sp->len - 2;
234 b = nmem_malloc(sp->nmem, wrd->term_len);
235 memcpy(b, sp->tok+1, wrd->term_len);
/*
 * Allocate a source_parser with xmalloc and give it its own NMEM handle
 * (nmem_create).  Initialisation of the remaining members and the return
 * statement are on lines not shown here.
 */
248 static struct source_parser *source_parser_create()
250 struct source_parser *sp = xmalloc(sizeof(*sp));
252 sp->nmem = nmem_create();
/*
 * Tear down a parser obtained from source_parser_create: the NMEM handle
 * owned by sp is destroyed.
 * NOTE(review): any NULL check on sp and the release of sp itself are on
 * lines not shown here - confirm both exist.
 */
256 static void source_parser_destroy(struct source_parser *sp)
260 nmem_destroy(sp->nmem);
/*
 * Entry point of the source-expression parser: evaluate the expression text
 * src against node n and store the result in wrd.
 *
 * The parser's NMEM is reset first, so strings produced by an earlier call
 * on the same sp (literals, results of first()) must no longer be
 * referenced.  Returns whatever sp_expr returns.
 *
 * NOTE(review): the lines that attach src to sp and fetch the first token
 * are not shown here.
 */
264 static int sp_parse(struct source_parser *sp,
265 data1_node *n, RecWord *wrd, const char *src)
271 nmem_reset(sp->nmem);
274 return sp_expr(sp, n, wrd);
/*
 * Test whether node n satisfies the XPath predicate p.
 *
 * XPATH_PREDICATE_RELATION: only attribute tests are handled.  A name that
 * does not start with '@' is logged (YLOG_WARN) as ignored.  Otherwise the
 * attribute list of n is searched for the name following '@'.  When an
 * operator is present it must start with '=' (anything else is logged as
 * ignored) and the attribute value is compared with strcmp; without an
 * operator the existence of the attribute is what is tested.
 *
 * XPATH_PREDICATE_BOOLEAN: "and" / "or" evaluate both operands recursively
 * on the same node with the usual short-circuit rules; any other operator
 * is logged as ignored.
 *
 * NOTE(review): the assignments to res and most return statements are on
 * lines not shown, so the value produced for ignored predicates cannot be
 * confirmed from here.  n->u.tag.attributes is read, so n is assumed to be
 * a tag node.
 */
277 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
286 if (p->which == XPATH_PREDICATE_RELATION) {
287 if (p->u.relation.name[0]) {
288 if (*p->u.relation.name != '@') {
290 " Only attributes (@) are supported in xelm xpath predicates");
291 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
/* skip the leading '@' */
294 attname = p->u.relation.name + 1;
296 /* looking for the attribute with a specified name */
297 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
298 yaz_log(YLOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
300 if (!strcmp(attr->name, attname)) {
301 if (p->u.relation.op[0]) {
302 if (*p->u.relation.op != '=') {
304 "Only '=' relation is supported (%s)",p->u.relation.op);
305 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
308 yaz_log(YLOG_DEBUG," - value %s <-> %s",
309 p->u.relation.value, attr->value );
310 if (!strcmp(attr->value, p->u.relation.value)) {
315 /* attribute exists, no value specified */
320 yaz_log(YLOG_DEBUG, "return %d", res);
326 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
327 if (!strcmp(p->u.boolean.op,"and")) {
328 return d1_check_xpath_predicate(n, p->u.boolean.left)
329 && d1_check_xpath_predicate(n, p->u.boolean.right);
331 else if (!strcmp(p->u.boolean.op,"or")) {
332 return (d1_check_xpath_predicate(n, p->u.boolean.left)
333 || d1_check_xpath_predicate(n, p->u.boolean.right));
335 yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
/*
 * Run the DFA whose state table is dfaar over text, starting from dfaar[0].
 *
 * Each step takes one character and searches the current state's
 * transitions (s->trans, s->tran_no entries) for one whose inclusive range
 * ch[0]..ch[1] contains it.  xpath_termlist_by_tagpath asserts that the
 * result is 0 or 1.
 *
 * NOTE(review): the enclosing loop, the state updates and the return
 * statements are on lines not shown here; the role of the second
 * transition scan cannot be confirmed from this listing.
 */
344 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
346 struct DFA_state *s = dfaar[0]; /* start state */
349 const char *p = text;
352 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
354 if (c >= t->ch[0] && c <= t->ch[1])
358 /* move to next state and return if we get a match */
366 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
367 if (c >= t->ch[0] && c <= t->ch[1])
377 New function, looking for xpath "element" definitions in abs, by
378 tagpath, using a kind of ugly regexp search. The DFA was built while
379 parsing abs, so here we just go through them and try to match
380 against the given tagpath. The first matching entry is returned.
384 Added support for enhanced xelm. Now [] predicates are considered
385 as well, when selecting indexing rules... (why the hell it's called
/*
 * Find the termlists of the first xelm definition in n's abstract syntax
 * that matches tagpath.
 *
 * tagpath  path string; it is wrapped as "/<tagpath>\n" before matching
 * n        node being indexed; n->root supplies the abstract syntax
 *
 * All xp_elements first get match_state -1 ("unknown").  For each element
 * with an unknown state the element's DFA is run over the wrapped path and
 * the result is recorded for it and for the entries chained through
 * match_next.  For a regexp match the predicates of the xpath location
 * steps are then checked from the innermost step outwards while walking up
 * the tag ancestors of n.  On success xpe->termlists is returned.
 *
 * NOTE(review): pexpr comes from xmalloc; its release, the not-found return
 * value and any conditional-compilation guards are on lines not shown here.
 * abs is dereferenced without a NULL check, so callers must ensure that
 * n->root->u.root.absyn is set.
 */
392 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
394 data1_absyn *abs = n->root->u.root.absyn;
396 data1_xpelement *xpe = 0;
399 struct xpath_location_step *xp;
/* room for '/', the path, '\n' and the terminating NUL (with slack) */
401 char *pexpr = xmalloc(strlen(tagpath)+5);
403 sprintf (pexpr, "/%s\n", tagpath);
405 for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
406 xpe->match_state = -1; /* don't know if it matches yet */
408 for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
411 int ok = xpe->match_state;
413 { /* don't know whether there is a match yet */
414 data1_xpelement *xpe1;
417 ok = dfa_match_first(xpe->dfa->states, pexpr);
420 /* mark this and following ones with same regexp */
421 for (xpe1 = xpe; xpe1; xpe1 = xpe1->match_next)
422 xpe1->match_state = ok;
425 assert (ok == 0 || ok == 1);
428 /* we have to check the predicates up to the root node */
431 /* find the first tag up in the node structure */
432 for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent)
435 /* go from inside out in the node structure, while going
436 backwards trough xpath location steps ... */
/* NOTE(review): the ancestor walk can leave nn NULL, yet nn->u.tag.tag is
 * read and nn is passed to d1_check_xpath_predicate without a visible NULL
 * test; confirm that xpath_len can never exceed the tag depth of n. */
437 for (i = xpe->xpath_len - 1; i>0; i--)
439 yaz_log(YLOG_DEBUG, "Checking step %d: %s on tag %s",
440 i, xp[i].part, nn->u.tag.tag);
442 if (!d1_check_xpath_predicate(nn, xp[i].predicate))
444 yaz_log(YLOG_DEBUG, " Predicates didn't match");
449 if (nn->which == DATA1N_tag)
461 yaz_log(YLOG_DEBUG, "Got it");
462 return xpe->termlists;
469 1 start element (tag)
471 3 start attr (and attr-exact)
479 Now, if there is a matching xelm described in abs, for the
480 indexed element or the attribute, then the data is handled according
481 to those definitions...
483 modified by pop, 2002-12-13
/*
 * Add the xpath index entries for one attribute.  wrd is filled in three
 * times in sequence:
 *   1. ZEBRA_XPATH_ELM_BEGIN,  type '0', term = tag_path
 *   2. ZEBRA_XPATH_ATTR_CDATA, type 'w', term = value
 *   3. ZEBRA_XPATH_ELM_END,    type '0', term = tag_path
 *
 * tag_path   path string of the attribute
 * name       attribute name (not used in the lines visible here)
 * value      attribute value, NUL terminated
 * structure  index structure (not used in the lines visible here)
 * p          extraction control block
 *
 * NOTE(review): the calls that hand wrd to the indexer after each step, the
 * final parameter declaration and any guard on value are on lines not
 * shown here.  strlen(value) requires value to be non-NULL.
 */
486 /* add xpath index for an attribute */
487 static void index_xpath_attr (char *tag_path, char *name, char *value,
488 char *structure, struct recExtractCtrl *p,
491 wrd->index_name = ZEBRA_XPATH_ELM_BEGIN;
492 wrd->index_type = '0';
493 wrd->term_buf = tag_path;
494 wrd->term_len = strlen(tag_path);
498 wrd->index_name = ZEBRA_XPATH_ATTR_CDATA;
499 wrd->index_type = 'w';
500 wrd->term_buf = value;
501 wrd->term_len = strlen(value);
504 wrd->index_name = ZEBRA_XPATH_ELM_END;
505 wrd->index_type = '0';
506 wrd->term_buf = tag_path;
507 wrd->term_len = strlen(tag_path);
/*
 * Build the tag path of node n in tag_path_full.
 *
 * The walk starts at n and follows parent links, appending the name of each
 * tag node followed by '/', so the innermost tag comes first in the
 * resulting string.  The walk reacts to the root node (DATA1N_root) and to
 * a tag that would not fit (tlen + flen > max - 2); the buffer is NUL
 * terminated at the end.
 *
 * tag_path_full  destination buffer
 * max            size of the destination buffer in bytes
 * n              node whose path is wanted
 *
 * NOTE(review): the statements taken on overflow and at the root, and the
 * advance of flen after the memcpy, are on lines not shown here.
 */
512 static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
517 /* we have to fetch the whole path to the data tag */
518 for (nn = n; nn; nn = nn->parent)
520 if (nn->which == DATA1N_tag)
522 size_t tlen = strlen(nn->u.tag.tag);
523 if (tlen + flen > (max - 2))
525 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
527 tag_path_full[flen++] = '/';
530 if (nn->which == DATA1N_root)
533 tag_path_full[flen] = 0;
/*
 * Produce the xpath related index entries for node n.
 *
 * sp     source-expression parser used for termlist sources
 * n      node being indexed
 * p      extraction control block (tokenAdd callback, flagShowRecords)
 * level  nesting depth, used only to indent diagnostic output
 * wrd    scratch RecWord that is filled in and handed to p->tokenAdd
 * xpath_index / xpath_is_start are used below; their declarations are on a
 * line not shown here (presumably the remaining parameters).
 *
 * termlist_only starts as 1 and is changed when the record has no abstract
 * syntax or xpath indexing is enabled for it.
 *
 * Data part (visible around the wrd->term_buf = n->u.data.data lines): the
 * full tag path is built, the matching xelm termlist (if any) is looked up,
 * and each entry is evaluated through sp_parse on a copy of wrd, then added
 * either under xpath_index (the "!" case) or under the termlist's own index
 * name.
 *
 * Tag part: the tag path itself is indexed with type '0'; for a start tag
 * (xpath_is_start == 1) the attributes are handled as well: attribute name,
 * the combined name/value term, and the attribute value through
 * index_xpath_attr or the termlist's index.
 *
 * With p->flagShowRecords set, descriptions (at most 40 characters of each
 * term) are printed to stdout.
 *
 * NOTE(review): the switch/if structure selecting the two parts, several
 * declarations and the loop bodies are on lines not shown here.
 * NOTE(review): tll[] has MAX_ATTR_COUNT (50) slots and is filled once per
 * attribute; no bound on the index is visible - confirm one exists.
 */
537 static void index_xpath(struct source_parser *sp, data1_node *n,
538 struct recExtractCtrl *p,
539 int level, RecWord *wrd,
545 char tag_path_full[1024];
546 int termlist_only = 1;
549 if ((!n->root->u.root.absyn) ||
550 (n->root->u.root.absyn->xpath_indexing == DATA1_XPATH_INDEXING_ENABLE)) {
557 wrd->term_buf = n->u.data.data;
558 wrd->term_len = n->u.data.len;
561 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
563 /* If we have a matching termlist... */
564 if (n->root->u.root.absyn &&
565 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
567 for (; tl; tl = tl->next)
569 /* need to copy recword because it may be changed */
571 wrd->index_type = *tl->structure;
572 memcpy (&wrd_tl, wrd, sizeof(*wrd));
574 sp_parse(sp, n, &wrd_tl, tl->source);
577 /* this is the ! case, so structure is for the xpath index */
578 wrd_tl.index_name = xpath_index;
579 if (p->flagShowRecords)
582 printf("%*sXPath index", (level + 1) * 4, "");
583 printf (" XData:\"");
584 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
585 fputc (wrd_tl.term_buf[i], stdout);
587 if (wrd_tl.term_len > 40)
589 fputc ('\n', stdout);
592 (*p->tokenAdd)(&wrd_tl);
595 /* this is just the old fashioned attribute based index */
596 wrd_tl.index_name = tl->index_name;
597 if (p->flagShowRecords)
600 printf("%*sIdx: [%s]", (level + 1) * 4, "",
602 printf("%s %s", tl->index_name, tl->source);
603 printf (" XData:\"");
604 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
605 fputc (wrd_tl.term_buf[i], stdout);
607 if (wrd_tl.term_len > 40)
609 fputc ('\n', stdout);
612 (*p->tokenAdd)(&wrd_tl);
616 /* xpath indexing is done, if there was no termlist given,
617 or no ! in the termlist, and default indexing is enabled... */
618 if (!p->flagShowRecords && !xpdone && !termlist_only)
620 wrd->index_name = xpath_index;
621 wrd->index_type = 'w';
626 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
628 wrd->index_type = '0';
629 wrd->term_buf = tag_path_full;
630 wrd->term_len = strlen(tag_path_full);
631 wrd->index_name = xpath_index;
632 if (p->flagShowRecords)
634 printf("%*s tag=", (level + 1) * 4, "");
635 for (i = 0; i<wrd->term_len && i < 40; i++)
636 fputc (wrd->term_buf[i], stdout);
647 /* Add tag start/end xpath index, only when there is a ! in
648 the apropriate xelm directive, or default xpath indexing
651 if (!(do_xpindex = 1 - termlist_only))
653 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n)))
655 for (; tl; tl = tl->next)
663 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
666 if (xpath_is_start == 1) /* only for the starting tag... */
668 #define MAX_ATTR_COUNT 50
669 data1_termlist *tll[MAX_ATTR_COUNT];
673 /* get termlists for attributes, and find out, if we have to do xpath indexing */
674 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
679 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
681 int do_xpindex = 1 - termlist_only;
683 char attr_tag_path_full[1024];
685 /* this could be cached as well */
/* NOTE(review): unbounded sprintf - tag_path_full alone may hold up to 1023
 * characters, so "@" + name + "/" + path can exceed the 1024 byte
 * destination; a bounded formatter with a truncation check would be safer. */
686 sprintf (attr_tag_path_full, "@%s/%s",
687 xp->name, tag_path_full);
689 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
691 /* if there is a ! in the xelm termlist, or default indexing is on,
692 proceed with xpath idx */
695 for (; tl; tl = tl->next)
704 /* attribute (no value) */
705 wrd->index_type = '0';
706 wrd->index_name = ZEBRA_XPATH_ATTR_NAME;
707 wrd->term_buf = xp->name;
708 wrd->term_len = strlen(xp->name);
/* the combined term is only built when name and value fit into comb */
714 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
716 /* attribute value exact */
717 strcpy (comb, xp->name);
719 strcat (comb, xp->value);
721 wrd->index_name = ZEBRA_XPATH_ATTR_NAME;
722 wrd->index_type = '0';
723 wrd->term_buf = comb;
724 wrd->term_len = strlen(comb);
734 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
736 char attr_tag_path_full[1024];
/* NOTE(review): same unbounded sprintf as in the previous attribute loop. */
739 sprintf (attr_tag_path_full, "@%s/%s",
740 xp->name, tag_path_full);
744 /* If there is a termlist given (=xelm directive) */
745 for (; tl; tl = tl->next)
749 /* add xpath index for the attribute */
750 index_xpath_attr (attr_tag_path_full, xp->name,
751 xp->value, tl->structure,
755 /* index attribute value (only path/@attr) */
758 wrd->index_name = tl->index_name;
759 wrd->index_type = *tl->structure;
760 wrd->term_buf = xp->value;
761 wrd->term_len = strlen(xp->value);
767 /* if there was no termlist for the given path,
768 or the termlist didn't have a ! element, index
769 the attribute as "w" */
770 if ((!xpdone) && (!termlist_only))
772 index_xpath_attr (attr_tag_path_full, xp->name,
773 xp->value, "w", p, wrd);
/*
 * Index node n according to the termlists of the nearest element
 * definition at or above par.
 *
 * sp     source-expression parser
 * par    tag node where the search for an element definition starts
 * n      node whose content is evaluated by the source expressions
 * p      extraction control block
 * level  nesting depth, used only to indent diagnostic output
 * wrd    RecWord that is filled in for each termlist entry
 *
 * While par has no element definition the search moves to the enclosing
 * tag (get_parent_tag).  For every entry of the element's termlists the
 * source expression is evaluated with sp_parse; when that yields a
 * non-empty term, index_type is taken from the first character of
 * tlist->structure and index_name from tlist->index_name.  With
 * p->flagShowRecords set a description of the entry (at most 40 characters
 * of the term) is printed to stdout.
 *
 * NOTE(review): the early returns, the use made of dtype and the call that
 * hands wrd to the indexer are on lines not shown here.
 */
782 static void index_termlist (struct source_parser *sp, data1_node *par,
784 struct recExtractCtrl *p, int level, RecWord *wrd)
786 data1_termlist *tlist = 0;
787 data1_datatype dtype = DATA1K_string;
790 * cycle up towards the root until we find a tag with an att..
791 * this has the effect of indexing locally defined tags with
792 * the attribute of their ancestor in the record.
795 while (!par->u.tag.element)
796 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
798 if (!par || !(tlist = par->u.tag.element->termlists))
800 if (par->u.tag.element->tag)
801 dtype = par->u.tag.element->tag->kind;
803 for (; tlist; tlist = tlist->next)
805 /* consider source */
807 assert(tlist->source);
808 sp_parse(sp, n, wrd, tlist->source);
810 if (wrd->term_buf && wrd->term_len)
812 if (p->flagShowRecords)
815 printf("%*sIdx: [%s]", (level + 1) * 4, "",
817 printf("%s %s", tlist->index_name, tlist->source);
818 printf (" XData:\"");
819 for (i = 0; i<wrd->term_len && i < 40; i++)
820 fputc (wrd->term_buf[i], stdout);
822 if (wrd->term_len > 40)
824 fputc ('\n', stdout);
828 wrd->index_type = *tlist->structure;
829 wrd->index_name = tlist->index_name;
/*
 * Walk the sibling chain starting at n (and, recursively, each node's
 * children) and generate index entries for every node.
 *
 * sp     source-expression parser shared by the whole walk
 * n      first node of the sibling chain
 * p      extraction control block
 * level  nesting depth; the recursion passes level + 1
 *
 * Per node, in this order:
 *   - with p->flagShowRecords set, a description of root and tag nodes is
 *     printed to stdout;
 *   - tag nodes: index_termlist, then index_xpath with ZEBRA_XPATH_ELM_BEGIN
 *     when the record has an abstract syntax;
 *   - the children are processed recursively and a negative result is
 *     detected;
 *   - data nodes: the parent tag is looked up, the data is optionally
 *     printed, then index_termlist and index_xpath with ZEBRA_XPATH_CDATA;
 *   - tag nodes: index_xpath with ZEBRA_XPATH_ELM_END;
 *   - root nodes with flagShowRecords: a separator line is printed.
 *
 * NOTE(review): the remaining parameter declaration, the guards around some
 * calls and the return statements are on lines not shown here.
 */
836 static int dumpkeys_r(struct source_parser *sp,
837 data1_node *n, struct recExtractCtrl *p, int level,
840 for (; n; n = n->next)
842 if (p->flagShowRecords) /* display element description to user */
844 if (n->which == DATA1N_root)
846 printf("%*s", level * 4, "");
847 printf("Record type: '%s'\n", n->u.root.type);
849 else if (n->which == DATA1N_tag)
853 printf("%*s", level * 4, "");
854 if (!(e = n->u.tag.element))
855 printf("Local tag: '%s'\n", n->u.tag.tag);
858 printf("Elm: '%s' ", e->name);
861 data1_tag *t = e->tag;
863 printf("TagNam: '%s' ", t->names->name);
866 printf("%s[%d],", t->tagset->name, t->tagset->type);
869 if (t->which == DATA1T_numeric)
870 printf("%d)", t->value.numeric);
872 printf("'%s')", t->value.string);
879 if (n->which == DATA1N_tag)
881 index_termlist(sp, n, n, p, level, wrd);
882 /* index start tag */
883 if (n->root->u.root.absyn)
884 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN,
889 if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
893 if (n->which == DATA1N_data)
895 data1_node *par = get_parent_tag(p->dh, n);
897 if (p->flagShowRecords)
899 printf("%*s", level * 4, "");
/* long data is abbreviated to its first 170 and last 70 bytes */
901 if (n->u.data.len > 256)
902 printf("'%.170s ... %.70s'\n", n->u.data.data,
903 n->u.data.data + n->u.data.len-70);
904 else if (n->u.data.len > 0)
905 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
911 index_termlist(sp, par, n, p, level, wrd);
913 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA,
917 if (n->which == DATA1N_tag)
920 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END,
924 if (p->flagShowRecords && n->which == DATA1N_root)
926 printf("%*s-------------\n\n", level * 4, "");
/*
 * Index a whole data1 tree: creates a source parser for the duration of the
 * walk, runs dumpkeys_r from nesting level 0 and destroys the parser again.
 * The result of dumpkeys_r is kept in r (the return statement itself is on
 * a line not shown here).
 */
932 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
934 struct source_parser *sp = source_parser_create();
935 int r = dumpkeys_r(sp, n, p, 0, wrd);
936 source_parser_destroy(sp);
/*
 * Extract index keys from an already built data1 tree.
 *
 * The schema reference of the record's abstract syntax is converted to an
 * OID (PROTO_Z3950 / CLASS_SCHEMA) and, when the conversion succeeds,
 * registered through p->schemaAdd.  The tree is then indexed with dumpkeys
 * and its result returned.
 *
 * NOTE(review): a guard on n->u.root.absyn and the initialisation of wrd
 * are not visible in the lines shown here - confirm both exist.
 */
940 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
943 int oidtmp[OID_SIZE];
946 oe.proto = PROTO_Z3950;
947 oe.oclass = CLASS_SCHEMA;
950 oe.value = n->u.root.absyn->reference;
952 if ((oid_ent_to_oid (&oe, oidtmp)))
953 (*p->schemaAdd)(p, oidtmp);
957 /* data1_pr_tree(p->dh, n, stdout); */
959 return dumpkeys(n, p, &wrd);
/*
 * Read one record through grs_read and extract its index keys.
 *
 * clientData  opaque value passed on to the reader in gri.clientData
 * p           extraction control block (stream, dh, init, schemaAdd, ...)
 * grs_read    reader callback producing the data1 tree
 * mem is used below as the NMEM for tree operations; its declaration is on
 * a line not shown here (presumably a parameter).
 *
 * Steps visible here: the reader is invoked; a record without an abstract
 * syntax yields RECCTRL_EXTRACT_ERROR; the schema OID is registered through
 * p->schemaAdd; adjacent text nodes are concatenated, the tree is converted
 * to UTF-8 and the idzebra subtree is removed; p->init prepares wrd and
 * dumpkeys generates the keys.
 *
 * Returns RECCTRL_EXTRACT_EOF, RECCTRL_EXTRACT_ERROR,
 * RECCTRL_EXTRACT_ERROR_GENERIC (dumpkeys reported < 0) or
 * RECCTRL_EXTRACT_OK.
 *
 * NOTE(review): the condition guarding the EOF return and the remaining
 * gri assignments are on lines not shown here.
 */
962 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
964 data1_node *(*grs_read)(struct grs_read_info *))
967 struct grs_read_info gri;
969 int oidtmp[OID_SIZE];
972 gri.stream = p->stream;
975 gri.clientData = clientData;
977 n = (*grs_read)(&gri);
979 return RECCTRL_EXTRACT_EOF;
980 oe.proto = PROTO_Z3950;
981 oe.oclass = CLASS_SCHEMA;
983 if (!n->u.root.absyn)
984 return RECCTRL_EXTRACT_ERROR;
988 oe.value = n->u.root.absyn->reference;
989 if ((oid_ent_to_oid (&oe, oidtmp)))
990 (*p->schemaAdd)(p, oidtmp);
992 data1_concat_text(p->dh, mem, n);
994 /* ensure our data1 tree is UTF-8 */
995 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
998 data1_remove_idzebra_subtree (p->dh, n);
1001 data1_pr_tree (p->dh, n, stdout);
1004 (*p->init)(p, &wrd);
1005 if (dumpkeys(n, p, &wrd) < 0)
1007 return RECCTRL_EXTRACT_ERROR_GENERIC;
1009 return RECCTRL_EXTRACT_OK;
/*
 * Public extraction entry point: creates an NMEM for the record and
 * delegates to grs_extract_sub, keeping its result in ret.
 * NOTE(review): the release of mem and the return statement are on lines
 * not shown here.
 */
1012 int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
1013 data1_node *(*grs_read)(struct grs_read_info *))
1016 NMEM mem = nmem_create ();
1017 ret = grs_extract_sub(clientData, p, mem, grs_read);
1023 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/*
 * Apply a Z39.50 record composition (element set / element spec) to the
 * record tree n.
 *
 * dh       data1 handle
 * n        root of the record; n->u.root.absyn is used for name lookups
 * c        requested composition
 * addinfo  receives an ODR-allocated copy of an unknown element set name
 * o        ODR used for that copy
 *
 * Z_RecordComp_simple: only the generic form is accepted (otherwise 26);
 * the name is looked up with data1_getesetbyname and an unknown name yields
 * diagnostic 25.
 * Z_RecordComp_complex: with a generic spec, an elementSetName is looked up
 * the same way (25 when unknown) and an externalSpec must be Espec-1
 * (otherwise 25).  A further path returns 26.
 * When an Espec-1 has been found it is applied with data1_doespec1 and that
 * result is returned.
 *
 * NOTE(review): the switch statements, the assignments that take the espec
 * from a found element set, and the remaining returns are on lines not
 * shown here.
 */
1025 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
1026 char **addinfo, ODR o)
1028 data1_esetname *eset;
1029 Z_Espec1 *espec = 0;
1034 case Z_RecordComp_simple:
1035 if (c->u.simple->which != Z_ElementSetNames_generic)
1036 return 26; /* only generic form supported. Fix this later */
1037 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1038 c->u.simple->u.generic)))
1040 yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1041 *addinfo = odr_strdup(o, c->u.simple->u.generic);
1042 return 25; /* invalid esetname */
1044 yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
1045 c->u.simple->u.generic);
1048 case Z_RecordComp_complex:
1049 if (c->u.complex->generic)
1051 /* insert check for schema */
1052 if ((p = c->u.complex->generic->elementSpec))
1056 case Z_ElementSpec_elementSetName:
1058 data1_getesetbyname(dh, n->u.root.absyn,
1059 p->u.elementSetName)))
1061 yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
1062 p->u.elementSetName);
1063 *addinfo = odr_strdup(o, p->u.elementSetName);
1064 return 25; /* invalid esetname */
1066 yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
1067 p->u.elementSetName);
1070 case Z_ElementSpec_externalSpec:
1071 if (p->u.externalSpec->which == Z_External_espec1)
1073 yaz_log(YLOG_DEBUG, "Got Espec-1");
1074 espec = p->u.externalSpec-> u.espec1;
1078 yaz_log(YLOG_LOG, "Unknown external espec.");
1079 return 25; /* bad. what is proper diagnostic? */
1086 return 26; /* fix */
1090 yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1091 return data1_doespec1(dh, n, espec);
1095 yaz_log(YLOG_DEBUG, "Element: all match");
1100 /* Add Zebra info in separate namespace ...
1103 <metadata xmlns="http://www.indexdata.dk/zebra/">
1105 <localnumber>447</localnumber>
1106 <filename>records/genera.xml</filename>
/*
 * Append Zebra's record metadata to an XML record.
 *
 * An "idzebra" element carrying the attribute
 * xmlns="http://www.indexdata.dk/zebra/" is created below top, and child
 * elements "size" (p->recordSize), "score" (p->score), "localnumber"
 * (p->localno) and "filename" (p->fname) are added to it.  The text nodes
 * built from i2, i4 and "\n" only provide line breaks and indentation.
 * mem is the NMEM the new nodes are allocated from; its declaration is on a
 * line not shown here (presumably the last parameter).
 *
 * NOTE(review): the termination of the idzebra_ns array and the conditions
 * guarding the individual elements are on lines not shown here.
 */
1111 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1114 const char *idzebra_ns[3];
1115 const char *i2 = "\n ";
1116 const char *i4 = "\n ";
1119 idzebra_ns[0] = "xmlns";
1120 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1123 data1_mk_text (p->dh, mem, i2, top);
1125 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1127 data1_mk_text (p->dh, mem, "\n", top);
1129 data1_mk_text (p->dh, mem, i4, n);
1131 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1135 data1_mk_text (p->dh, mem, i4, n);
1136 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1138 data1_mk_text (p->dh, mem, i4, n);
1139 data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
1142 data1_mk_text (p->dh, mem, i4, n);
1143 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1145 data1_mk_text (p->dh, mem, i2, n);
/*
 * Retrieve one record: read it through grs_read, decorate and map the data1
 * tree, and render it in the requested transfer syntax.
 *
 * clientData  opaque value passed on to the reader in gri.clientData
 * p           retrieval control block; results are delivered in
 *             p->rec_buf / p->rec_len / p->output_format and failures in
 *             p->diagnostic (and p->addinfo)
 * grs_read    reader callback producing the data1 tree
 *
 * Stages, each announced with a YLOG_DEBUG message:
 *   1. read the record, concatenate text nodes, remove the idzebra subtree;
 *   2. add system tags below the root tag: size, rank (when p->score >= 0)
 *      and sysno (when p->localno > 0), using names obtained from
 *      data1_systag_lookup; XML input additionally gets zebra_xml_metadata;
 *   3. schema mapping: when a schema OID was requested, a maptab with that
 *      target is applied via data1_map_record, and a record that still has
 *      a different schema gets diagnostic 238;
 *   4. syntax mapping: a maptab whose target equals p->input_format is
 *      applied;
 *   5. for GRS-1 output a schemaIdentifier tag holding the dotted OID of
 *      the schema is added;
 *   6. element specification via process_comp; a positive result becomes
 *      p->diagnostic;
 *   7. rendering according to p->output_format (p->input_format, or
 *      VAL_SUTRS when that is VAL_NONE) with the data1_nodeto* converters;
 *      a converter returning NULL yields diagnostic 238, and in several
 *      branches the result is copied into memory from p->odr.
 *
 * NOTE(review): in stage 3 node->u.root.absyn->maptabs is dereferenced
 * without the NULL test on absyn that stages 3 (second half), 4 and 5 use -
 * confirm whether absyn can be NULL there.
 * NOTE(review): in stage 5 the name p is used as a character cursor
 * (sprintf(p, ...), p - tmp), so it appears to shadow the function
 * parameter; dh is captured from the parameter just before.  The shadowing
 * declaration is on a line not shown here.
 * NOTE(review): the case labels, cleanup of mem, the early exits and the
 * return statements are on lines not shown here.
 */
1148 int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
1149 data1_node *(*grs_read)(struct grs_read_info *))
1151 data1_node *node = 0, *onode = 0, *top;
1154 int res, selected = 0;
1156 struct grs_read_info gri;
1157 const char *tagname;
1159 int requested_schema = VAL_NONE;
1160 data1_marctab *marctab;
1163 mem = nmem_create();
1164 gri.stream = p->stream;
1167 gri.clientData = clientData;
1169 yaz_log(YLOG_DEBUG, "grs_retrieve");
1170 node = (*grs_read)(&gri);
1177 data1_concat_text(p->dh, mem, node);
1179 data1_remove_idzebra_subtree (p->dh, node);
1182 data1_pr_tree (p->dh, node, stdout);
1184 top = data1_get_root_tag (p->dh, node);
1186 yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1187 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1189 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1191 dnew->u.data.what = DATA1I_text;
/* the number is formatted into the node's own buffer (lbuf) */
1192 dnew->u.data.data = dnew->lbuf;
1193 sprintf(dnew->u.data.data, "%d", p->recordSize);
1194 dnew->u.data.len = strlen(dnew->u.data.data);
1197 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1198 if (tagname && p->score >= 0 &&
1199 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1201 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1202 dnew->u.data.what = DATA1I_num;
1203 dnew->u.data.data = dnew->lbuf;
1204 sprintf(dnew->u.data.data, "%d", p->score);
1205 dnew->u.data.len = strlen(dnew->u.data.data);
1208 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1209 "localControlNumber");
1210 if (tagname && p->localno > 0 &&
1211 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1213 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1214 dnew->u.data.what = DATA1I_text;
1215 dnew->u.data.data = dnew->lbuf;
1217 sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
1218 dnew->u.data.len = strlen(dnew->u.data.data);
1221 if (p->input_format == VAL_TEXT_XML)
1222 zebra_xml_metadata (p, top, mem);
1225 data1_pr_tree (p->dh, node, stdout);
1227 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1228 p->comp->u.complex->generic &&
1229 p->comp->u.complex->generic->which == Z_Schema_oid &&
1230 p->comp->u.complex->generic->schema.oid)
1232 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1234 requested_schema = oe->value;
1236 /* If schema has been specified, map if possible, then check that
1237 * we got the right one
1239 if (requested_schema != VAL_NONE)
1241 yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1242 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1244 if (map->target_absyn_ref == requested_schema)
1247 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1256 if (node->u.root.absyn &&
1257 requested_schema != node->u.root.absyn->reference)
1259 p->diagnostic = 238;
1265 * Does the requested format match a known syntax-mapping? (this reflects
1266 * the overlap of schema and formatting which is inherent in the MARC
1269 yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1270 if (node->u.root.absyn)
1271 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1273 if (map->target_absyn_ref == p->input_format)
1276 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1285 yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1286 if (node->u.root.absyn &&
1287 node->u.root.absyn->reference != VAL_NONE &&
1288 p->input_format == VAL_GRS1)
1292 int oidtmp[OID_SIZE];
1294 oe.proto = PROTO_Z3950;
1295 oe.oclass = CLASS_SCHEMA;
1296 oe.value = node->u.root.absyn->reference;
1298 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1301 data1_handle dh = p->dh;
1305 for (ii = oid; *ii >= 0; ii++)
1309 sprintf(p, "%d", *ii);
1312 if ((dnew = data1_mk_tag_data_wd(dh, top,
1313 "schemaIdentifier", mem)))
1315 dnew->u.data.what = DATA1I_oid;
1316 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1317 memcpy(dnew->u.data.data, tmp, p - tmp);
1318 dnew->u.data.len = p - tmp;
1323 yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1324 if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
1327 p->diagnostic = res;
1331 else if (p->comp && !res)
1335 data1_pr_tree (p->dh, node, stdout);
1337 yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1338 switch (p->output_format = (p->input_format != VAL_NONE ?
1339 p->input_format : VAL_SUTRS))
1343 data1_pr_tree (p->dh, node, stdout);
1345 /* default output encoding for XML is UTF-8 */
1346 data1_iconv (p->dh, mem, node,
1347 p->encoding ? p->encoding : "UTF-8",
1348 data1_get_encoding(p->dh, node));
1350 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1352 p->diagnostic = 238;
/* the rendered record is copied into memory obtained from p->odr */
1355 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1356 memcpy (new_buf, p->rec_buf, p->rec_len);
1357 p->rec_buf = new_buf;
1361 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1363 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1365 p->diagnostic = 238; /* not available in requested syntax */
1370 /* ensure our data1 tree is UTF-8 */
1371 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1373 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1375 p->diagnostic = 238;
1380 /* ensure our data1 tree is UTF-8 */
1381 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1382 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1384 p->diagnostic = 238;
1390 data1_iconv (p->dh, mem, node, p->encoding,
1391 data1_get_encoding(p->dh, node));
1392 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1394 p->diagnostic = 238;
1397 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1398 memcpy (new_buf, p->rec_buf, p->rec_len);
1399 p->rec_buf = new_buf;
1404 data1_iconv (p->dh, mem, node, p->encoding,
1405 data1_get_encoding(p->dh, node));
1406 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1408 p->diagnostic = 238;
1411 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1412 memcpy (new_buf, p->rec_buf, p->rec_len);
1413 p->rec_buf = new_buf;
/* MARC output needs an abstract syntax with a matching marctab */
1417 if (!node->u.root.absyn)
1419 p->diagnostic = 238;
1422 for (marctab = node->u.root.absyn->marc; marctab;
1423 marctab = marctab->next)
1424 if (marctab->reference == p->input_format)
1428 p->diagnostic = 238;
1432 data1_iconv (p->dh, mem, node, p->encoding,
1433 data1_get_encoding(p->dh, node));
1434 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1435 selected, &p->rec_len)))
1436 p->diagnostic = 238;
1439 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1440 memcpy (new_buf, p->rec_buf, p->rec_len);
1441 p->rec_buf = new_buf;
1451 * indent-tabs-mode: nil
1453 * vim: shiftwidth=4 tabstop=8 expandtab