1 /* $Id: recgrs.c,v 1.8 2006-10-26 23:45:46 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include <sys/types.h>
32 #include <idzebra/recgrs.h>
34 #define GRS_MAX_WORD 512
/* State shared by the sp_* helpers that evaluate termlist source
   expressions. The code in this file reads and writes the members src,
   lookahead, tok, len and nmem.
   NOTE(review): the member declarations themselves are not visible here -
   confirm types against the full definition. */
36 struct source_parser {
/* Lexer step for source expressions.
   Skips blanks at sp->src, scans a run of characters that are not in the
   delimiter set "<>();,-: ", and can also store the current source
   character itself in sp->lookahead.
   NOTE(review): the token bookkeeping (tok, len) and the return value are
   set on lines not visible here - confirm against the full source. */
44 static int sp_lex(struct source_parser *sp)
46 while (*sp->src == ' ')
50 while (*sp->src && !strchr("<>();,-: ", *sp->src))
59 sp->lookahead = *sp->src;
/* Forward declaration: sp_range and sp_first call sp_expr, and sp_expr in
   turn dispatches to both of them. */
66 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd);
/* Evaluates a range(expr, start [, len]) source expression.
   The first argument is evaluated into wrd; start and the optional len are
   evaluated into a temporary word and converted with atoi_n. When wrd holds
   data, term_buf is advanced by start, term_len is reduced by start and
   then compared against len.
   NOTE(review): no check that start lies within 0..term_len is visible
   before the pointer is advanced - confirm against the full source. */
68 static int sp_range(struct source_parser *sp, data1_node *n, RecWord *wrd)
75 if (sp->lookahead != '(')
77 sp_lex(sp); /* skip ( */
80 if (!sp_expr(sp, n, wrd))
83 if (sp->lookahead != ',')
85 sp_lex(sp); /* skip , */
88 if (!sp_expr(sp, n, &tmp_w))
/* second argument: start offset */
90 start = atoi_n(tmp_w.term_buf, tmp_w.term_len);
/* optional third argument: length */
92 if (sp->lookahead == ',')
94 sp_lex(sp); /* skip , */
97 if (!sp_expr(sp, n, &tmp_w))
99 len = atoi_n(tmp_w.term_buf, tmp_w.term_len);
105 if (sp->lookahead != ')')
/* apply the range only when the word actually holds data */
109 if (wrd->term_buf && wrd->term_len)
111 wrd->term_buf += start;
112 wrd->term_len -= start;
113 if (wrd->term_len > len)
/* Evaluates a first(expr, s1, s2, ...) source expression.
   The first argument is evaluated into wrd. Each further comma-separated
   argument is evaluated into search_w and searched for inside wrd; the
   smallest matching offset is tracked in min_pos. Finally wrd is replaced
   by the decimal text of min_pos (0 is the default when nothing matched),
   allocated from sp->nmem. */
119 static int sp_first(struct source_parser *sp, data1_node *n, RecWord *wrd)
124 if (sp->lookahead != '(')
126 sp_lex(sp); /* skip ( */
127 if (!sp_expr(sp, n, wrd))
129 while (sp->lookahead == ',')
133 sp_lex(sp); /* skip , */
135 if (!sp_expr(sp, n, &search_w))
/* straightforward substring search of search_w within wrd */
137 for (i = 0; i<wrd->term_len; i++)
140 for (j = 0; j<search_w.term_len && i+j < wrd->term_len; j++)
141 if (wrd->term_buf[i+j] != search_w.term_buf[j])
143 if (j == search_w.term_len) /* match ? */
145 if (min_pos == -1 || i < min_pos)
151 if (sp->lookahead != ')')
155 min_pos = 0; /* the default if not found */
/* the result word is the position rendered as text */
156 sprintf(num_str, "%d", min_pos);
157 wrd->term_buf = nmem_strdup(sp->nmem, num_str);
158 wrd->term_len = strlen(wrd->term_buf);
/* Evaluates one source expression for node n and stores the result in wrd.
   The current token (sp->tok, sp->len) selects the form:
     data          - the data of a DATA1N_data node
     tag           - the tag name of a DATA1N_tag node
     attr(expr)    - the value of the attribute whose name is given by expr
     first(...)    - handled by sp_first
     range(...)    - handled by sp_range
     digits        - the token text copied into sp->nmem
     quoted text   - the text between single quotes copied into sp->nmem
   NOTE(review): the quoted form requires sp->len > 2, so an empty quoted
   string does not take that branch - confirm this is intended. */
162 static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd)
164 if (sp->lookahead != 't')
166 if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
168 if (n->which == DATA1N_data)
170 wrd->term_buf = n->u.data.data;
171 wrd->term_len = n->u.data.len;
175 else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
177 if (n->which == DATA1N_tag)
179 wrd->term_buf = n->u.tag.tag;
180 wrd->term_len = strlen(n->u.tag.tag);
184 else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
188 if (sp->lookahead != '(')
192 if (!sp_expr(sp, n, &tmp_w))
197 if (n->which == DATA1N_tag)
199 data1_xattr *p = n->u.tag.attributes;
/* NOTE(review): the loop below keeps scanning only while BOTH the length
   differs AND the bytes differ, so it stops at the first attribute whose
   name merely has the same length or the same prefix. A logical OR of the
   two tests looks intended - confirm and fix in the full source. */
200 while (p && strlen(p->name) != tmp_w.term_len &&
201 memcmp (p->name, tmp_w.term_buf, tmp_w.term_len))
205 wrd->term_buf = p->value;
206 wrd->term_len = strlen(p->value);
209 if (sp->lookahead != ')')
213 else if (sp->len == 5 && !memcmp(sp->tok, "first", sp->len))
215 return sp_first(sp, n, wrd);
217 else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
219 return sp_range(sp, n, wrd);
/* numeric literal: the cast to unsigned char keeps isdigit well defined */
221 else if (sp->len > 0 && isdigit(*(unsigned char *)sp->tok))
224 wrd->term_len = sp->len;
225 b = nmem_malloc(sp->nmem, sp->len);
226 memcpy(b, sp->tok, sp->len);
/* quoted literal: copy the text without the surrounding quotes */
230 else if (sp->len > 2 && sp->tok[0] == '\'' && sp->tok[sp->len-1] == '\'')
233 wrd->term_len = sp->len - 2;
234 b = nmem_malloc(sp->nmem, wrd->term_len);
235 memcpy(b, sp->tok+1, wrd->term_len);
/* Allocates a source_parser with xmalloc and gives it a fresh NMEM handle
   from nmem_create. The counterpart is source_parser_destroy. */
248 static struct source_parser *source_parser_create()
250 struct source_parser *sp = xmalloc(sizeof(*sp));
252 sp->nmem = nmem_create();
/* Tears down a parser made by source_parser_create: destroys its NMEM.
   NOTE(review): the release of sp itself is not visible here - confirm. */
256 static void source_parser_destroy(struct source_parser *sp)
260 nmem_destroy(sp->nmem);
/* Entry point for evaluating the source expression src against node n.
   Resets the parser NMEM, so results of earlier evaluations that were
   allocated from it must no longer be used, then returns the result of
   sp_expr with the value left in wrd.
   NOTE(review): the lines that load src into the parser are not visible
   here - confirm. */
264 static int sp_parse(struct source_parser *sp,
265 data1_node *n, RecWord *wrd, const char *src)
271 nmem_reset(sp->nmem);
274 return sp_expr(sp, n, wrd);
/* Tests an xpath predicate p against node n.
   Relation predicates: only attribute names (leading @) and only the =
   operator are handled; other forms are logged with YLOG_WARN as ignored.
   The attributes of n are scanned for the named one and, when an operator
   is present, its value is compared with strcmp against the predicate
   value. Boolean predicates: "and" / "or" recurse into the left and right
   operands with the usual short-circuit evaluation; any other operator is
   logged as unknown.
   NOTE(review): the values returned on the ignored / unknown paths are on
   lines not visible here - confirm. */
277 int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
286 if (p->which == XPATH_PREDICATE_RELATION) {
287 if (p->u.relation.name[0]) {
288 if (*p->u.relation.name != '@') {
290 " Only attributes (@) are supported in xelm xpath predicates");
291 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
/* skip the leading @ to get the bare attribute name */
294 attname = p->u.relation.name + 1;
296 /* looking for the attribute with a specified name */
297 for (attr = n->u.tag.attributes; attr; attr = attr->next) {
298 yaz_log(YLOG_DEBUG," - attribute %s <-> %s", attname, attr->name );
300 if (!strcmp(attr->name, attname)) {
301 if (p->u.relation.op[0]) {
302 if (*p->u.relation.op != '=') {
304 "Only '=' relation is supported (%s)",p->u.relation.op);
305 yaz_log(YLOG_WARN, "predicate %s ignored", p->u.relation.name);
308 yaz_log(YLOG_DEBUG," - value %s <-> %s",
309 p->u.relation.value, attr->value );
310 if (!strcmp(attr->value, p->u.relation.value)) {
315 /* attribute exists, no value specified */
320 yaz_log(YLOG_DEBUG, "return %d", res);
326 else if (p->which == XPATH_PREDICATE_BOOLEAN) {
327 if (!strcmp(p->u.boolean.op,"and")) {
328 return d1_check_xpath_predicate(n, p->u.boolean.left)
329 && d1_check_xpath_predicate(n, p->u.boolean.right);
331 else if (!strcmp(p->u.boolean.op,"or")) {
332 return (d1_check_xpath_predicate(n, p->u.boolean.left)
333 || d1_check_xpath_predicate(n, p->u.boolean.right));
335 yaz_log(YLOG_WARN, "Unknown boolean relation %s, ignored",p->u.boolean.op);
/* Runs text through the DFA whose state table is dfaar, starting from
   dfaar[0]. For each input character the transitions of the current state
   are scanned for a range ch[0]..ch[1] that contains it.
   NOTE(review): callers assert the result is 0 or 1; the exact accept and
   reject conditions are on lines not visible here - confirm. */
344 static int dfa_match_first(struct DFA_state **dfaar, const char *text)
346 struct DFA_state *s = dfaar[0]; /* start state */
349 const char *p = text;
/* fetch the next character and look for a transition that covers it */
352 for (c = *p++, t = s->trans, i = s->tran_no; --i >= 0; t++)
354 if (c >= t->ch[0] && c <= t->ch[1])
358 /* move to next state and return if we get a match */
366 for (t = s->trans, i = s->tran_no; --i >= 0; t++)
367 if (c >= t->ch[0] && c <= t->ch[1])
377 New function, looking for xpath "element" definitions in abs, by
378 tagpath, using a kind of ugly regexp search. The DFA was built while
379 parsing abs, so here we just go through them and try to match
380 against the given tagpath. The first matching entry is returned.
384 Added support for enhanced xelm. Now [] predicates are considered
385 as well, when selecting indexing rules... (why the hell it's called
/* Looks up the termlists of the first xpath element definition in the
   abstract syntax of n that matches tagpath.
   The path is wrapped as "/<tagpath>\n" and run through the DFA of each
   definition; the match result is cached in match_state and propagated
   along match_next to definitions sharing the same expression. For a
   matching definition the predicates of its location steps are then checked
   with d1_check_xpath_predicate while walking from n up through the tag
   nodes. Returns xpe->termlists for the first definition that passes.
   NOTE(review): pexpr comes from xmalloc; its release is not visible here -
   confirm it is freed on every path, including the return inside the
   loop. */
392 data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
394 data1_absyn *abs = n->root->u.root.absyn;
396 data1_xpelement *xpe = 0;
399 struct xpath_location_step *xp;
/* room for the leading slash, the newline and the terminator */
401 char *pexpr = xmalloc(strlen(tagpath)+5);
403 sprintf (pexpr, "/%s\n", tagpath);
405 for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
406 xpe->match_state = -1; /* don't know if it matches yet */
408 for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
411 int ok = xpe->match_state;
413 { /* don't know whether there is a match yet */
414 data1_xpelement *xpe1;
417 ok = dfa_match_first(xpe->dfa->states, pexpr);
420 /* mark this and following ones with same regexp */
421 for (xpe1 = xpe; xpe1; xpe1 = xpe1->match_next)
422 xpe1->match_state = ok;
425 assert (ok == 0 || ok == 1);
428 /* we have to check the predicates up to the root node */
431 /* find the first tag up in the node structure */
432 for (nn = n; nn && nn->which != DATA1N_tag; nn = nn->parent)
435 /* go from inside out in the node structure, while going
436 backwards through xpath location steps ... */
437 for (i = xpe->xpath_len - 1; i>0; i--)
439 yaz_log(YLOG_DEBUG, "Checking step %d: %s on tag %s",
440 i, xp[i].part, nn->u.tag.tag);
442 if (!d1_check_xpath_predicate(nn, xp[i].predicate))
444 yaz_log(YLOG_DEBUG, " Predicates didn't match");
449 if (nn->which == DATA1N_tag)
461 yaz_log(YLOG_DEBUG, "Got it");
462 return xpe->termlists;
469 1 start element (tag)
471 3 start attr (and attr-exact)
479 Now, if there is a matching xelm described in abs, for the
480 indexed element or the attribute, then the data is handled according
481 to those definitions...
483 modified by pop, 2002-12-13
/* Prepares the three index words for one attribute, in this order:
   ZEBRA_XPATH_ELM_BEGIN with the tag path (type 0), ZEBRA_XPATH_ATTR_CDATA
   with the attribute value (type w), and ZEBRA_XPATH_ELM_END with the tag
   path again (type 0).
   NOTE(review): the calls that hand each word to the indexer, and any use
   of the name and structure parameters, are on lines not visible here -
   confirm. */
486 /* add xpath index for an attribute */
487 static void index_xpath_attr (char *tag_path, char *name, char *value,
488 char *structure, struct recExtractCtrl *p,
491 wrd->index_name = ZEBRA_XPATH_ELM_BEGIN;
492 wrd->index_type = '0';
493 wrd->term_buf = tag_path;
494 wrd->term_len = strlen(tag_path);
498 wrd->index_name = ZEBRA_XPATH_ATTR_CDATA;
499 wrd->index_type = 'w';
500 wrd->term_buf = value;
501 wrd->term_len = strlen(value);
504 wrd->index_name = ZEBRA_XPATH_ELM_END;
505 wrd->index_type = '0';
506 wrd->term_buf = tag_path;
507 wrd->term_len = strlen(tag_path);
/* Builds the tag path of node n in tag_path_full (capacity max bytes).
   Walks from n through the parent links; every DATA1N_tag node contributes
   its tag name followed by a slash, so the innermost tag comes first. The
   walk has a stop test at the DATA1N_root node and a size test that keeps
   two bytes in reserve. The result is always zero terminated. */
512 static void mk_tag_path_full(char *tag_path_full, size_t max, data1_node *n)
517 /* we have to fetch the whole path to the data tag */
518 for (nn = n; nn; nn = nn->parent)
520 if (nn->which == DATA1N_tag)
522 size_t tlen = strlen(nn->u.tag.tag);
/* leave room for the trailing slash and the terminator */
523 if (tlen + flen > (max - 2))
525 memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
527 tag_path_full[flen++] = '/';
530 if (nn->which == DATA1N_root)
533 tag_path_full[flen] = 0;
/* Emits the XPath related index words for node n.
   Default xpath indexing is switched on (termlist_only cleared) when the
   record has no abstract syntax or the syntax enables xpath indexing.
   Data nodes: the node data is indexed through every termlist that
   xpath_termlist_by_tagpath finds for the full tag path; each termlist
   source is evaluated with sp_parse on a copy of wrd, and the highest
   seqno seen is written back to wrd. If no termlist handled the xpath
   index and default indexing is on, the data is indexed as type w.
   Tag nodes: the tag path itself is indexed, and for a start tag
   (xpath_is_start == 1) every attribute is indexed by name, by name plus
   value, and through index_xpath_attr or its own termlists.
   With p->flagShowRecords set the words are printed instead of or in
   addition to being added - NOTE(review): the exact branching is on lines
   not visible here, confirm against the full source. */
537 static void index_xpath(struct source_parser *sp, data1_node *n,
538 struct recExtractCtrl *p,
539 int level, RecWord *wrd,
545 char tag_path_full[1024];
546 int termlist_only = 1;
549 if ((!n->root->u.root.absyn) ||
550 (n->root->u.root.absyn->xpath_indexing == DATA1_XPATH_INDEXING_ENABLE)) {
557 wrd->term_buf = n->u.data.data;
558 wrd->term_len = n->u.data.len;
561 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
563 /* If we have a matching termlist... */
564 if (n->root->u.root.absyn &&
565 (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
568 for (; tl; tl = tl->next)
570 /* need to copy recword because it may be changed */
572 wrd->index_type = *tl->structure;
573 memcpy (&wrd_tl, wrd, sizeof(*wrd));
575 sp_parse(sp, n, &wrd_tl, tl->source);
578 /* this is the ! case, so structure is for the xpath index */
579 wrd_tl.index_name = xpath_index;
580 if (p->flagShowRecords)
583 printf("%*sXPath index", (level + 1) * 4, "");
584 printf (" XData:\"");
/* show at most the first 40 bytes of the term */
585 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
586 fputc (wrd_tl.term_buf[i], stdout);
588 if (wrd_tl.term_len > 40)
590 fputc ('\n', stdout);
593 (*p->tokenAdd)(&wrd_tl);
596 /* this is just the old fashioned attribute based index */
597 wrd_tl.index_name = tl->index_name;
598 if (p->flagShowRecords)
601 printf("%*sIdx: [%s]", (level + 1) * 4, "",
603 printf("%s %s", tl->index_name, tl->source);
604 printf (" XData:\"");
605 for (i = 0; i<wrd_tl.term_len && i < 40; i++)
606 fputc (wrd_tl.term_buf[i], stdout);
608 if (wrd_tl.term_len > 40)
610 fputc ('\n', stdout);
613 (*p->tokenAdd)(&wrd_tl);
/* remember the highest sequence number produced by any termlist */
615 if (wrd_tl.seqno > max_seqno)
616 max_seqno = wrd_tl.seqno;
619 wrd->seqno = max_seqno;
622 /* xpath indexing is done, if there was no termlist given,
623 or no ! in the termlist, and default indexing is enabled... */
624 if (!p->flagShowRecords && !xpdone && !termlist_only)
626 wrd->index_name = xpath_index;
627 wrd->index_type = 'w';
632 mk_tag_path_full(tag_path_full, sizeof(tag_path_full), n);
634 wrd->index_type = '0';
635 wrd->term_buf = tag_path_full;
636 wrd->term_len = strlen(tag_path_full);
637 wrd->index_name = xpath_index;
638 if (p->flagShowRecords)
640 printf("%*s tag=", (level + 1) * 4, "");
641 for (i = 0; i<wrd->term_len && i < 40; i++)
642 fputc (wrd->term_buf[i], stdout);
653 /* Add tag start/end xpath index, only when there is a ! in
654 the appropriate xelm directive, or default xpath indexing
657 if (!(do_xpindex = 1 - termlist_only))
659 if ((tl = xpath_termlist_by_tagpath(tag_path_full, n)))
661 for (; tl; tl = tl->next)
669 (*p->tokenAdd)(wrd); /* index element pag (AKA tag path) */
672 if (xpath_is_start == 1) /* only for the starting tag... */
674 #define MAX_ATTR_COUNT 50
675 data1_termlist *tll[MAX_ATTR_COUNT];
679 /* get termlists for attributes, and find out, if we have to do xpath indexing */
680 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
685 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
687 int do_xpindex = 1 - termlist_only;
689 char attr_tag_path_full[1024];
691 /* this could be cached as well */
/* NOTE(review): tag_path_full may hold up to 1023 characters and this
   buffer is also 1024 bytes, so adding the attribute name can overflow.
   A bounded formatter such as snprintf looks safer - confirm. */
692 sprintf (attr_tag_path_full, "@%s/%s",
693 xp->name, tag_path_full);
/* NOTE(review): tll has MAX_ATTR_COUNT slots; a bound on i is not
   visible here - confirm elements with more attributes are handled. */
695 tll[i] = xpath_termlist_by_tagpath(attr_tag_path_full,n);
697 /* if there is a ! in the xelm termlist, or default indexing is on,
698 proceed with xpath idx */
701 for (; tl; tl = tl->next)
710 /* attribute (no value) */
711 wrd->index_type = '0';
712 wrd->index_name = ZEBRA_XPATH_ATTR_NAME;
713 wrd->term_buf = xp->name;
714 wrd->term_len = strlen(xp->name);
720 strlen(xp->name) + strlen(xp->value) < sizeof(comb)-2) {
/* the length test above is what keeps strcpy and strcat within comb */
722 /* attribute value exact */
723 strcpy (comb, xp->name);
725 strcat (comb, xp->value);
727 wrd->index_name = ZEBRA_XPATH_ATTR_NAME;
728 wrd->index_type = '0';
729 wrd->term_buf = comb;
730 wrd->term_len = strlen(comb);
740 for (xp = n->u.tag.attributes; xp; xp = xp->next) {
742 char attr_tag_path_full[1024];
/* NOTE(review): same overflow concern as the earlier sprintf into a
   buffer of this name - confirm. */
745 sprintf (attr_tag_path_full, "@%s/%s",
746 xp->name, tag_path_full);
750 /* If there is a termlist given (=xelm directive) */
751 for (; tl; tl = tl->next)
755 /* add xpath index for the attribute */
756 index_xpath_attr (attr_tag_path_full, xp->name,
757 xp->value, tl->structure,
761 /* index attribute value (only path/@attr) */
764 wrd->index_name = tl->index_name;
765 wrd->index_type = *tl->structure;
766 wrd->term_buf = xp->value;
767 wrd->term_len = strlen(xp->value);
773 /* if there was no termlist for the given path,
774 or the termlist didn't have a ! element, index
775 the attribute as "w" */
776 if ((!xpdone) && (!termlist_only))
778 index_xpath_attr (attr_tag_path_full, xp->name,
779 xp->value, "w", p, wrd);
/* Indexes node n according to the termlists of the element definition
   that applies to par.
   Starting at par, the parent tags are climbed (get_parent_tag) until one
   with an element definition is found. For each termlist entry of that
   element the source expression is evaluated with sp_parse; a non-empty
   result is either printed (p->flagShowRecords, first 40 bytes) or given
   the index type and name of the entry.
   NOTE(review): the call that adds the word to the index and any use of
   dtype are on lines not visible here - confirm. */
788 static void index_termlist (struct source_parser *sp, data1_node *par,
790 struct recExtractCtrl *p, int level, RecWord *wrd)
792 data1_termlist *tlist = 0;
793 data1_datatype dtype = DATA1K_string;
796 * cycle up towards the root until we find a tag with an att..
797 * this has the effect of indexing locally defined tags with
798 * the attribute of their ancestor in the record.
801 while (!par->u.tag.element)
802 if (!par->parent || !(par=get_parent_tag(p->dh, par->parent)))
804 if (!par || !(tlist = par->u.tag.element->termlists))
806 if (par->u.tag.element->tag)
807 dtype = par->u.tag.element->tag->kind;
809 for (; tlist; tlist = tlist->next)
811 /* consider source */
813 assert(tlist->source);
814 sp_parse(sp, n, wrd, tlist->source);
/* skip entries whose source expression produced no data */
816 if (wrd->term_buf && wrd->term_len)
818 if (p->flagShowRecords)
821 printf("%*sIdx: [%s]", (level + 1) * 4, "",
823 printf("%s %s", tlist->index_name, tlist->source);
824 printf (" XData:\"");
825 for (i = 0; i<wrd->term_len && i < 40; i++)
826 fputc (wrd->term_buf[i], stdout);
828 if (wrd->term_len > 40)
830 fputc ('\n', stdout);
834 wrd->index_type = *tlist->structure;
835 wrd->index_name = tlist->index_name;
/* Recursive worker that walks n and its following siblings.
   With p->flagShowRecords set a description of each root and tag node is
   printed, indented by level. Tag nodes are indexed via index_termlist and,
   when the record has an abstract syntax, via index_xpath with
   ZEBRA_XPATH_ELM_BEGIN before the children and ZEBRA_XPATH_ELM_END after
   them. Data nodes are indexed via index_termlist (using the parent tag)
   and index_xpath with ZEBRA_XPATH_CDATA. A negative result from the
   recursive call on the children is tested for.
   NOTE(review): the return values are on lines not visible here. */
842 static int dumpkeys_r(struct source_parser *sp,
843 data1_node *n, struct recExtractCtrl *p, int level,
846 for (; n; n = n->next)
848 if (p->flagShowRecords) /* display element description to user */
850 if (n->which == DATA1N_root)
852 printf("%*s", level * 4, "");
853 printf("Record type: '%s'\n", n->u.root.type);
855 else if (n->which == DATA1N_tag)
859 printf("%*s", level * 4, "");
860 if (!(e = n->u.tag.element))
861 printf("Local tag: '%s'\n", n->u.tag.tag);
864 printf("Elm: '%s' ", e->name);
867 data1_tag *t = e->tag;
869 printf("TagNam: '%s' ", t->names->name);
872 printf("%s[%d],", t->tagset->name, t->tagset->type);
875 if (t->which == DATA1T_numeric)
876 printf("%d)", t->value.numeric);
878 printf("'%s')", t->value.string);
885 if (n->which == DATA1N_tag)
887 index_termlist(sp, n, n, p, level, wrd);
888 /* index start tag */
889 if (n->root->u.root.absyn)
890 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_BEGIN,
/* descend into the children one indentation level deeper */
895 if (dumpkeys_r(sp, n->child, p, level + 1, wrd) < 0)
899 if (n->which == DATA1N_data)
901 data1_node *par = get_parent_tag(p->dh, n);
903 if (p->flagShowRecords)
905 printf("%*s", level * 4, "");
/* long data is abbreviated to its first 170 and last 70 bytes */
907 if (n->u.data.len > 256)
908 printf("'%.170s ... %.70s'\n", n->u.data.data,
909 n->u.data.data + n->u.data.len-70);
910 else if (n->u.data.len > 0)
911 printf("'%.*s'\n", n->u.data.len, n->u.data.data);
917 index_termlist(sp, par, n, p, level, wrd);
919 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_CDATA,
923 if (n->which == DATA1N_tag)
926 index_xpath(sp, n, p, level, wrd, ZEBRA_XPATH_ELM_END,
930 if (p->flagShowRecords && n->which == DATA1N_root)
932 printf("%*s-------------\n\n", level * 4, "");
/* Indexes the tree rooted at n: creates a temporary source parser, runs
   dumpkeys_r from level 0 and destroys the parser again. The result of
   dumpkeys_r is kept in r. */
938 static int dumpkeys(data1_node *n, struct recExtractCtrl *p, RecWord *wrd)
940 struct source_parser *sp = source_parser_create();
941 int r = dumpkeys_r(sp, n, p, 0, wrd);
942 source_parser_destroy(sp);
/* Extracts index keys from an already built data1 tree n.
   The schema reference of the abstract syntax is converted to an OID and
   reported through p->schemaAdd, then the tree is indexed with dumpkeys,
   whose result is returned.
   NOTE(review): n->u.root.absyn is dereferenced here; a guard against a
   missing abstract syntax is not visible - confirm. */
946 int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
949 int oidtmp[OID_SIZE];
952 oe.proto = PROTO_Z3950;
953 oe.oclass = CLASS_SCHEMA;
956 oe.value = n->u.root.absyn->reference;
958 if ((oid_ent_to_oid (&oe, oidtmp)))
959 (*p->schemaAdd)(p, oidtmp);
963 /* data1_pr_tree(p->dh, n, stdout); */
965 return dumpkeys(n, p, &wrd);
/* Reads one record through grs_read and extracts its index keys.
   Visible results: RECCTRL_EXTRACT_EOF, RECCTRL_EXTRACT_ERROR when the tree
   has no abstract syntax, RECCTRL_EXTRACT_ERROR_GENERIC when dumpkeys
   reports a negative value, and RECCTRL_EXTRACT_OK otherwise.
   Before indexing, the schema OID is reported through p->schemaAdd,
   adjacent text nodes are concatenated, the tree is converted to UTF-8 and
   the idzebra subtree is removed. */
968 static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
970 data1_node *(*grs_read)(struct grs_read_info *))
973 struct grs_read_info gri;
975 int oidtmp[OID_SIZE];
978 gri.stream = p->stream;
981 gri.clientData = clientData;
983 n = (*grs_read)(&gri);
985 return RECCTRL_EXTRACT_EOF;
986 oe.proto = PROTO_Z3950;
987 oe.oclass = CLASS_SCHEMA;
989 if (!n->u.root.absyn)
990 return RECCTRL_EXTRACT_ERROR;
994 oe.value = n->u.root.absyn->reference;
995 if ((oid_ent_to_oid (&oe, oidtmp)))
996 (*p->schemaAdd)(p, oidtmp);
998 data1_concat_text(p->dh, mem, n);
1000 /* ensure our data1 tree is UTF-8 */
1001 data1_iconv (p->dh, mem, n, "UTF-8", data1_get_encoding(p->dh, n));
1004 data1_remove_idzebra_subtree (p->dh, n);
1007 data1_pr_tree (p->dh, n, stdout);
/* let the caller initialise the word before any keys are produced */
1010 (*p->init)(p, &wrd);
1011 if (dumpkeys(n, p, &wrd) < 0)
1013 return RECCTRL_EXTRACT_ERROR_GENERIC;
1015 return RECCTRL_EXTRACT_OK;
/* Public extract entry point: creates an NMEM handle for the duration of
   the call and delegates to grs_extract_sub.
   NOTE(review): the release of mem and the return of ret are on lines not
   visible here - confirm. */
1018 int zebra_grs_extract(void *clientData, struct recExtractCtrl *p,
1019 data1_node *(*grs_read)(struct grs_read_info *))
1022 NMEM mem = nmem_create ();
1023 ret = grs_extract_sub(clientData, p, mem, grs_read);
1029 * Return: -1: Nothing done. 0: Ok. >0: Bib-1 diagnostic.
/* Applies the record composition c (element set name or Espec-1) to the
   tree n.
   Simple form: only the generic element set name is accepted (otherwise
   26); an unknown name yields 25 with *addinfo set to a copy of the name.
   Complex form: the generic element spec is either an element set name
   (same handling) or an external spec, where only Espec-1 is accepted
   (otherwise 25). When an Espec-1 was selected the result of
   data1_doespec1 is returned.
   NOTE(review): how eset is turned into espec and the value returned on
   the "all match" path are on lines not visible here - confirm. */
1031 static int process_comp(data1_handle dh, data1_node *n, Z_RecordComposition *c,
1032 char **addinfo, ODR o)
1034 data1_esetname *eset;
1035 Z_Espec1 *espec = 0;
1040 case Z_RecordComp_simple:
1041 if (c->u.simple->which != Z_ElementSetNames_generic)
1042 return 26; /* only generic form supported. Fix this later */
1043 if (!(eset = data1_getesetbyname(dh, n->u.root.absyn,
1044 c->u.simple->u.generic)))
1046 yaz_log(YLOG_LOG, "Unknown esetname '%s'", c->u.simple->u.generic);
1047 *addinfo = odr_strdup(o, c->u.simple->u.generic);
1048 return 25; /* invalid esetname */
1050 yaz_log(YLOG_DEBUG, "Esetname '%s' in simple compspec",
1051 c->u.simple->u.generic);
1054 case Z_RecordComp_complex:
1055 if (c->u.complex->generic)
1057 /* insert check for schema */
1058 if ((p = c->u.complex->generic->elementSpec))
1062 case Z_ElementSpec_elementSetName:
1064 data1_getesetbyname(dh, n->u.root.absyn,
1065 p->u.elementSetName)))
1067 yaz_log(YLOG_DEBUG, "Unknown esetname '%s'",
1068 p->u.elementSetName);
1069 *addinfo = odr_strdup(o, p->u.elementSetName);
1070 return 25; /* invalid esetname */
1072 yaz_log(YLOG_DEBUG, "Esetname '%s' in complex compspec",
1073 p->u.elementSetName);
1076 case Z_ElementSpec_externalSpec:
1077 if (p->u.externalSpec->which == Z_External_espec1)
1079 yaz_log(YLOG_DEBUG, "Got Espec-1");
1080 espec = p->u.externalSpec-> u.espec1;
1084 yaz_log(YLOG_LOG, "Unknown external espec.");
1085 return 25; /* bad. what is proper diagnostic? */
1092 return 26; /* fix */
1096 yaz_log(YLOG_DEBUG, "Element: Espec-1 match");
1097 return data1_doespec1(dh, n, espec);
1101 yaz_log(YLOG_DEBUG, "Element: all match");
1106 /* Add Zebra info in separate namespace ...
1109 <metadata xmlns="http://www.indexdata.dk/zebra/">
1111 <localnumber>447</localnumber>
1112 <filename>records/genera.xml</filename>
/* Appends a Zebra metadata element to the XML record below top.
   Creates a tag named "idzebra" carrying the xmlns attribute for the Zebra
   namespace and fills it with the children size, score, localnumber and
   filename taken from p, separated by whitespace text nodes (i2, i4) so the
   serialised output is indented.
   NOTE(review): conditions that may guard individual children are on lines
   not visible here - confirm. */
1117 static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
1120 const char *idzebra_ns[3];
1121 const char *i2 = "\n ";
1122 const char *i4 = "\n ";
1125 idzebra_ns[0] = "xmlns";
1126 idzebra_ns[1] = "http://www.indexdata.dk/zebra/";
1129 data1_mk_text (p->dh, mem, i2, top);
1131 n = data1_mk_tag (p->dh, mem, "idzebra", idzebra_ns, top);
1133 data1_mk_text (p->dh, mem, "\n", top);
1135 data1_mk_text (p->dh, mem, i4, n);
1137 data1_mk_tag_data_int (p->dh, n, "size", p->recordSize, mem);
1141 data1_mk_text (p->dh, mem, i4, n);
1142 data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
1144 data1_mk_text (p->dh, mem, i4, n);
1145 data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
1148 data1_mk_text (p->dh, mem, i4, n);
1149 data1_mk_tag_data_text(p->dh, n, "filename", p->fname, mem);
1151 data1_mk_text (p->dh, mem, i2, n);
/* Public retrieve entry point: reads one record through grs_read and
   renders it in the requested syntax into p->rec_buf / p->rec_len.
   Visible stages, in order:
     1. read the tree, concatenate text nodes, drop the idzebra subtree;
     2. add the system tags size, rank and sysno (names resolved with
        data1_systag_lookup) and, for XML input format, the metadata
        element from zebra_xml_metadata;
     3. resolve a requested schema from a complex composition and map the
        record through a matching maptab; a mismatch sets diagnostic 238;
     4. map through a maptab whose target equals p->input_format;
     5. for GRS-1, add a schemaIdentifier tag holding the dotted OID text;
     6. apply the element specification with process_comp;
     7. convert the character set with data1_iconv and serialise with the
        data1_nodeto... function selected by the output format; failures
        set diagnostic 238, and several results are copied into memory
        from p->odr.
   NOTE(review): the case labels, error exits and the release of mem are on
   lines not visible here - confirm against the full source.
   NOTE(review): some data1_iconv calls pass p->encoding directly while the
   XML case substitutes UTF-8 when it is unset; a guard for those calls is
   not visible - confirm. */
1154 int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
1155 data1_node *(*grs_read)(struct grs_read_info *))
1157 data1_node *node = 0, *onode = 0, *top;
1160 int res, selected = 0;
1162 struct grs_read_info gri;
1163 const char *tagname;
1165 int requested_schema = VAL_NONE;
1166 data1_marctab *marctab;
1169 mem = nmem_create();
1170 gri.stream = p->stream;
1173 gri.clientData = clientData;
1175 yaz_log(YLOG_DEBUG, "grs_retrieve");
1176 node = (*grs_read)(&gri);
1183 data1_concat_text(p->dh, mem, node);
1185 data1_remove_idzebra_subtree (p->dh, node);
1188 data1_pr_tree (p->dh, node, stdout);
1190 top = data1_get_root_tag (p->dh, node);
/* system tag: record size, formatted into the local buffer of the node */
1192 yaz_log(YLOG_DEBUG, "grs_retrieve: size");
1193 tagname = data1_systag_lookup(node->u.root.absyn, "size", "size");
1195 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1197 dnew->u.data.what = DATA1I_text;
1198 dnew->u.data.data = dnew->lbuf;
1199 sprintf(dnew->u.data.data, "%d", p->recordSize);
1200 dnew->u.data.len = strlen(dnew->u.data.data);
/* system tag: rank, only when a non-negative score is available */
1203 tagname = data1_systag_lookup(node->u.root.absyn, "rank", "rank");
1204 if (tagname && p->score >= 0 &&
1205 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1207 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1208 dnew->u.data.what = DATA1I_num;
1209 dnew->u.data.data = dnew->lbuf;
1210 sprintf(dnew->u.data.data, "%d", p->score);
1211 dnew->u.data.len = strlen(dnew->u.data.data);
/* system tag: local record number, only when it is positive */
1214 tagname = data1_systag_lookup(node->u.root.absyn, "sysno",
1215 "localControlNumber");
1216 if (tagname && p->localno > 0 &&
1217 (dnew = data1_mk_tag_data_wd(p->dh, top, tagname, mem)))
1219 yaz_log(YLOG_DEBUG, "grs_retrieve: %s", tagname);
1220 dnew->u.data.what = DATA1I_text;
1221 dnew->u.data.data = dnew->lbuf;
1223 sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
1224 dnew->u.data.len = strlen(dnew->u.data.data);
1227 if (p->input_format == VAL_TEXT_XML)
1228 zebra_xml_metadata (p, top, mem);
1231 data1_pr_tree (p->dh, node, stdout);
/* a complex composition may name the schema the client wants */
1233 if (p->comp && p->comp->which == Z_RecordComp_complex &&
1234 p->comp->u.complex->generic &&
1235 p->comp->u.complex->generic->which == Z_Schema_oid &&
1236 p->comp->u.complex->generic->schema.oid)
/* NOTE(review): oe is dereferenced two lines down; a check for a failed
   lookup is not visible here - confirm. */
1238 oident *oe = oid_getentbyoid (p->comp->u.complex->generic->schema.oid);
1240 requested_schema = oe->value;
1242 /* If schema has been specified, map if possible, then check that
1243 * we got the right one
1245 if (requested_schema != VAL_NONE)
1247 yaz_log(YLOG_DEBUG, "grs_retrieve: schema mapping");
1248 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1250 if (map->target_absyn_ref == requested_schema)
1253 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1262 if (node->u.root.absyn &&
1263 requested_schema != node->u.root.absyn->reference)
1265 p->diagnostic = 238;
1271 * Does the requested format match a known syntax-mapping? (this reflects
1272 * the overlap of schema and formatting which is inherent in the MARC
1275 yaz_log(YLOG_DEBUG, "grs_retrieve: syntax mapping");
1276 if (node->u.root.absyn)
1277 for (map = node->u.root.absyn->maptabs; map; map = map->next)
1279 if (map->target_absyn_ref == p->input_format)
1282 if (!(node = data1_map_record(p->dh, onode, map, mem)))
1291 yaz_log(YLOG_DEBUG, "grs_retrieve: schemaIdentifier");
1292 if (node->u.root.absyn &&
1293 node->u.root.absyn->reference != VAL_NONE &&
1294 p->input_format == VAL_GRS1)
1298 int oidtmp[OID_SIZE];
1300 oe.proto = PROTO_Z3950;
1301 oe.oclass = CLASS_SCHEMA;
1302 oe.value = node->u.root.absyn->reference;
1304 if ((oid = oid_ent_to_oid (&oe, oidtmp)))
1307 data1_handle dh = p->dh;
1311 for (ii = oid; *ii >= 0; ii++)
1315 sprintf(p, "%d", *ii);
1318 if ((dnew = data1_mk_tag_data_wd(dh, top,
1319 "schemaIdentifier", mem)))
1321 dnew->u.data.what = DATA1I_oid;
1322 dnew->u.data.data = (char *) nmem_malloc(mem, p - tmp);
1323 memcpy(dnew->u.data.data, tmp, p - tmp);
1324 dnew->u.data.len = p - tmp;
1329 yaz_log(YLOG_DEBUG, "grs_retrieve: element spec");
1330 if (p->comp && (res = process_comp(p->dh, node, p->comp, &p->addinfo,
1333 p->diagnostic = res;
1337 else if (p->comp && !res)
1341 data1_pr_tree (p->dh, node, stdout);
1343 yaz_log(YLOG_DEBUG, "grs_retrieve: transfer syntax mapping");
1344 switch (p->output_format = (p->input_format != VAL_NONE ?
1345 p->input_format : VAL_SUTRS))
1349 data1_pr_tree (p->dh, node, stdout);
1351 /* default output encoding for XML is UTF-8 */
1352 data1_iconv (p->dh, mem, node,
1353 p->encoding ? p->encoding : "UTF-8",
1354 data1_get_encoding(p->dh, node));
1356 if (!(p->rec_buf = data1_nodetoidsgml(p->dh, node, selected,
1358 p->diagnostic = 238;
1361 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1362 memcpy (new_buf, p->rec_buf, p->rec_len);
1363 p->rec_buf = new_buf;
1367 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1369 if (!(p->rec_buf = data1_nodetogr(p->dh, node, selected,
1371 p->diagnostic = 238; /* not available in requested syntax */
1376 /* ensure our data1 tree is UTF-8 */
1377 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1379 if (!(p->rec_buf = data1_nodetoexplain(p->dh, node, selected,
1381 p->diagnostic = 238;
1386 /* ensure our data1 tree is UTF-8 */
1387 data1_iconv (p->dh, mem, node, "UTF-8", data1_get_encoding(p->dh, node));
1388 if (!(p->rec_buf = data1_nodetosummary(p->dh, node, selected,
1390 p->diagnostic = 238;
1396 data1_iconv (p->dh, mem, node, p->encoding,
1397 data1_get_encoding(p->dh, node));
1398 if (!(p->rec_buf = data1_nodetobuf(p->dh, node, selected,
1400 p->diagnostic = 238;
1403 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1404 memcpy (new_buf, p->rec_buf, p->rec_len);
1405 p->rec_buf = new_buf;
1410 data1_iconv (p->dh, mem, node, p->encoding,
1411 data1_get_encoding(p->dh, node));
1412 if (!(p->rec_buf = data1_nodetosoif(p->dh, node, selected,
1414 p->diagnostic = 238;
1417 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1418 memcpy (new_buf, p->rec_buf, p->rec_len);
1419 p->rec_buf = new_buf;
1423 if (!node->u.root.absyn)
1425 p->diagnostic = 238;
1428 for (marctab = node->u.root.absyn->marc; marctab;
1429 marctab = marctab->next)
1430 if (marctab->reference == p->input_format)
1434 p->diagnostic = 238;
1438 data1_iconv (p->dh, mem, node, p->encoding,
1439 data1_get_encoding(p->dh, node));
1440 if (!(p->rec_buf = data1_nodetomarc(p->dh, marctab, node,
1441 selected, &p->rec_len)))
1442 p->diagnostic = 238;
1445 char *new_buf = (char*) odr_malloc (p->odr, p->rec_len);
1446 memcpy (new_buf, p->rec_buf, p->rec_len);
1447 p->rec_buf = new_buf;
1457 * indent-tabs-mode: nil
1459 * vim: shiftwidth=4 tabstop=8 expandtab