1 /* $Id: marcread.c,v 1.27 2004-11-19 10:27:12 heikki Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
28 #include <yaz/yaz-util.h>
29 #include <yaz/marcdisp.h>
30 #include <idzebra/recgrs.h>
35 #define MARCOMP_DEBUG 0
/*
 * grs_read_iso2709: read one ISO 2709 (MARC) record through the reader
 * callbacks in p and build a data1 node tree from it.
 *
 * p        - reader context; the visible code uses p->readf, p->tellf,
 *            p->endf, p->fh, p->dh, p->mem and p->clientData.
 * marc_xml - NOTE(review): presumably selects the MARCXML-shaped tree
 *            (record, leader, datafield, controlfield, subfield tags)
 *            instead of the tree rooted at absynName; the tests on this
 *            flag are not visible in this listing - confirm.
 *
 * NOTE(review): this listing omits lines of the original function
 * (braces, declarations, else branches, returns). The comments below
 * describe only the statements that are visible.
 */
41 static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml)
43 struct marc_info *mi = (struct marc_info*) p->clientData;
48 int identifier_length;
50 int length_data_entry;
52 int length_implementation;
57 data1_node *res_root, *res_top;
59 data1_marctab *marctab;
/* read the first 5 bytes; they are parsed below as the record length */
61 if ((*p->readf)(p->fh, buf, 5) != 5)
63 record_length = atoi_n (buf, 5);
64 if (record_length < 25)
66 yaz_log (YLOG_WARN, "MARC record length < 25, is %d", record_length);
/* NOTE(review): record_length comes from the input and the declaration
 * of buf is not visible here - confirm buf can hold record_length + 1
 * bytes before this read. */
69 /* read remaining part - attempt to read one byte further... */
70 read_bytes = (*p->readf)(p->fh, buf+5, record_length-4);
71 if (read_bytes < record_length-5)
73 yaz_log (YLOG_WARN, "Couldn't read whole MARC record");
/* the extra byte was obtained: take the current offset and pass
 * offset - 1 to p->endf */
76 if (read_bytes == record_length - 4)
78 off_t cur_offset = (*p->tellf)(p->fh);
82 (*p->endf)(p->fh, cur_offset - 1);
/* create the root node named by absynName */
85 res_root = data1_mk_root (p->dh, p->mem, absynName);
88 yaz_log (YLOG_WARN, "cannot read MARC without an abstract syntax");
/* record element carrying the MARC21 slim namespace attribute, with a
 * leader child holding the first 24 bytes of the record */
94 const char *attr[] = { "xmlns", "http://www.loc.gov/MARC21/slim", 0};
96 res_top = data1_mk_tag (p->dh, p->mem, "record", attr, res_root);
98 lead = data1_mk_tag(p->dh, p->mem, "leader", 0, res_top);
99 data1_mk_text_n(p->dh, p->mem, buf, 24, lead);
/* alternative top node: a tag named after absynName */
102 res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root);
/* when the abstract syntax has a marctab, store leader data in it:
 * the whole 24-byte leader, bytes 6-9 and bytes 17-19 (the two short
 * copies are NUL-terminated explicitly) */
104 if ((marctab = data1_absyn_getmarctab(p->dh, res_root->u.root.absyn)))
106 memcpy(marctab->leader, buf, 24);
107 memcpy(marctab->implementation_codes, buf+6, 4);
108 marctab->implementation_codes[4] = '\0';
109 memcpy(marctab->user_systems, buf+17, 3);
110 marctab->user_systems[3] = '\0';
/* indicator and identifier lengths: a forced value (>= 0) from the
 * marctab, or the digit at leader byte 10 / 11.
 * NOTE(review): the else keywords between the two assignments are not
 * visible in this listing. */
113 if (marctab && marctab->force_indicator_length >= 0)
114 indicator_length = marctab->force_indicator_length;
116 indicator_length = atoi_n (buf+10, 1);
117 if (marctab && marctab->force_identifier_length >= 0)
118 identifier_length = marctab->force_identifier_length;
120 identifier_length = atoi_n (buf+11, 1);
/* remaining leader fields: base address (bytes 12-16) and the three
 * single-digit lengths at bytes 20, 21 and 22 */
121 base_address = atoi_n (buf+12, 5);
123 length_data_entry = atoi_n (buf+20, 1);
124 length_starting = atoi_n (buf+21, 1);
125 length_implementation = atoi_n (buf+22, 1);
/* first pass over the directory: step entry by entry until the field
 * separator, then set base_address to the offset just past it; this
 * replaces the value parsed from the leader above.
 * NOTE(review): the visible loop condition has no bound on entry_p, so
 * a record without a field separator would be scanned past its data -
 * confirm. */
127 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
128 entry_p += 3+length_data_entry+length_starting;
129 base_address = entry_p+1;
/* second pass: one iteration per directory entry */
130 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
138 data1_node *parent = res_top;
/* the entry starts with a 3-byte tag */
140 memcpy (tag, buf+entry_p, 3);
147 res = data1_mk_tag_n (p->dh, p->mem, tag, 3, 0 /* attr */, parent);
150 fprintf (outf, "%s ", tag);
/* field length and starting offset follow the tag; i becomes the
 * absolute start of the field data in buf and end_offset its last byte */
152 data_length = atoi_n (buf+entry_p, length_data_entry);
153 entry_p += length_data_entry;
154 data_offset = atoi_n (buf+entry_p, length_starting);
155 entry_p += length_starting;
156 i = data_offset + base_address;
157 end_offset = i+data_length-1;
/* memcmp is non-zero when the tag does not start with 00; such fields
 * get indicator handling when indicator_length is non-zero */
159 if (memcmp (tag, "00", 2) && indicator_length)
161 /* generate indicator node */
164 const char *attr[10];
171 res = data1_mk_tag(p->dh, p->mem, "datafield", attr, res);
/* one attribute per indicator position, named ind1, ind2, ... */
173 for (j = 0; j<indicator_length; j++)
175 char str1[18], str2[2];
176 sprintf (str1, "ind%d", j+1);
183 data1_tag_add_attr (p->dh, p->mem, res, attr);
/* alternative form: a tag node whose name is the raw indicator bytes */
191 res = data1_mk_tag_n (p->dh, p->mem,
192 buf+i, indicator_length, 0 /* attr */, res);
194 for (j = 0; j<indicator_length; j++)
195 fprintf (outf, "%c", buf[j+i]);
/* skip past the indicator bytes */
198 i += indicator_length;
204 const char *attr[10];
210 res = data1_mk_tag(p->dh, p->mem, "controlfield", attr, res);
214 /* traverse sub fields */
/* stops at a record separator, a field separator or end_offset */
216 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
218 if (memcmp (tag, "00", 2) && identifier_length)
/* copy the identifier bytes after the first one into code; the j < 9
 * test limits this to 8 bytes */
227 for (j = 1; j<identifier_length && j < 9; j++)
228 code[j-1] = buf[i+j];
233 res = data1_mk_tag(p->dh, p->mem, "subfield",
/* alternative form: a tag node named by the identifier bytes after the
 * first one, attached to parent */
238 res = data1_mk_tag_n (p->dh, p->mem,
239 buf+i+1, identifier_length-1,
240 0 /* attr */, parent);
243 fprintf (outf, " $");
244 for (j = 1; j<identifier_length; j++)
245 fprintf (outf, "%c", buf[j+i]);
248 i += identifier_length;
/* subfield data runs up to the next record separator, identifier
 * separator, field separator or end_offset */
250 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
251 buf[i] != ISO2709_FS && i < end_offset)
254 fprintf (outf, "%c", buf[i]);
/* attach the bytes from i0 up to i as a text node under res */
258 data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res);
264 fprintf (outf, "%c", buf[i]);
/* same, but attached directly under parent */
271 data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, parent);
/* diagnostics written to outf about the separator found at the end of
 * the field */
274 fprintf (outf, "\n");
276 fprintf (outf, "-- separator but not at end of field\n");
277 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
278 fprintf (outf, "-- no separator at end of field\n");
285 * Locate some data under this node. This routine should handle variants
/*
 * get_data: return a pointer to text data at or below node n and store
 * a length in *len.
 *
 * For a data node the visible code starts from n->u.data.len, scans
 * forward over leading whitespace (the returned pointer is the data
 * start plus i) and loops while the last counted byte is whitespace.
 * NOTE(review): the loop bodies, the descent through tag nodes and the
 * fallback return are not visible in this listing.
 * NOTE(review): the result points into the node data and comes with an
 * explicit length; callers that pass it to wrbuf_puts rely on a
 * terminating NUL - confirm the node data provides one.
 */
288 static char *get_data(data1_node *n, int *len)
294 if (n->which == DATA1N_data)
297 *len = n->u.data.len;
/* find the first non-whitespace byte */
299 for (i = 0; i<*len; i++)
300 if (!d1_isspace(n->u.data.data[i]))
/* trailing whitespace check, guarded against *len reaching zero */
302 while (*len && d1_isspace(n->u.data.data[*len - 1]))
306 return n->u.data.data + i;
/* node-kind dispatch for anything that was not handled above */
308 if (n->which == DATA1N_tag)
310 else if (n->which == DATA1N_data)
/*
 * lookup_subfield: walk the sibling list that starts at node (following
 * ->next) and test each tag name against name with yaz_matchstr; a zero
 * result is treated as a match.
 * NOTE(review): the return statements are not visible in this listing.
 */
320 static data1_node *lookup_subfield(data1_node *node, const char *name)
324 for (p=node; p; p=p->next)
326 if (!yaz_matchstr(p->u.tag.tag, name))
/*
 * lookup_inline_subfield: walk the in-line subfield list that starts at
 * pisf (following ->next) and test each entry name against name with
 * yaz_matchstr; a zero result is treated as a match.
 * NOTE(review): the rest of the parameter list and the return
 * statements are not visible in this listing.
 */
332 static inline_subfield *lookup_inline_subfield(inline_subfield *pisf,
337 for (p=pisf; p; p=p->next)
339 if (!yaz_matchstr(p->name, name))
/*
 * cat_inline_subfield: walk the subfield specification list psf and
 * append data taken from the parsed in-line subfield list pisf to buf.
 * The loop ends as soon as either list is exhausted.
 * NOTE(review): lines of the original are missing from this listing
 * (braces, returns, some declarations); the comments cover visible
 * code only.
 */
345 static inline_subfield *cat_inline_subfield(mc_subfield *psf, WRBUF buf,
346 inline_subfield *pisf)
350 for (p = psf; p && pisf; p = p->next)
/* plain subfield: look it up by name in the in-line list */
352 if (p->which == MC_SF)
354 inline_subfield *found = lookup_inline_subfield(pisf, p->name);
/* a prefix that is a single underscore is not written; any other
 * prefix is written after a space */
358 if (strcmp(p->prefix, "_"))
360 wrbuf_puts(buf, " ");
361 wrbuf_puts(buf, p->prefix);
/* interval.start of -1: append the whole data string; otherwise append
 * end - start bytes beginning at offset start */
363 if (p->interval.start == -1)
365 wrbuf_puts(buf, found->data);
369 wrbuf_write(buf, found->data+p->interval.start,
370 p->interval.end-p->interval.start);
/* a suffix that is a single underscore is not written; any other
 * suffix is followed by a space */
373 if (strcmp(p->suffix, "_"))
375 wrbuf_puts(buf, p->suffix);
376 wrbuf_puts(buf, " ");
379 yaz_log(YLOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name);
/* variant: recurse into the child specification list */
384 else if (p->which == MC_SFVARIANT)
386 inline_subfield *next;
389 next = cat_inline_subfield(p->u.child, buf, pisf);
/* group: search the child specifications for the current in-line
 * subfield, then emit the group wrapped in parentheses */
395 else if (p->which == MC_SFGROUP)
400 for (pp = p->u.child, found = 0; pp; pp = pp->next)
/* NOTE(review): this loop iterates pp but compares against p->name;
 * the corresponding loop in cat_subfield compares against pp->name -
 * confirm which is intended. */
402 if (!yaz_matchstr(pisf->name, p->name))
410 wrbuf_puts(buf, " (");
411 pisf = cat_inline_subfield(p->u.child, buf, pisf);
412 wrbuf_puts(buf, ") ");
/*
 * cat_inline_field: parse an in-line field from the sibling list that
 * starts at subfield and, when it matches the field specification pf,
 * append its data to buf followed by a newline separator.
 * NOTE(review): lines of the original are missing from this listing
 * (braces, loop heads, returns); the comments cover visible code only.
 */
419 static void cat_inline_field(mc_field *pf, WRBUF buf, data1_node *subfield)
/* nothing to do without a specification or a starting node */
421 if (!pf || !subfield)
427 inline_field *pif=NULL;
/* yaz_matchstr is non-zero when the tag is not 1; the visible code
 * then advances to the next sibling */
430 if (yaz_matchstr(subfield->u.tag.tag, "1"))
432 subfield = subfield->next;
/* allocate a fresh in-line field and feed it subfields one by one */
437 pif = inline_mk_field();
/* a negative result from inline_parse is a parse error: it is logged
 * and the partly built field is destroyed */
441 if ((i=inline_parse(pif, psubf->u.tag.tag, get_data(psubf, &len)))<0)
443 yaz_log(YLOG_WARN, "inline subfield ($%s): parse error",
445 inline_destroy_field(pif);
/* keep consuming siblings until the list ends or a tag 1 is reached */
449 } while (psubf && yaz_matchstr(psubf->u.tag.tag, "1"));
/* continue only when the parsed field name matches the specification */
453 if (pif && !yaz_matchstr(pif->name, pf->name))
/* the specification has no subfield list: append the data of the first
 * parsed subfield as is */
455 if (!pf->list && pif->list)
457 wrbuf_puts(buf, pif->list->data);
/* a blank indicator is mapped to an underscore before comparison */
467 ind1 = (pif->ind1[0] == ' ') ? '_':pif->ind1[0];
468 ind2 = (pif->ind2[0] == ' ') ? '_':pif->ind2[0];
/* a dot in the specification matches any indicator value */
470 if (((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) &&
471 ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0])))
473 cat_inline_subfield(pf->list, buf, pif->list);
476 add separator for inline fields
480 wrbuf_puts(buf, "\n");
485 yaz_log(YLOG_WARN, "In-line field %s missed -- indicators do not match", pif->name);
489 inline_destroy_field(pif);
/* NOTE(review): buf is declared as WRBUF in the parameter list but is
 * passed for a %s conversion here - confirm whether the character data
 * of the buffer was intended. */
492 yaz_log(YLOG_LOG, "cat_inline_field(): got buffer {%s}", buf);
/*
 * cat_subfield: walk the subfield specification list psf and append
 * data from the data1 subfield nodes starting at subfield to buf.
 * The loop ends as soon as either list is exhausted.
 * NOTE(review): lines of the original are missing from this listing
 * (braces, returns, some declarations); the comments cover visible
 * code only.
 */
496 static data1_node *cat_subfield(mc_subfield *psf, WRBUF buf,
497 data1_node *subfield)
501 for (p = psf; p && subfield; p = p->next)
/* plain subfield: look it up by name among the remaining siblings */
503 if (p->which == MC_SF)
505 data1_node *found = lookup_subfield(subfield, p->name);
/* a prefix that is a single underscore is not written; any other
 * prefix is written after a space */
511 if (strcmp(p->prefix, "_"))
513 wrbuf_puts(buf, " ");
514 wrbuf_puts(buf, p->prefix);
/* in-line specification: delegate to cat_inline_field */
519 cat_inline_field(p->u.in_line, buf, found);
/* interval.start of -1: append the whole data string; otherwise append
 * end - start bytes beginning at offset start */
521 else if (p->interval.start == -1)
523 wrbuf_puts(buf, get_data(found, &len));
527 wrbuf_write(buf, get_data(found, &len)+p->interval.start,
528 p->interval.end-p->interval.start);
/* a suffix that is a single underscore is not written; any other
 * suffix is followed by a space */
531 if (strcmp(p->suffix, "_"))
533 wrbuf_puts(buf, p->suffix);
534 wrbuf_puts(buf, " ");
537 yaz_log(YLOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag);
/* continue matching after the node that was just consumed */
539 subfield = found->next;
/* variant: recurse into the child list; an unchanged position means
 * the recursion consumed nothing */
542 else if (p->which == MC_SFVARIANT)
546 next = cat_subfield(p->u.child, buf, subfield);
547 if (next == subfield)
/* group: search the child specifications for the current subfield tag,
 * then emit the group wrapped in parentheses */
552 else if (p->which == MC_SFGROUP)
557 for (pp = p->u.child, found = 0; pp; pp = pp->next)
559 if (!yaz_matchstr(subfield->u.tag.tag, pp->name))
567 wrbuf_puts(buf, " (");
568 subfield = cat_subfield(p->u.child, buf, subfield);
569 wrbuf_puts(buf, ") ");
/*
 * cat_field: append the data of the data1 node field to buf according
 * to the field specification pf.
 * NOTE(review): lines of the original are missing from this listing
 * (braces, returns, some declarations); the use of parameter p is not
 * visible. The comments cover visible code only.
 */
576 static data1_node *cat_field(struct grs_read_info *p, mc_field *pf,
577 WRBUF buf, data1_node *field)
579 data1_node *subfield;
/* yaz_matchstr is non-zero when the tag differs from the specification
 * name */
586 if (yaz_matchstr(field->u.tag.tag, pf->name))
589 subfield = field->child;
595 check subfield without indicators
/* the specification has no subfield list and the child is plain data:
 * append the field text, whole or as an interval */
598 if (!pf->list && subfield->which == DATA1N_data)
/* interval.start of -1: append the whole data string; otherwise append
 * end - start bytes beginning at offset start */
602 if (pf->interval.start == -1)
604 wrbuf_puts(buf, get_data(field, &len));
608 wrbuf_write(buf, get_data(field, &len)+pf->interval.start,
609 pf->interval.end-pf->interval.start);
/* NOTE(review): buf is declared as WRBUF in the parameter list but is
 * passed for a %s conversion here and in the last log call below -
 * confirm whether the character data of the buffer was intended. */
613 yaz_log(YLOG_LOG, "cat_field(): got buffer {%s}", buf);
/* the first two characters of the child tag name are used as the
 * indicators; a blank is mapped to an underscore before comparison */
622 ind1 = (subfield->u.tag.tag[0] == ' ') ? '_':subfield->u.tag.tag[0];
623 ind2 = (subfield->u.tag.tag[1] == ' ') ? '_':subfield->u.tag.tag[1];
/* a dot in the specification matches any indicator value */
626 ((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) &&
627 ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0]))
631 yaz_log(YLOG_WARN, "Field %s missed -- does not match indicators", field->u.tag.tag);
/* descend below the indicator node to reach the subfield nodes */
636 subfield = subfield->child;
641 cat_subfield(pf->list, buf, subfield);
644 yaz_log(YLOG_LOG, "cat_field(): got buffer {%s}", buf);
650 static int is_empty(char *s)
/*
 * parse_data1_tree: evaluate the statement mc_stmnt against the data1
 * tree below root and add, under the first child of root, new tag nodes
 * named after the statement that hold the extracted text.
 * NOTE(review): the rest of the parameter list, the braces, several
 * declarations and the loop heads are not visible in this listing; the
 * comments cover visible code only.
 */
662 static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt,
665 data1_marctab *marctab = data1_absyn_getmarctab(p->dh, root->u.root.absyn);
666 data1_node *top = root->child;
/* the context is built from the statement with its first 3 characters
 * skipped; presumably that is the mc? prefix the callers match on -
 * confirm */
672 c = mc_mk_context(mc_stmnt+3);
681 mc_destroy_context(c);
686 yaz_log(YLOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt);
/* a field specification named ldr refers to the record leader stored
 * in the marctab */
688 if (!yaz_matchstr(pf->name, "ldr"))
692 yaz_log(YLOG_LOG,"parse_data1_tree(): try LEADER from {%d} to {%d} positions",
693 pf->interval.start, pf->interval.end);
695 new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top);
/* NOTE(review): the length here is end - start + 1, whereas cat_field,
 * cat_subfield and cat_inline_subfield write end - start bytes for an
 * interval - confirm which convention is intended. */
696 data1_mk_text_n(p->dh, p->mem, marctab->leader+pf->interval.start,
697 pf->interval.end-pf->interval.start+1, new);
/* other specifications: process fields whose tag matches the name */
705 if (!yaz_matchstr(field->u.tag.tag, pf->name))
710 yaz_log(YLOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag);
715 field = cat_field(p, pf, buf, field);
/* split the collected text at newlines (the separator written by
 * cat_inline_field) and add one node per piece; strtok modifies the
 * text it scans and keeps static state between calls */
718 for (pb = strtok(pb, "\n"); pb; pb = strtok(NULL, "\n"))
722 new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top);
723 data1_mk_text_n(p->dh, p->mem, pb, strlen(pb), new);
/* release the parsed specification and its context */
733 mc_destroy_field(pf);
734 mc_destroy_context(c);
/*
 * grs_read_marcxml: read one record with grs_read_iso2709 (marc_xml
 * argument 1), then run parse_data1_tree for every element of the
 * abstract syntax whose tag is a string matching the pattern mc?.
 * NOTE(review): the check of root and the return statement are not
 * visible in this listing.
 */
738 data1_node *grs_read_marcxml(struct grs_read_info *p)
740 data1_node *root = grs_read_iso2709(p, 1);
746 for (e = data1_absyn_getelements(p->dh, root->u.root.absyn); e; e=e->next)
748 data1_tag *tag = e->tag;
/* only string-valued tags that match mc? are handed on */
750 if (tag && tag->which == DATA1T_string &&
751 !yaz_matchstr(tag->value.string, "mc?"))
752 parse_data1_tree(p, tag->value.string, root);
/*
 * grs_read_marc: read one record with grs_read_iso2709 (marc_xml
 * argument 0), then run parse_data1_tree for every element of the
 * abstract syntax whose tag is a string matching the pattern mc?.
 * NOTE(review): the check of root and the return statement are not
 * visible in this listing.
 */
757 data1_node *grs_read_marc(struct grs_read_info *p)
759 data1_node *root = grs_read_iso2709(p, 0);
765 for (e = data1_absyn_getelements(p->dh, root->u.root.absyn); e; e=e->next)
767 data1_tag *tag = e->tag;
/* only string-valued tags that match mc? are handed on */
769 if (tag && tag->which == DATA1T_string &&
770 !yaz_matchstr(tag->value.string, "mc?"))
771 parse_data1_tree(p, tag->value.string, root);
/*
 * init_marc: allocate the struct marc_info for this filter with xmalloc.
 * NOTE(review): the initialisation of the struct and the return
 * statement are not visible in this listing; res and rt are not used
 * in the visible code.
 */
776 static void *init_marc(Res res, RecType rt)
778 struct marc_info *p = xmalloc(sizeof(*p));
/*
 * config_marc: store the configuration string args in the type member
 * of the struct marc_info passed as clientData.
 */
783 static void config_marc(void *clientData, Res res, const char *args)
785 struct marc_info *p = (struct marc_info*) clientData;
/* copy only when args, including its terminating NUL, fits in p->type */
786 if (strlen(args) < sizeof(p->type))
787 strcpy(p->type, args);
/*
 * destroy_marc: tear down the struct marc_info passed as clientData.
 * NOTE(review): the release call itself is not visible in this listing;
 * presumably it frees the block allocated in init_marc - confirm.
 */
790 static void destroy_marc(void *clientData)
792 struct marc_info *p = (struct marc_info*) clientData;
/*
 * extract_marc: extraction entry point for the MARC filter; forwards to
 * zebra_grs_extract with grs_read_marc as the record reader and returns
 * its result.
 */
797 static int extract_marc(void *clientData, struct recExtractCtrl *ctrl)
799 return zebra_grs_extract(clientData, ctrl, grs_read_marc);
/*
 * retrieve_marc: retrieval entry point for the MARC filter; forwards to
 * zebra_grs_retrieve with grs_read_marc as the record reader and
 * returns its result.
 */
802 static int retrieve_marc(void *clientData, struct recRetrieveCtrl *ctrl)
804 return zebra_grs_retrieve(clientData, ctrl, grs_read_marc);
807 static struct recType marc_type = {
/*
 * extract_marcxml: extraction entry point for the MARCXML filter;
 * forwards to zebra_grs_extract with grs_read_marcxml as the record
 * reader and returns its result.
 */
816 static int extract_marcxml(void *clientData, struct recExtractCtrl *ctrl)
818 return zebra_grs_extract(clientData, ctrl, grs_read_marcxml);
/*
 * retrieve_marcxml: retrieval entry point for the MARCXML filter;
 * forwards to zebra_grs_retrieve with grs_read_marcxml as the record
 * reader and returns its result.
 */
821 static int retrieve_marcxml(void *clientData, struct recRetrieveCtrl *ctrl)
823 return zebra_grs_retrieve(clientData, ctrl, grs_read_marcxml);
826 static struct recType marcxml_type = {
836 #ifdef IDZEBRA_STATIC_GRS_MARC
837 idzebra_filter_grs_marc