1 /* $Id: marcread.c,v 1.20 2003-03-05 16:43:31 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
28 #include <yaz/yaz-util.h>
29 #include <yaz/marcdisp.h>
/* Debug switch for this file; defined as 0 here.
 * NOTE(review): presumably enables the diagnostic output (logf traces and
 * fprintf to outf) further down -- confirm where it is tested. */
35 #define MARCOMP_DEBUG 0
/*
 * Read one ISO 2709 (MARC) record through the callbacks in `p` and turn it
 * into a data1 tree.
 *
 * Outline of the steps in this function:
 *   - the first 5 bytes give the record length; the rest of the record is
 *     then read into buf;
 *   - a root node is created for absynName with one tag node (res_top)
 *     beneath it;
 *   - if the abstract syntax carries a MARC table, the leader and two
 *     leader-derived strings are copied into it;
 *   - the directory (starting at byte 24, terminated by ISO2709_FS) is
 *     walked, and each entry yields a field tag node plus indicator,
 *     subfield and text nodes built from the field data.
 *
 * p   reader context: readf/tellf/endf callbacks with handle fh, plus the
 *     data1 handle (dh) and memory handle (mem) used for node creation.
 *
 * Returns a data1_node pointer.  NOTE(review): presumably the root node on
 * success and NULL after the warnings logged below -- confirm.
 */
37 static data1_node *grs_read_iso2709 (struct grs_read_info *p)
43 int identifier_length;
45 int length_data_entry;
47 int length_implementation;
52 data1_node *res_root, *res_top;
54 data1_marctab *marctab;
/* the record length is the first five characters of the record */
56 if ((*p->readf)(p->fh, buf, 5) != 5)
58 record_length = atoi_n (buf, 5);
59 if (record_length < 25)
61 logf (LOG_WARN, "MARC record length < 25, is %d", record_length);
64 /* read remaining part - attempt to read one byte further... */
/* NOTE(review): this read can fill buf up to index record_length, so buf
 * must hold record_length + 1 bytes -- confirm against its declaration. */
65 read_bytes = (*p->readf)(p->fh, buf+5, record_length-4);
66 if (read_bytes < record_length-5)
68 logf (LOG_WARN, "Couldn't read whole MARC record");
/* the extra byte was obtained: hand endf the current position minus one.
 * NOTE(review): presumably this marks the record end so that the extra
 * byte is not counted as part of this record -- confirm endf semantics. */
71 if (read_bytes == record_length - 4)
73 off_t cur_offset = (*p->tellf)(p->fh);
77 (*p->endf)(p->fh, cur_offset - 1);
/* the tree is rooted in the abstract syntax named by absynName */
80 res_root = data1_mk_root (p->dh, p->mem, absynName);
83 yaz_log (LOG_WARN, "cannot read MARC without an abstract syntax");
86 res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root);
/* keep a copy of the 24-byte leader in the MARC table, together with
 * leader bytes 6-9 and 17-19 stored as NUL-terminated strings */
88 if ((marctab = res_root->u.root.absyn->marc))
90 memcpy(marctab->leader, buf, 24);
91 memcpy(marctab->implementation_codes, buf+6, 4);
92 marctab->implementation_codes[4] = '\0';
93 memcpy(marctab->user_systems, buf+17, 3);
94 marctab->user_systems[3] = '\0';
/* indicator and identifier lengths: a non-negative force_* value in the
 * MARC table takes precedence over the leader digit at offset 10 / 11 */
97 if (marctab && marctab->force_indicator_length >= 0)
98 indicator_length = marctab->force_indicator_length;
100 indicator_length = atoi_n (buf+10, 1);
101 if (marctab && marctab->force_identifier_length >= 0)
102 identifier_length = marctab->force_identifier_length;
104 identifier_length = atoi_n (buf+11, 1);
/* NOTE(review): the MARC leader keeps the base address in five characters
 * (positions 12-16); only four are parsed here.  The value appears to be
 * replaced by the directory scan below, so this looks harmless -- confirm. */
105 base_address = atoi_n (buf+12, 4);
/* one-digit widths from leader offsets 20, 21 and 22; the first two size
 * the length and starting-offset parts of every directory entry */
107 length_data_entry = atoi_n (buf+20, 1);
108 length_starting = atoi_n (buf+21, 1);
109 length_implementation = atoi_n (buf+22, 1);
/* first pass: step over the directory entries up to the ISO2709_FS byte;
 * field data starts at the byte after it */
111 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
112 entry_p += 3+length_data_entry+length_starting;
113 base_address = entry_p+1;
/* second pass: one iteration per directory entry */
114 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
122 data1_node *parent = res_top;
124 memcpy (tag, buf+entry_p, 3);
129 /* generate field node */
130 res = data1_mk_tag_n (p->dh, p->mem, tag, 3, 0 /* attr */, parent);
133 fprintf (outf, "%s ", tag);
/* decode the entry's data length and starting offset, then compute where
 * the field data begins (i) and ends (end_offset) inside buf */
135 data_length = atoi_n (buf+entry_p, length_data_entry);
136 entry_p += length_data_entry;
137 data_offset = atoi_n (buf+entry_p, length_starting);
138 entry_p += length_starting;
139 i = data_offset + base_address;
140 end_offset = i+data_length-1;
/* fields whose tag does not start with "00" get an indicator node, named
 * by the indicator bytes themselves, below the field node */
142 if (memcmp (tag, "00", 2) && indicator_length)
144 /* generate indicator node */
148 res = data1_mk_tag_n (p->dh, p->mem,
149 buf+i, indicator_length, 0 /* attr */, res);
151 for (j = 0; j<indicator_length; j++)
152 fprintf (outf, "%c", buf[j+i]);
154 i += indicator_length;
157 /* traverse sub fields */
159 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
/* tags starting with "4" take this branch when the abstract syntax name
 * matches UNIMARC or RUSMARC */
162 if (!memcmp(tag, "4", 1) && (!yaz_matchstr(absynName, "UNIMARC")||
163 !yaz_matchstr(absynName, "RUSMARC")))
167 data1_mk_tag_n (p->dh, p->mem,
168 buf+i+1, identifier_length-1,
169 0 /* attr */, parent);
170 i += identifier_length;
173 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
174 buf[i] != ISO2709_FS && i < end_offset)
/* NOTE(review): the byte following the current position is compared with
 * '1'; presumably this drives the `go` flag of the enclosing loop --
 * confirm. */
178 if (!memcmp(buf+i+1, "1", 1) && i<end_offset)
186 } while (go && i < end_offset);
188 data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res);
/* other non-"00" fields: a tag node named by the subfield identifier
 * (without its leading byte), followed by the text up to the next
 * separator, which is attached to res */
191 else if (memcmp (tag, "00", 2) && identifier_length)
194 data1_mk_tag_n (p->dh, p->mem,
195 buf+i+1, identifier_length-1,
196 0 /* attr */, parent);
198 fprintf (outf, " $");
199 for (j = 1; j<identifier_length; j++)
200 fprintf (outf, "%c", buf[j+i]);
203 i += identifier_length;
205 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
206 buf[i] != ISO2709_FS && i < end_offset)
209 fprintf (outf, "%c", buf[i]);
213 data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res);
219 fprintf (outf, "%c", buf[i]);
/* here the collected bytes are attached to parent as plain text */
226 data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, parent);
/* diagnostic output describing how the field ended */
229 fprintf (outf, "\n");
231 fprintf (outf, "-- separator but not at end of field\n");
232 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
233 fprintf (outf, "-- no separator at end of field\n");
239 * Locate some data under this node. This routine should handle variants
/*
 * get_data: return a pointer to text data found at or below node n and
 * store a length in *len.
 *
 * For a DATA1N_data node the leading whitespace is skipped (the returned
 * pointer is data + i) and trailing whitespace is examined from the end
 * using *len.  The pointer refers to the node's own storage; nothing is
 * copied.
 *
 * NOTE(review): DATA1N_tag and DATA1N_data nodes that yield no result are
 * presumably followed via child / next respectively -- confirm.
 */
242 static char *get_data(data1_node *n, int *len)
248 if (n->which == DATA1N_data)
251 *len = n->u.data.len;
/* find the first non-space byte */
253 for (i = 0; i<*len; i++)
254 if (!d1_isspace(n->u.data.data[i]))
/* walk back over trailing space; presumably *len is decremented in the
 * loop body -- confirm */
256 while (*len && d1_isspace(n->u.data.data[*len - 1]))
260 return n->u.data.data + i;
262 if (n->which == DATA1N_tag)
264 else if (n->which == DATA1N_data)
/*
 * lookup_subfield: scan node and its following siblings (via ->next) for a
 * node whose tag name matches `name`.  A zero result from yaz_matchstr()
 * is treated as a match.
 *
 * NOTE(review): presumably returns the matching node, or NULL when the
 * list is exhausted -- confirm.  Every node is read as a tag node
 * (u.tag.tag) without a check of its kind being visible here.
 */
274 static data1_node *lookup_subfield(data1_node *node, const char *name)
278 for (p=node; p; p=p->next)
280 if (!yaz_matchstr(p->u.tag.tag, name))
/*
 * lookup_inline_subfield: scan the in-line subfield list starting at pisf
 * (via ->next) for an entry whose name matches `name`.  A zero result from
 * yaz_matchstr() is treated as a match.
 *
 * NOTE(review): presumably returns the matching entry, or NULL when none
 * is found -- confirm.
 */
285 static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, const char *name)
289 for (p=pisf; p; p=p->next)
291 if (!yaz_matchstr(p->name, name))
/*
 * cat_inline_subfield: append the data of in-line subfields to buf as
 * directed by the subfield specification list psf.
 *
 * The loop runs while both a specification (p) and an in-line subfield
 * (pisf) remain, and distinguishes three kinds of specification:
 *   MC_SF        - look the subfield up by name and concatenate its data,
 *                  with the prefix / suffix added unless they are "_";
 *   MC_SFVARIANT - recurse into the child list;
 *   MC_SFGROUP   - recurse into the child list after a name test.
 *
 * Returns an inline_subfield pointer.  NOTE(review): presumably the
 * position reached in the pisf list -- confirm.
 *
 * NOTE(review): buf is extended with strcat / strncat and no size is
 * passed in, so the caller's buffer must be large enough.
 */
296 static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_subfield *pisf)
300 for (p = psf; p && pisf; p = p->next)
302 if (p->which == MC_SF)
304 inline_subfield *found = lookup_inline_subfield(pisf, p->name);
/* "_" stands for "no prefix"; anything else is added after a blank */
308 if (strcmp(p->prefix, "_"))
310 strcat(strcat(buf, " "), p->prefix);
/* interval.start == -1 selects the whole value; otherwise only the
 * characters start..end (inclusive) are appended */
312 if (p->interval.start == -1)
314 strcat(buf, found->data);
318 strncat(buf, found->data+p->interval.start,
319 p->interval.end-p->interval.start+1);
/* "_" stands for "no suffix"; anything else is followed by a blank */
321 if (strcmp(p->suffix, "_"))
323 strcat(strcat(buf, p->suffix), " ");
326 logf(LOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name);
331 else if (p->which == MC_SFVARIANT)
333 inline_subfield *next;
336 next = cat_inline_subfield(p->u.child, buf, pisf);
342 else if (p->which == MC_SFGROUP)
/* NOTE(review): the loop iterates pp over the group's children, but the
 * comparison below uses p->name rather than pp->name; cat_subfield() in
 * this file compares against pp->name in the same place.  Possibly a
 * defect -- confirm. */
347 for (pp = p->u.child, found = 0; pp; pp = pp->next)
349 if (!yaz_matchstr(pisf->name, p->name))
358 pisf = cat_inline_subfield(p->u.child, buf, pisf);
/*
 * cat_inline_field: for each node in the `subfield` sibling list, parse its
 * data with inline_parse() and, when the parsed in-line field's name
 * matches pf->name, append its content to buf.
 *
 * Two cases are visible for a matching field:
 *   - pf has no subfield list but the parsed field has one: the data of
 *     the first parsed subfield is appended as is;
 *   - otherwise the indicators are compared and, if they agree,
 *     cat_inline_subfield() does the appending.
 *
 * Each parsed field is released with inline_destroy_field().
 *
 * NOTE(review): buf is extended with strcat and no size is passed in.
 */
365 static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield)
/* guard against a missing specification or node list; presumably returns
 * at once -- confirm */
368 if (!pf || !subfield)
371 for (;subfield; subfield = subfield->next)
374 inline_field *pif = inline_parse(get_data(subfield,&len));
376 if (pif && !yaz_matchstr(pif->name, pf->name))
378 if (!pf->list && pif->list)
380 strcat(buf, pif->list->data);
/* a blank indicator in the record is written as '_' for comparison; a '.'
 * in the specification accepts any indicator value */
390 ind1 = (pif->ind1[0] == ' ') ? '_':pif->ind1[0];
391 ind2 = (pif->ind2[0] == ' ') ? '_':pif->ind2[0];
393 if (((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) &&
394 ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0])))
396 cat_inline_subfield(pf->list, buf, pif->list);
399 add separator for inline fields
408 logf(LOG_WARN, "In-line field %s missed -- indicators does not match", pif->name);
412 inline_destroy_field(pif);
415 logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf);
/*
 * cat_subfield: append subfield data from the data1 node list `subfield`
 * to buf as directed by the subfield specification list psf.
 *
 * The loop runs while both a specification (p) and a node remain, and
 * distinguishes three kinds of specification:
 *   MC_SF        - look the subfield up by name; its content is appended
 *                  either through cat_inline_field(), in full, or as the
 *                  slice given by p->interval, with prefix / suffix added
 *                  unless they are "_"; scanning then continues after the
 *                  node that was found;
 *   MC_SFVARIANT - recurse into the child list;
 *   MC_SFGROUP   - recurse into the child list after testing the current
 *                  node's tag against the children's names.
 *
 * Returns a data1_node pointer.  NOTE(review): presumably the position
 * reached in the subfield list -- confirm.
 *
 * NOTE(review): buf is extended with strcat / strncat and no size is
 * passed in, so the caller's buffer must be large enough.
 */
418 static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfield)
422 for (p = psf; p && subfield; p = p->next)
424 if (p->which == MC_SF)
426 data1_node *found = lookup_subfield(subfield, p->name);
/* "_" stands for "no prefix"; anything else is added after a blank */
432 if (strcmp(p->prefix, "_"))
434 strcat(strcat(buf, " "), p->prefix);
439 cat_inline_field(p->u.in_line, buf, found);
/* interval.start == -1 selects the whole value; otherwise only the
 * characters start..end (inclusive) are appended.
 * NOTE(review): strcat relies on a terminating NUL and does not use the
 * trimmed length that get_data() stores in len -- confirm this is
 * intended. */
441 else if (p->interval.start == -1)
443 strcat(buf, get_data(found, &len));
447 strncat(buf, get_data(found, &len)+p->interval.start,
448 p->interval.end-p->interval.start+1);
/* "_" stands for "no suffix"; anything else is followed by a blank */
450 if (strcmp(p->suffix, "_"))
452 strcat(strcat(buf, p->suffix), " ");
455 logf(LOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag);
/* continue after the subfield that was just consumed */
457 subfield = found->next;
460 else if (p->which == MC_SFVARIANT)
464 next = cat_subfield(p->u.child, buf, subfield);
/* an unchanged position means the variant consumed nothing */
465 if (next == subfield)
470 else if (p->which == MC_SFGROUP)
475 for (pp = p->u.child, found = 0; pp; pp = pp->next)
477 if (!yaz_matchstr(subfield->u.tag.tag, pp->name))
486 subfield = cat_subfield(p->u.child, buf, subfield);
/*
 * cat_field: collect into buf the text that the field specification pf
 * selects from the data1 node `field`.
 *
 * The field's tag is first compared with pf->name.  Two ways of collecting
 * data are visible:
 *   - pf has no subfield list and the field's first child is a data node:
 *     the field's data is appended in full or as the slice given by
 *     pf->interval;
 *   - otherwise the first child's tag is read as the two indicators,
 *     these are checked against pf->ind1 / pf->ind2, and cat_subfield()
 *     processes the nodes below it.
 *
 * Returns a data1_node pointer.  NOTE(review): presumably the next field
 * to examine -- confirm.  The parameter p is not used in the statements
 * shown.
 *
 * NOTE(review): buf is extended with strcat / strncat and no size is
 * passed in, so the caller's buffer must be large enough.
 */
493 static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, data1_node *field)
495 data1_node *subfield;
/* a non-zero yaz_matchstr() result means the tag does not match pf->name */
502 if (yaz_matchstr(field->u.tag.tag, pf->name))
505 subfield = field->child;
511 check subfield without indicators
514 if (!pf->list && subfield->which == DATA1N_data)
518 if (pf->interval.start == -1)
520 strcat(buf, get_data(field, &len));
524 strncat(buf, get_data(field, &len)+pf->interval.start,
525 pf->interval.end-pf->interval.start+1);
528 logf(LOG_LOG, "cat_field(): got buffer {%s}", buf);
/* the indicator node's tag holds the two indicator characters; a blank is
 * written as '_' for comparison and '.' in the specification accepts any
 * value */
537 ind1 = (subfield->u.tag.tag[0] == ' ') ? '_':subfield->u.tag.tag[0];
538 ind2 = (subfield->u.tag.tag[1] == ' ') ? '_':subfield->u.tag.tag[1];
541 ((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) &&
542 ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0]))
546 logf(LOG_WARN, "Field %s missed -- does not match indicators", field->u.tag.tag);
/* step down from the indicator node to the subfield nodes */
551 subfield = subfield->child;
556 cat_subfield(pf->list, buf, subfield);
559 logf(LOG_LOG, "cat_field(): got buffer {%s}", buf);
/*
 * is_empty: int-valued test on the string s.
 * NOTE(review): the exact notion of "empty" (zero length versus
 * whitespace only) and the meaning of the return value should be
 * confirmed against the function body.
 */
564 static int is_empty(char *s)
/*
 * parse_data1_tree: evaluate one statement (mc_stmnt) against the record
 * tree and add the result as new nodes under the root's first child.
 *
 * The statement text after its first three characters is handed to
 * mc_mk_context().  A field specification named "ldr" selects a slice of
 * the leader saved in the MARC table; any other name is matched against
 * field tags, and the text collected by cat_field() is split on newlines,
 * each piece becoming a tag node named mc_stmnt with a text child.
 *
 * p         supplies the data1 handle (dh) and memory handle (mem).
 * mc_stmnt  statement string; also used verbatim as the new tag name.
 * root      root of the record tree.
 */
575 static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data1_node *root)
577 data1_marctab *marctab = root->u.root.absyn->marc;
578 data1_node *top = root->child;
/* skip the first three characters of the statement before parsing it */
584 c = mc_mk_context(mc_stmnt+3);
593 mc_destroy_context(c);
597 logf(LOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt);
599 if (!yaz_matchstr(pf->name, "ldr"))
603 logf(LOG_LOG,"parse_data1_tree(): try LEADER from {%d} to {%d} positions",
604 pf->interval.start, pf->interval.end);
/* NOTE(review): marctab is dereferenced below with no null check visible
 * here, and the interval is not checked against the leader size --
 * confirm both are guaranteed by the caller. */
606 new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top);
607 data1_mk_text_n(p->dh, p->mem, marctab->leader+pf->interval.start,
608 pf->interval.end-pf->interval.start+1, new);
616 if (!yaz_matchstr(field->u.tag.tag, pf->name))
621 logf(LOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag);
624 field = cat_field(p, pf, buf, field);
/* one output node per newline-separated piece of the collected text;
 * strtok() modifies the buffer and keeps static state between calls */
626 for (pb = strtok(pb, "\n"); pb; pb = strtok(NULL, "\n"))
630 new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top);
631 data1_mk_text_n(p->dh, p->mem, pb, strlen(pb), new);
/* release the parsed specification and its context */
641 mc_destroy_field(pf);
642 mc_destroy_context(c);
/*
 * grs_read_marc: read a MARC record with grs_read_iso2709() and then call
 * parse_data1_tree() for every main element of the abstract syntax whose
 * tag is a string that yaz_matchstr() matches against "mc?".
 *
 * Returns a data1_node pointer.  NOTE(review): presumably the root built
 * by grs_read_iso2709() -- confirm, and confirm that a NULL root is
 * handled before the loop dereferences it.
 */
645 data1_node *grs_read_marc(struct grs_read_info *p)
647 data1_node *root = grs_read_iso2709(p);
653 for (e=root->u.root.absyn->main_elements; e; e=e->next)
655 data1_tag *tag = e->tag;
/* only string-valued tags matching the "mc?" pattern are treated as
 * statements */
657 if (tag && tag->which == DATA1T_string &&
658 !yaz_matchstr(tag->value.string, "mc?"))
659 parse_data1_tree(p, tag->value.string, root);
/*
 * grs_init_marc: takes no arguments and returns an untyped pointer.
 * NOTE(review): presumably the init callback of the MARC record type
 * -- confirm what it returns.
 */
663 static void *grs_init_marc(void)
/*
 * grs_destroy_marc: receives the untyped clientData pointer and returns
 * nothing.
 * NOTE(review): presumably the destroy callback paired with
 * grs_init_marc() -- confirm.
 */
668 static void grs_destroy_marc(void *clientData)
/* File-local recTypeGrs descriptor for this reader.
 * NOTE(review): presumably lists the type name and the init / destroy /
 * read functions defined in this file -- confirm the initializer. */
672 static struct recTypeGrs marc_type = {
/* Externally visible handle: points at the file-local marc_type. */
679 RecTypeGrs recTypeGrs_marc = &marc_type;