-/*
- * Copyright (C) 1997-2002, Index Data
- * All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Id: marcread.c,v 1.16 2002-07-05 12:43:30 adam Exp $
- */
+/* $Id: marcread.c,v 1.20 2003-03-05 16:43:31 adam Exp $
+ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+ Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
#include <stdio.h>
#include <ctype.h>
#include <assert.h>
#include <yaz/yaz-util.h>
#include <yaz/marcdisp.h>
#include "grsread.h"
+#include "marcomp.h"
+#include "inline.h"
#define MARC_DEBUG 0
+#define MARCOMP_DEBUG 0
-data1_node *grs_read_marc (struct grs_read_info *p)
+static data1_node *grs_read_iso2709 (struct grs_read_info *p)
{
char buf[100000];
int entry_p;
}
res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root);
- marctab = res_root->u.root.absyn->marc;
+ if ((marctab = res_root->u.root.absyn->marc))
+ {
+ memcpy(marctab->leader, buf, 24);
+ memcpy(marctab->implementation_codes, buf+6, 4);
+ marctab->implementation_codes[4] = '\0';
+ memcpy(marctab->user_systems, buf+17, 3);
+ marctab->user_systems[3] = '\0';
+ }
if (marctab && marctab->force_indicator_length >= 0)
indicator_length = marctab->force_indicator_length;
i0 = i;
while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
{
- if (memcmp (tag, "00", 2) && identifier_length)
+
+ if (!memcmp(tag, "4", 1) && (!yaz_matchstr(absynName, "UNIMARC")||
+ !yaz_matchstr(absynName, "RUSMARC")))
+ {
+ int go = 1;
+ data1_node *res =
+ data1_mk_tag_n (p->dh, p->mem,
+ buf+i+1, identifier_length-1,
+ 0 /* attr */, parent);
+ i += identifier_length;
+ i0 = i;
+ do {
+ while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
+ buf[i] != ISO2709_FS && i < end_offset)
+ {
+ i++;
+ }
+ if (!memcmp(buf+i+1, "1", 1) && i<end_offset)
+ {
+ go = 0;
+ }
+ else
+ {
+ buf[i] = '$';
+ }
+ } while (go && i < end_offset);
+
+ data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res);
+ i0 = i;
+ }
+ else if (memcmp (tag, "00", 2) && identifier_length)
{
data1_node *res =
data1_mk_tag_n (p->dh, p->mem,
#endif
i += identifier_length;
i0 = i;
- while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
- buf[i] != ISO2709_FS && i < end_offset)
- {
+ while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
+ buf[i] != ISO2709_FS && i < end_offset)
+ {
#if MARC_DEBUG
- fprintf (outf, "%c", buf[i]);
+ fprintf (outf, "%c", buf[i]);
#endif
- i++;
- }
- data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res);
+ i++;
+ }
+ data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res);
i0 = i;
}
else
#endif
}
return res_root;
-}
+}
+/*
+ * Find the first non-blank text at or below node n.
+ *
+ * Tag nodes are followed down through their first child.  A data node that
+ * holds nothing but white space is skipped in favour of its next sibling.
+ * Any other node type ends the search.
+ *
+ * On success a pointer to the first non-blank character is returned and
+ * *len is set to the length of the text with leading and trailing white
+ * space excluded (the text itself is not modified).  When nothing is found
+ * the empty string is returned and *len is 0.
+ */
+static char *get_data(data1_node *n, int *len)
+{
+    data1_node *cur = n;
+
+    while (cur)
+    {
+        if (cur->which == DATA1N_tag)
+        {
+            /* descend: the text, if any, lives below the tag */
+            cur = cur->child;
+            continue;
+        }
+        if (cur->which != DATA1N_data)
+            break;
+        else
+        {
+            char *text = cur->u.data.data;
+            int stop = cur->u.data.len;
+            int start = 0;
+
+            /* skip leading white space */
+            while (start < stop && d1_isspace(text[start]))
+                start++;
+            /* drop trailing white space */
+            while (stop && d1_isspace(text[stop - 1]))
+                stop--;
+
+            *len = stop - start;
+            if (*len > 0)
+                return text + start;
+
+            /* blank data node: try its next sibling */
+            cur = cur->next;
+        }
+    }
+    *len = 0;
+    return "";
+}
+/*
+ * Search node and its following siblings for the first one whose tag name
+ * matches name according to yaz_matchstr().  Returns that node, or 0 when
+ * the end of the sibling list is reached without a match.
+ * NOTE(review): u.tag.tag is read without checking the node type; callers
+ * presumably pass tag nodes only -- confirm.
+ */
+static data1_node *lookup_subfield(data1_node *node, const char *name)
+{
+    while (node && yaz_matchstr(node->u.tag.tag, name))
+        node = node->next;
+    return node;
+}
+/*
+ * Search the in-line subfield list starting at pisf for the first entry
+ * whose name matches name according to yaz_matchstr().  Returns that entry,
+ * or 0 when the list ends without a match.
+ */
+static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, const char *name)
+{
+    while (pisf && yaz_matchstr(pisf->name, name))
+        pisf = pisf->next;
+    return pisf;
+}
+/*
+ * Append to buf the in-line subfields from the list pisf that are selected
+ * by the subfield specification list psf.
+ *
+ *  MC_SF        - the first subfield at or after pisf whose name matches the
+ *                 specification is appended, either whole (interval.start
+ *                 is -1) or only bytes interval.start..interval.end.  The
+ *                 specification's prefix and suffix are added around it
+ *                 unless they are "_".  Matching then resumes after the
+ *                 subfield that was found.
+ *  MC_SFVARIANT - the child specification is applied repeatedly until it
+ *                 no longer advances through the subfield list.
+ *  MC_SFGROUP   - when the current subfield is named by one of the group
+ *                 members, the child specification is applied and its
+ *                 output is enclosed in " (" and ") ".
+ *
+ * Returns the subfield at which processing stopped (0 when the list was
+ * exhausted).  buf must already hold a NUL-terminated string and must be
+ * large enough for everything appended; no bounds checking is done here.
+ */
+static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_subfield *pisf)
+{
+    mc_subfield *p;
+
+    for (p = psf; p && pisf; p = p->next)
+    {
+        if (p->which == MC_SF)
+        {
+            inline_subfield *found = lookup_inline_subfield(pisf, p->name);
+
+            if (found)
+            {
+                if (strcmp(p->prefix, "_"))
+                {
+                    strcat(strcat(buf, " "), p->prefix);
+                }
+                if (p->interval.start == -1)
+                {
+                    strcat(buf, found->data);
+                }
+                else
+                {
+                    strncat(buf, found->data+p->interval.start,
+                            p->interval.end-p->interval.start+1);
+                }
+                if (strcmp(p->suffix, "_"))
+                {
+                    strcat(strcat(buf, p->suffix), " ");
+                }
+#if MARCOMP_DEBUG
+                logf(LOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name);
+#endif
+                /* continue matching after the subfield just consumed */
+                pisf = found->next;
+            }
+        }
+        else if (p->which == MC_SFVARIANT)
+        {
+            inline_subfield *next;
+
+            /* repeat the variant until it stops consuming subfields */
+            do {
+                next = cat_inline_subfield(p->u.child, buf, pisf);
+                if (next == pisf)
+                    break;
+                pisf = next;
+            } while (pisf);
+        }
+        else if (p->which == MC_SFGROUP)
+        {
+            mc_subfield *pp;
+            int found;
+
+            /*
+             * Is the current subfield one of the group members?  The test
+             * must use each member (pp), not the group node (p) itself --
+             * this mirrors the equivalent loop in cat_subfield().
+             */
+            for (pp = p->u.child, found = 0; pp; pp = pp->next)
+            {
+                if (!yaz_matchstr(pisf->name, pp->name))
+                {
+                    found = 1;
+                    break;
+                }
+            }
+            if (found)
+            {
+                strcat(buf, " (");
+                pisf = cat_inline_subfield(p->u.child, buf, pisf);
+                strcat(buf, ") ");
+            }
+        }
+    }
+    return pisf;
+}
+static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield)
+{ /*
+   * Append to buf the content of the in-line (embedded) fields found in the
+   * sibling list starting at subfield that match the field specification pf.
+   *
+   * The text of each node is parsed with inline_parse(); a parsed field is
+   * used only when its name matches pf->name.  If pf has no subfield list
+   * but the parsed field has one, the data of its first entry is appended
+   * as is.  Otherwise the indicators are checked and, on a match, the
+   * subfields selected by pf->list are appended via cat_inline_subfield().
+   *
+   * Does nothing when pf or subfield is null.  Text is appended with
+   * strcat(); buf is not bounds-checked here.
+   */
+
+ if (!pf || !subfield)
+ return;
+
+ for (;subfield; subfield = subfield->next)
+ {
+ int len; /* set by get_data(); not used afterwards */
+ inline_field *pif = inline_parse(get_data(subfield,&len));
+
+ if (pif && !yaz_matchstr(pif->name, pf->name))
+ {
+ if (!pf->list && pif->list)
+ {
+ strcat(buf, pif->list->data);
+ }
+ else
+ {
+ int ind1, ind2;
+
+ /*
+ check indicators: a blank indicator in the record is compared as '_',
+ and '.' in the specification matches any indicator value
+ */
+
+ ind1 = (pif->ind1[0] == ' ') ? '_':pif->ind1[0];
+ ind2 = (pif->ind2[0] == ' ') ? '_':pif->ind2[0];
+
+ if (((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) &&
+ ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0])))
+ {
+ cat_inline_subfield(pf->list, buf, pif->list);
+
+ /*
+ add separator for inline fields (only when buf is not empty);
+ parse_data1_tree() later splits buf at these newlines
+ */
+ if (strlen(buf))
+ {
+ strcat(buf, "\n");
+ }
+ }
+ else
+ {
+ logf(LOG_WARN, "In-line field %s missed -- indicators does not match", pif->name);
+ }
+ }
+ }
+ inline_destroy_field(pif); /* reached for every node, also when pif is null -- NOTE(review): presumably null is accepted; confirm */
+ }
+#if MARCOMP_DEBUG
+ logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf);
+#endif
+}
+static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfield)
+{ /*
+   * Append to buf the subfields from the sibling list starting at subfield
+   * that are selected by the subfield specification list psf.  This is the
+   * data1-tree counterpart of cat_inline_subfield().
+   *
+   * Returns the node at which processing stopped (0 when the sibling list
+   * was exhausted).  Text is appended with strcat()/strncat(); buf must
+   * already be NUL-terminated and is not bounds-checked here.
+   */
+ mc_subfield *p;
+
+ for (p = psf; p && subfield; p = p->next)
+ {
+ if (p->which == MC_SF)
+ {
+ data1_node *found = lookup_subfield(subfield, p->name);
+
+ if (found)
+ {
+ int len; /* set by get_data(); the value is not used below */
+
+ if (strcmp(p->prefix, "_")) /* a prefix of "_" is not emitted */
+ {
+ strcat(strcat(buf, " "), p->prefix);
+ }
+
+ if (p->u.in_line) /* specification has an in-line field: expand it */
+ {
+ cat_inline_field(p->u.in_line, buf, found);
+ }
+ else if (p->interval.start == -1) /* -1: no interval, take the whole text */
+ {
+ strcat(buf, get_data(found, &len));
+ }
+ else /* take bytes interval.start..interval.end of the text */
+ {
+ strncat(buf, get_data(found, &len)+p->interval.start,
+ p->interval.end-p->interval.start+1);
+ }
+ if (strcmp(p->suffix, "_")) /* a suffix of "_" is not emitted */
+ {
+ strcat(strcat(buf, p->suffix), " ");
+ }
+#if MARCOMP_DEBUG
+ logf(LOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag);
+#endif
+ subfield = found->next; /* continue after the subfield just consumed */
+ }
+ }
+ else if (p->which == MC_SFVARIANT)
+ {
+ data1_node *next;
+ do { /* repeat the variant until it stops consuming subfields */
+ next = cat_subfield(p->u.child, buf, subfield);
+ if (next == subfield)
+ break;
+ subfield = next;
+ } while (subfield);
+ }
+ else if (p->which == MC_SFGROUP)
+ {
+ mc_subfield *pp;
+ int found;
+
+ for (pp = p->u.child, found = 0; pp; pp = pp->next) /* is the current subfield a member of the group? */
+ {
+ if (!yaz_matchstr(subfield->u.tag.tag, pp->name))
+ {
+ found = 1;
+ break;
+ }
+ }
+ if (found) /* group output is enclosed in " (" and ") " */
+ {
+ strcat(buf, " (");
+ subfield = cat_subfield(p->u.child, buf, subfield);
+ strcat(buf, ") ");
+ }
+ }
+ }
+ return subfield;
+}
+static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, data1_node *field)
+{ /*
+   * Append to buf the content of the record field node `field` as selected
+   * by the field specification pf, and return the next sibling of field.
+   *
+   * Nothing is appended when the tag name of field does not match pf->name,
+   * when field has no child, or when the indicators do not match.
+   * Returns 0 when pf or field is null, field->next in every other case.
+   * The parameter p is not referenced in this function.
+   * Text is appended with strcat()/strncat(); buf is not bounds-checked here.
+   */
+ data1_node *subfield;
+ int ind1, ind2;
+
+ if (!pf || !field)
+ return 0;
+
+
+ if (yaz_matchstr(field->u.tag.tag, pf->name))
+ return field->next;
+
+ subfield = field->child;
+
+ if (!subfield)
+ return field->next;
+
+ /*
+ check subfield without indicators: the specification has no subfield
+ list and the first child of the field is a data node, so the field text
+ is appended directly (whole when interval.start is -1, otherwise bytes
+ interval.start..interval.end)
+ */
+
+ if (!pf->list && subfield->which == DATA1N_data)
+ {
+ int len; /* set by get_data(); the value is not used below */
+
+ if (pf->interval.start == -1)
+ {
+ strcat(buf, get_data(field, &len));
+ }
+ else
+ {
+ strncat(buf, get_data(field, &len)+pf->interval.start,
+ pf->interval.end-pf->interval.start+1);
+ }
+#if MARCOMP_DEBUG
+ logf(LOG_LOG, "cat_field(): got buffer {%s}", buf);
+#endif
+ return field->next;
+ }
+
+ /*
+ check indicators: they are taken from the first two characters of the
+ child's tag name; a blank is compared as '_', and '.' in the
+ specification matches any value
+ */
+
+ ind1 = (subfield->u.tag.tag[0] == ' ') ? '_':subfield->u.tag.tag[0];
+ ind2 = (subfield->u.tag.tag[1] == ' ') ? '_':subfield->u.tag.tag[1];
+
+ if (!(
+ ((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) &&
+ ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0]))
+ ))
+ {
+#if MARCOMP_DEBUG
+ logf(LOG_WARN, "Field %s missed -- does not match indicators", field->u.tag.tag);
+#endif
+ return field->next;
+ }
+
+ subfield = subfield->child; /* the subfields are the children of the indicator node */
+
+ if (!subfield)
+ return field->next;
+
+ cat_subfield(pf->list, buf, subfield); /* return value (stop position) is not needed here */
+
+#if MARCOMP_DEBUG
+ logf(LOG_LOG, "cat_field(): got buffer {%s}", buf);
+#endif
+
+ return field->next;
+}
+/*
+ * Return 1 when the NUL-terminated string s is empty or consists solely of
+ * white-space characters (as classified by isspace()), 0 otherwise.
+ */
+static int is_empty(char *s)
+{
+    const unsigned char *p;
+
+    /*
+     * Walk the string as unsigned char: passing a negative plain char to
+     * isspace() is undefined behaviour, and record data may contain bytes
+     * with the high bit set.
+     */
+    for (p = (const unsigned char *) s; *p; p++)
+    {
+        if (!isspace(*p))
+            return 0;
+    }
+    return 1;
+}
+/*
+ * Evaluate one "mc..." element name against the record tree and attach the
+ * result to the tree.
+ *
+ *  p        - reader context; p->dh and p->mem are used to create nodes.
+ *  mc_stmnt - the element name.  The text after its first three characters
+ *             is passed to mc_mk_context() as the statement to evaluate;
+ *             the full name is used as the tag of every node created.
+ *  root     - root of the record tree; new nodes are added below its first
+ *             child.
+ *
+ * When the statement names the field "ldr", the selected part of the record
+ * leader is added as one new tag with text.  Otherwise every field of the
+ * record whose tag name matches is expanded with cat_field(), the result is
+ * split at newlines, and each non-blank piece becomes a new tag with text.
+ *
+ * Returns silently when the statement yields no context or no field.
+ */
+static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data1_node *root)
+{
+    data1_marctab *marctab = root->u.root.absyn->marc;
+    data1_node *top = root->child;
+    data1_node *field;
+    mc_context *c;
+    mc_field *pf;
+    char buf[1000000];   /* NOTE: large stack buffer, filled unchecked by cat_field() */
+
+    c = mc_mk_context(mc_stmnt+3);
+
+    if (!c)
+        return;
+
+    pf = mc_getfield(c);
+
+    if (!pf)
+    {
+        mc_destroy_context(c);
+        return;
+    }
+#if MARCOMP_DEBUG
+    logf(LOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt);
+#endif
+    if (!yaz_matchstr(pf->name, "ldr"))
+    {
+#if MARCOMP_DEBUG
+        logf(LOG_LOG,"parse_data1_tree(): try LEADER from {%d} to {%d} positions",
+             pf->interval.start, pf->interval.end);
+#endif
+        if (!marctab)
+        {
+            /*
+             * The abstract syntax may come without a MARC table (the reader
+             * itself tests for that), in which case no leader was stored.
+             */
+            logf(LOG_WARN, "parse_data1_tree(): no MARC table, leader not available for {%s}", mc_stmnt);
+        }
+        else
+        {
+            data1_node *new;
+            int start = pf->interval.start;
+            int length = pf->interval.end - pf->interval.start + 1;
+
+            /*
+             * A start of -1 means "no interval" elsewhere in this file;
+             * use the whole 24-byte leader rather than reading in front
+             * of the array.
+             */
+            if (start == -1)
+            {
+                start = 0;
+                length = 24;
+            }
+            new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top);
+            data1_mk_text_n(p->dh, p->mem, marctab->leader+start, length, new);
+        }
+    }
+    else
+    {
+        field=top->child;
+
+        while(field)
+        {
+            if (!yaz_matchstr(field->u.tag.tag, pf->name))
+            {
+                data1_node *new;
+                char *pb = buf;
+#if MARCOMP_DEBUG
+                logf(LOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag);
+#endif
+                *buf = '\0';
+                /* cat_field() fills buf and hands back the next sibling */
+                field = cat_field(p, pf, buf, field);
+
+                /* in-line fields are separated by newlines: one node per piece */
+                for (pb = strtok(pb, "\n"); pb; pb = strtok(NULL, "\n"))
+                {
+                    if (!is_empty(pb))
+                    {
+                        new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top);
+                        data1_mk_text_n(p->dh, p->mem, pb, strlen(pb), new);
+                    }
+                }
+            }
+            else
+            {
+                field = field->next;
+            }
+        }
+    }
+    mc_destroy_field(pf);
+    mc_destroy_context(c);
+}
+
+/*
+ * GRS reader entry point for MARC records.  The record is first turned into
+ * a data1 tree by grs_read_iso2709().  Then every main element of the
+ * abstract syntax whose tag is a string matching "mc?" is evaluated against
+ * that tree by parse_data1_tree().  Returns the tree root, or 0 when
+ * grs_read_iso2709() returned 0.
+ */
+data1_node *grs_read_marc(struct grs_read_info *p)
+{
+    data1_element *elem;
+    data1_node *root = grs_read_iso2709(p);
+
+    if (root)
+    {
+        elem = root->u.root.absyn->main_elements;
+        while (elem)
+        {
+            data1_tag *t = elem->tag;
+
+            if (t && t->which == DATA1T_string)
+            {
+                if (!yaz_matchstr(t->value.string, "mc?"))
+                    parse_data1_tree(p, t->value.string, root);
+            }
+            elem = elem->next;
+        }
+    }
+    return root;
+}
static void *grs_init_marc(void)
{
return 0;