--- /dev/null
+/*
+ * Copyright (C) 1997, Index Data I/S
+ * All rights reserved.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: marcread.c,v $
+ * Revision 1.1 1997-09-04 13:54:40 adam
+ * Added MARC filter - type grs.marc.<syntax> where syntax refers
+ * to abstract syntax. New method tellf in retrieve/extract method.
+ *
+ */
+#include <stdio.h>
+#include <ctype.h>
+#include <assert.h>
+
+#include <log.h>
+#include <yaz-util.h>
+#include <marcdisp.h>
+#include "grsread.h"
+
+/*
+ * Allocate a node from mem and, when parent is given, append it to the
+ * end of parent's child list.  With no parent the node becomes the root
+ * of its own tree.  Returns the new node.
+ */
+data1_node *data1_mk_node_wp (NMEM mem, data1_node *parent)
+{
+    data1_node *node = data1_mk_node (mem);
+
+    if (parent == NULL)
+    {
+        /* no parent: the node is its own root */
+        node->root = node;
+        return node;
+    }
+    node->root = parent->root;
+    node->parent = parent;
+
+    /* link the node in as the new tail of the parent's children */
+    if (parent->child)
+        parent->last_child->next = node;
+    else
+        parent->child = node;
+    parent->last_child = node;
+    parent->num_children++;
+    return node;
+}
+
+/*
+ * Destroy handler for data nodes: releases the buffer referenced by
+ * n->u.data.data with xfree.  The node must be of kind DATA1N_data.
+ */
+static void destroy_data (struct data1_node *n)
+{
+    assert (DATA1N_data == n->which);
+    xfree (n->u.data.data);
+}
+
+/*
+ * Create a text data node below parent holding a copy of the len bytes
+ * at buf.  Payloads of at most DATA1_LOCALDATA bytes are kept in the
+ * node's own lbuf; larger ones are copied into an xmalloc'ed buffer and
+ * destroy_data is installed as the node's destroy handler.
+ * Returns the new node.
+ */
+data1_node *data1_mk_node_text (NMEM mem, data1_node *parent,
+                                const char *buf, size_t len)
+{
+    data1_node *node = data1_mk_node_wp (mem, parent);
+
+    node->which = DATA1N_data;
+    node->u.data.what = DATA1I_text;
+    node->u.data.formatted_text = 0;
+    node->u.data.len = len;
+
+    /* pick the storage: in-node buffer when it fits, heap otherwise */
+    if (node->u.data.len <= DATA1_LOCALDATA)
+        node->u.data.data = node->lbuf;
+    else
+    {
+        node->u.data.data = xmalloc (node->u.data.len);
+        node->destroy = destroy_data;
+    }
+    memcpy (node->u.data.data, buf, node->u.data.len);
+    return node;
+}
+
+/*
+ * Create a tag node below parent, named by the first len bytes of tag.
+ *
+ * The name is stored NUL-terminated in the node's lbuf and is truncated
+ * to DATA1_LOCALDATA-1 bytes if longer.  Unless parent is a variant
+ * node, the tag name is looked up with data1_getelementbytagname in the
+ * root's abstract syntax (relative to the element of the enclosing tag,
+ * if there is one) and the result is stored in u.tag.element.
+ *
+ * parent must not be NULL (it is dereferenced).  Returns the new node.
+ */
+data1_node *data1_mk_node_tag (NMEM mem, data1_node *parent,
+                               const char *tag, size_t len)
+{
+    data1_node *partag = get_parent_tag(parent);
+    data1_node *res;
+    data1_element *e = NULL;
+
+    res = data1_mk_node_wp (mem, parent);
+
+    res->which = DATA1N_tag;
+    res->u.tag.tag = res->lbuf;
+    res->u.tag.get_bytes = -1;
+    /* Give every tag field a defined value up front, so that the early
+       return for variant parents below does not hand back a node whose
+       element and flags were never assigned by this function. */
+    res->u.tag.element = NULL;
+    res->u.tag.node_selected = 0;
+    res->u.tag.make_variantlist = 0;
+    res->u.tag.no_data_requested = 0;
+
+    /* leave room for the terminating NUL in lbuf */
+    if (len >= DATA1_LOCALDATA)
+        len = DATA1_LOCALDATA-1;
+
+    memcpy (res->u.tag.tag, tag, len);
+    res->u.tag.tag[len] = '\0';
+
+    /* tags directly below a variant are not resolved against the absyn */
+    if (parent->which == DATA1N_variant)
+        return res;
+    if (partag)
+        e = partag->u.tag.element;
+
+    res->u.tag.element =
+        data1_getelementbytagname (res->root->u.root.absyn, e,
+                                   res->u.tag.tag);
+    return res;
+}
+
+#define MARC_DEBUG 0
+
+/*
+ * Read one ISO 2709 (MARC) record through the callbacks in p and build
+ * a data1 tree for it.
+ *
+ * Tree layout:
+ *   - the root carries the abstract syntax named by p->type;
+ *   - each directory entry yields a tag node (the 3-character field tag)
+ *     below the root;
+ *   - for fields whose tag does not start with "00", and when the leader
+ *     declares indicators, an indicator tag node is created below the
+ *     field node and the subfields hang below that;
+ *   - each subfield yields a tag node (the identifier without its
+ *     leading delimiter byte) with one text child;
+ *   - field data not consumed as subfields is stored as text below a
+ *     tag node named "@".
+ *
+ * Returns the root node, or NULL when no record could be read, the
+ * abstract syntax is unknown, or the record is structurally broken
+ * (directory or field data pointing outside the record).
+ */
+data1_node *grs_read_marc (struct grs_read_info *p)
+{
+    char buf[100000];
+    int entry_p;
+    int entry_length;
+    int record_length;
+    int indicator_length;
+    int identifier_length;
+    int base_address;
+    int length_data_entry;
+    int length_starting;
+    int read_bytes;
+#if MARC_DEBUG
+    FILE *outf = stdout;
+#endif
+
+    data1_node *res_root;
+    data1_absyn *absyn;
+    char *absynName;
+
+    /* the first 5 bytes of the leader hold the record length */
+    if ((*p->readf)(p->fh, buf, 5) != 5)
+        return NULL;
+    record_length = atoi_n (buf, 5);
+    if (record_length < 25)
+    {
+        logf (LOG_WARN, "MARC record length < 25, is %d", record_length);
+        return NULL;
+    }
+    /* defensive: up to record_length+1 bytes are stored in buf below */
+    if (record_length >= (int) sizeof(buf))
+    {
+        logf (LOG_WARN, "MARC record length %d too large", record_length);
+        return NULL;
+    }
+    /* read remaining part - attempt to read one byte further... */
+    read_bytes = (*p->readf)(p->fh, buf+5, record_length-4);
+    if (read_bytes < record_length-5)
+    {
+        logf (LOG_WARN, "Couldn't read whole MARC record");
+        return NULL;
+    }
+    if (read_bytes == record_length - 4)
+    {
+        /* the extra byte was read: hand endf the offset one before the
+           current position.
+           NOTE(review): a 25 byte record read from offset 0 leaves the
+           offset at exactly 26 and trips this assert - confirm whether
+           the length check above or the assert is the intended limit. */
+        off_t cur_offset = (*p->tellf)(p->fh);
+        assert (cur_offset > 26);
+        if (p->endf)
+            (*p->endf)(p->fh, cur_offset - 1);
+    }
+    absynName = p->type;
+    logf (LOG_DEBUG, "absynName = %s", absynName);
+    if (!(absyn = data1_get_absyn (absynName)))
+    {
+        logf (LOG_WARN, "Unknown abstract syntax: %s", absynName);
+        return NULL;
+    }
+
+    /* leader: indicator count, identifier length and the widths of the
+       length / starting-position parts of each directory entry */
+    indicator_length = atoi_n (buf+10, 1);
+    identifier_length = atoi_n (buf+11, 1);
+    length_data_entry = atoi_n (buf+20, 1);
+    length_starting = atoi_n (buf+21, 1);
+    entry_length = 3+length_data_entry+length_starting;
+
+    /* Walk the directory to its terminating field separator.  The base
+       address of the data is taken from where the directory ends, not
+       from the leader.  Every step is checked against the record length
+       so that a record without a separator cannot run off the buffer. */
+    for (entry_p = 24; buf[entry_p] != ISO2709_FS; entry_p += entry_length)
+    {
+        if (entry_p + entry_length >= record_length)
+        {
+            logf (LOG_WARN, "MARC directory not terminated within record");
+            return NULL;
+        }
+    }
+    base_address = entry_p+1;
+
+    res_root = data1_mk_node_wp (p->mem, NULL);
+    res_root->u.root.type = nmem_malloc (p->mem, strlen(absynName)+1);
+    strcpy (res_root->u.root.type, absynName);
+    res_root->u.root.absyn = absyn;
+
+    for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
+    {
+        int data_length;
+        int data_offset;
+        int end_offset;
+        int head_length;
+        int i, i0;
+        char tag[4];
+        data1_node *res;
+        data1_node *parent;
+
+        memcpy (tag, buf+entry_p, 3);
+        entry_p += 3;
+        tag[3] = '\0';
+
+        data_length = atoi_n (buf+entry_p, length_data_entry);
+        entry_p += length_data_entry;
+        data_offset = atoi_n (buf+entry_p, length_starting);
+        entry_p += length_starting;
+        i = data_offset + base_address;
+        end_offset = i+data_length-1;
+
+        /* indicators are only present on fields not tagged 00x */
+        head_length = memcmp (tag, "00", 2) ? indicator_length : 0;
+
+        /* the field (and its indicators) must start inside the record */
+        if (i < 0 || i >= record_length || i + head_length > record_length)
+        {
+            logf (LOG_WARN, "MARC field %s lies outside the record", tag);
+            return NULL;
+        }
+        /* an over-long directory length must not carry us past the
+           record; well-formed fields stop at their separator anyway */
+        if (end_offset >= record_length)
+            end_offset = record_length - 1;
+
+        /* generate field node */
+        res = data1_mk_node_tag (p->mem, res_root, tag, 3);
+
+#if MARC_DEBUG
+        fprintf (outf, "%s ", tag);
+#endif
+        if (head_length)
+        {
+            /* generate indicator node */
+#if MARC_DEBUG
+            int j;
+#endif
+            res = data1_mk_node_tag (p->mem, res, buf+i, indicator_length);
+#if MARC_DEBUG
+            for (j = 0; j<indicator_length; j++)
+                fprintf (outf, "%c", buf[j+i]);
+#endif
+            i += indicator_length;
+        }
+        parent = res;
+        /* traverse sub fields */
+        i0 = i;
+        while (i < end_offset &&
+               buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
+        {
+            if (memcmp (tag, "00", 2) && identifier_length)
+            {
+                data1_node *sub;
+#if MARC_DEBUG
+                int j;
+#endif
+                /* the identifier must fit inside the record */
+                if (i + identifier_length > record_length)
+                {
+                    logf (LOG_WARN, "MARC field %s: truncated subfield",
+                          tag);
+                    break;
+                }
+                /* buf[i] is the delimiter; the rest names the subfield */
+                sub = data1_mk_node_tag (p->mem, parent, buf+i+1,
+                                         identifier_length-1);
+#if MARC_DEBUG
+                fprintf (outf, " $");
+                for (j = 1; j<identifier_length; j++)
+                    fprintf (outf, "%c", buf[j+i]);
+                fprintf (outf, " ");
+#endif
+                i += identifier_length;
+                i0 = i;
+                /* subfield data runs to the next delimiter/separator */
+                while (i < end_offset &&
+                       buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
+                       buf[i] != ISO2709_FS)
+                {
+#if MARC_DEBUG
+                    fprintf (outf, "%c", buf[i]);
+#endif
+                    i++;
+                }
+                data1_mk_node_text (p->mem, sub, buf + i0, i - i0);
+                i0 = i;
+            }
+            else
+            {
+#if MARC_DEBUG
+                fprintf (outf, "%c", buf[i]);
+#endif
+                i++;
+            }
+        }
+        /* data that was not consumed as subfields goes below "@" */
+        if (i > i0)
+        {
+            data1_node *rest = data1_mk_node_tag (p->mem, parent, "@", 1);
+            data1_mk_node_text (p->mem, rest, buf + i0, i - i0);
+        }
+#if MARC_DEBUG
+        fprintf (outf, "\n");
+        if (i < end_offset)
+            fprintf (outf, "-- separator but not at end of field\n");
+        if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
+            fprintf (outf, "-- no separator at end of field\n");
+#endif
+    }
+    return res_root;
+}