2 * Copyright (C) 1995-2006, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdisp.c,v 1.32 2006-08-01 09:28:04 adam Exp $
10 * \brief Implements MARC conversion utilities
25 #include <yaz/marcdisp.h>
26 #include <yaz/wrbuf.h>
27 #include <yaz/yaz-util.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
/* Forward declaration; the definition appears later in this file. */
34 static void yaz_marc_reset(yaz_marc_t mt);
36 /** \brief node types for yaz_marc_node; stored in the which member of a node */
37 enum YAZ_MARC_NODE_TYPE
/* NOTE(review): only one enumerator is visible in this view; code in this
   file also uses YAZ_MARC_DATAFIELD, YAZ_MARC_COMMENT and YAZ_MARC_LEADER. */
40 YAZ_MARC_CONTROLFIELD,
45 /** \brief represents a data field */
46 struct yaz_marc_datafield {
/* head of the singly linked subfield list (linked through next); 0 when empty */
49 struct yaz_marc_subfield *subfields;
52 /** \brief represents a control field */
/* NOTE(review): the member lines are not visible in this view; other code in
   this file reads and writes members named tag and data. */
53 struct yaz_marc_controlfield {
58 /** \brief a comment node */
/* NOTE(review): the member lines are not visible in this view. */
59 struct yaz_marc_comment {
63 /** \brief MARC node: one element (leader, field or comment) of a record */
64 struct yaz_marc_node {
/* tells which of the payload members below is in use */
65 enum YAZ_MARC_NODE_TYPE which;
/* payloads; code in this file accesses them as u.datafield and
   u.controlfield (the enclosing declaration is not visible in this view) */
67 struct yaz_marc_datafield datafield;
68 struct yaz_marc_controlfield controlfield;
/* next node of the record list */
72 struct yaz_marc_node *next;
75 /** \brief represents a subfield */
76 struct yaz_marc_subfield {
/* NOTE(review): a code_data member is used elsewhere in this file but its
   declaration is not visible in this view. */
/* next subfield of the same data field */
78 struct yaz_marc_subfield *next;
81 /** \brief the internals of a yaz_marc_t handle */
/* NOTE(review): the struct header and several members (for example m_wr,
   nmem, xml, iconv_cd, subfield_str, endline_str) are not visible here. */
/* head of the node list of the current record */
90 struct yaz_marc_node *nodes;
/* location where the next node gets linked in (tail of the node list) */
91 struct yaz_marc_node **nodes_pp;
/* location where the next subfield of the current data field gets linked in */
92 struct yaz_marc_subfield **subfield_pp;
/** \brief creates a MARC handle with default settings
 *
 * Visible defaults: output mode YAZ_MARC_LINE, a freshly allocated WRBUF in
 * m_wr, a subfield prefix of space followed by a dollar sign, a newline as
 * end-of-line string and a freshly created NMEM in nmem.
 * NOTE(review): further initialisation and the return statement are not
 * visible in this view.
 */
95 yaz_marc_t yaz_marc_create(void)
97 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
98 mt->xml = YAZ_MARC_LINE;
100 mt->m_wr = wrbuf_alloc();
102 strcpy(mt->subfield_str, " $");
103 strcpy(mt->endline_str, "\n");
105 mt->nmem = nmem_create();
/** \brief releases the resources owned by a handle
 *
 * The visible code destroys the NMEM and frees the internal WRBUF m_wr.
 * NOTE(review): any guard against a null handle and the release of the
 * handle itself are not visible in this view.
 */
110 void yaz_marc_destroy(yaz_marc_t mt)
114 nmem_destroy(mt->nmem);
115 wrbuf_free (mt->m_wr, 1);
/** \brief allocates a new node from the handle NMEM and makes it the list tail
 *
 * NOTE(review): the lines that link the node into the list and return it are
 * not visible in this view.
 */
119 struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
121 struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n));
/* the following node will be linked through the next member of this one */
124 mt->nodes_pp = &n->next;
/** \brief appends a comment node holding a copy of the given string
 *
 * \param mt handle
 * \param comment text to store; it is duplicated into the handle NMEM
 */
128 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
130 struct yaz_marc_node *n = yaz_marc_add_node(mt);
131 n->which = YAZ_MARC_COMMENT;
132 n->u.comment = nmem_strdup(mt->nmem, comment);
/** \brief formats a message printf-style and appends it as a comment node
 *
 * NOTE(review): three formatting calls are visible; presumably exactly one
 * is selected by preprocessor conditionals that are not visible here.
 */
135 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
142 _vsnprintf(buf, sizeof(buf)-1, fmt, ap);
146 vsnprintf(buf, sizeof(buf), fmt, ap);
/* NOTE(review): this variant places no bound on the output length; confirm
   that buf is large enough for every message or that this path is unused. */
148 vsprintf(buf, fmt, ap);
152 yaz_marc_add_comment(mt, buf);
/** \brief appends a leader node
 *
 * \param mt handle
 * \param leader leader bytes
 * \param leader_len number of bytes copied from leader via nmem_strdupn
 */
156 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
158 struct yaz_marc_node *n = yaz_marc_add_node(mt);
159 n->which = YAZ_MARC_LEADER;
160 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
/** \brief appends a control field node
 *
 * \param mt handle
 * \param tag field tag (zero terminated; duplicated)
 * \param data field content
 * \param data_len number of bytes of data to copy
 */
163 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
164 const char *data, size_t data_len)
166 struct yaz_marc_node *n = yaz_marc_add_node(mt);
167 n->which = YAZ_MARC_CONTROLFIELD;
168 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
169 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
/* Builds a diagnostic comment with a hex dump of at most the first 16 data
   bytes. NOTE(review): the enclosing condition and the declaration of msg
   are not visible in this view. */
175 sprintf(msg, "controlfield:");
176 for (i = 0; i < 16 && i < data_len; i++)
177 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
/* NOTE(review): the condition guarding this marker is not visible; the
   sibling yaz_marc_add_subfield appends it only when the dump was cut short. */
179 sprintf(msg + strlen(msg), " ..");
180 yaz_marc_add_comment(mt, msg);
/** \brief appends a control field node whose tag and data come from XML nodes
 *
 * Both strings are obtained with nmem_text_node_cdata using the handle NMEM.
 * \param mt handle
 * \param ptr_tag XML node holding the tag text
 * \param ptr_data XML node holding the field content
 */
185 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
186 const xmlNode *ptr_data)
188 struct yaz_marc_node *n = yaz_marc_add_node(mt);
189 n->which = YAZ_MARC_CONTROLFIELD;
190 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
191 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief appends a data field node with an empty subfield list
 *
 * After this call subfields added with yaz_marc_add_subfield are appended
 * to the new field.
 * \param mt handle
 * \param tag field tag (zero terminated; duplicated)
 * \param indicator indicator bytes
 * \param indicator_len number of indicator bytes to copy
 */
195 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
196 const char *indicator, size_t indicator_len)
198 struct yaz_marc_node *n = yaz_marc_add_node(mt);
199 n->which = YAZ_MARC_DATAFIELD;
200 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
201 n->u.datafield.indicator =
202 nmem_strdupn(mt->nmem, indicator, indicator_len);
203 n->u.datafield.subfields = 0;
205 /* make subfield_pp the current (last one) */
206 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief appends a data field node whose tag comes from an XML node
 *
 * Same as yaz_marc_add_datafield except that the tag is obtained with
 * nmem_text_node_cdata.
 * \param mt handle
 * \param ptr_tag XML node holding the tag text
 * \param indicator indicator bytes
 * \param indicator_len number of indicator bytes to copy
 */
210 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
211 const char *indicator, size_t indicator_len)
213 struct yaz_marc_node *n = yaz_marc_add_node(mt);
214 n->which = YAZ_MARC_DATAFIELD;
215 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
216 n->u.datafield.indicator =
217 nmem_strdupn(mt->nmem, indicator, indicator_len);
218 n->u.datafield.subfields = 0;
220 /* make subfield_pp the current (last one) */
221 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief appends a subfield to the most recently added data field
 *
 * \param mt handle
 * \param code_data subfield code immediately followed by the subfield data
 * \param code_data_len number of bytes of code_data to copy
 */
225 void yaz_marc_add_subfield(yaz_marc_t mt,
226 const char *code_data, size_t code_data_len)
/* Builds a diagnostic comment with a hex dump of at most the first 16 bytes.
   NOTE(review): the enclosing condition and the declaration of msg are not
   visible in this view. */
233 sprintf(msg, "subfield:");
234 for (i = 0; i < 16 && i < code_data_len; i++)
235 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
/* mark the dump as cut short when more bytes remain */
236 if (i < code_data_len)
237 sprintf(msg + strlen(msg), " ..");
238 yaz_marc_add_comment(mt, msg);
243 struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n));
244 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
246 /* mark subfield_pp to point to this one, so we append here next */
247 *mt->subfield_pp = n;
248 mt->subfield_pp = &n->next;
/** \brief converts a fixed width decimal field after a digit check
 *
 * \param buf start of the field
 * \param size field width passed on to atoi_n
 * \param val receives the converted value
 * Callers in this file treat a zero result as failure.
 * NOTE(review): in the visible code only the first byte is tested with
 * isdigit; the remaining bytes of the field are not validated here.
 */
252 static int atoi_n_check(const char *buf, int size, int *val)
254 if (!isdigit(*(const unsigned char *) buf))
256 *val = atoi_n(buf, size);
260 /** \brief reads the MARC 24 bytes leader and checks content
262 \param leader_c the 24 byte leader
263 \param indicator_length indicator length
264 \param identifier_length identifier length
265 \param base_address base address
266 \param length_data_entry length of data entry
267 \param length_starting length of starting
268 \param length_implementation length of implementation defined data
/* Parses the numeric leader fields and stores the leader as a node.
   A private 24 byte copy of leader_c is made first. Each numeric field is
   read with atoi_n_check; on failure the message string shown below is
   presumably reported and the default assigned right after it is used.
   The private copy is finally added with yaz_marc_add_leader.
   NOTE(review): several lines (among them the base_address parameter, the
   calls that emit the messages and any patching of the copy) are not
   visible in this view. */
270 static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c,
271 int *indicator_length,
272 int *identifier_length,
274 int *length_data_entry,
275 int *length_starting,
276 int *length_implementation)
280 memcpy(leader, leader_c, 24);
/* leader offset 10: indicator length, default 2 */
282 if (!atoi_n_check(leader+10, 1, indicator_length))
285 "Indicator length at offset 10 should hold a digit."
288 *indicator_length = 2;
/* leader offset 11: identifier length, default 2 */
290 if (!atoi_n_check(leader+11, 1, identifier_length))
293 "Identifier length at offset 11 should hold a digit."
296 *identifier_length = 2;
/* leader offsets 12..16: base address of data */
298 if (!atoi_n_check(leader+12, 5, base_address))
301 "Base address at offsets 12..16 should hold a number."
/* leader offset 20: width of the length part of a directory entry, default 4 */
305 if (!atoi_n_check(leader+20, 1, length_data_entry))
308 "Length data entry at offset 20 should hold a digit."
310 *length_data_entry = 4;
/* leader offset 21: width of the starting position part, default 5 */
313 if (!atoi_n_check(leader+21, 1, length_starting))
316 "Length starting at offset 21 should hold a digit."
318 *length_starting = 5;
/* leader offset 22: length of implementation defined part, default 0 */
321 if (!atoi_n_check(leader+22, 1, length_implementation))
324 "Length implementation at offset 22 should hold a digit."
326 *length_implementation = 0;
/* record the resulting values as comment nodes
   NOTE(review): any condition guarding these calls is not visible here */
332 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
333 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
334 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
335 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
336 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
337 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
339 yaz_marc_add_leader(mt, leader, 24);
/** \brief sets the string written before each subfield in line mode
 *
 * The string is truncated to fit subfield_str and is always zero terminated.
 */
342 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
344 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
345 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/** \brief sets the string written at the end of each field in line mode
 *
 * The string is truncated to fit endline_str and is always zero terminated.
 */
348 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
350 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
351 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
354 /* try to guess how many bytes the identifier really is! */
/* Feeds the first 1, 2, 3 and 4 bytes of buf to yaz_iconv in turn and
   returns the first length that converts without error; returns 1 when none
   does. NOTE(review): the condition selecting the last return (presumably no
   converter configured) is not visible in this view. */
355 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
360 for (i = 1; i<5; i++)
363 size_t outbytesleft = sizeof(outbuf);
365 const char *inp = buf;
367 size_t inbytesleft = i;
368 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
369 &outp, &outbytesleft);
370 if (r != (size_t) (-1))
371 return i; /* got a complete sequence */
373 return 1; /* giving up */
375 return 1; /* we don't know */
/** \brief discards the current record
 *
 * Resets the handle NMEM and points the list tail back at the list head.
 * NOTE(review): further lines of this function are not visible in this view.
 */
378 static void yaz_marc_reset(yaz_marc_t mt)
380 nmem_reset(mt->nmem);
382 mt->nodes_pp = &mt->nodes;
/** \brief writes the current record in line format to a WRBUF
 *
 * \param mt handle
 * \param wr output buffer
 * NOTE(review): return statements and the switch head are not visible in
 * this view.
 */
386 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
388 struct yaz_marc_node *n;
389 int identifier_length;
390 const char *leader = 0;
/* locate the leader node; the identifier length is read from it */
392 for (n = mt->nodes; n; n = n->next)
393 if (n->which == YAZ_MARC_LEADER)
395 leader = n->u.leader;
401 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* emit every node in list order */
404 for (n = mt->nodes; n; n = n->next)
406 struct yaz_marc_subfield *s;
409 case YAZ_MARC_DATAFIELD:
/* tag and indicator, then each subfield as prefix, code, space, data */
410 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
411 n->u.datafield.indicator);
412 for (s = n->u.datafield.subfields; s; s = s->next)
414 /* if identifier length is 2 (most MARCs),
415 the code is a single character .. However we've
416 seen multibyte codes, so see how big it really is */
417 size_t using_code_len =
418 (identifier_length != 2) ? identifier_length - 1
420 cdata_one_character(mt, s->code_data);
422 wrbuf_puts (wr, mt->subfield_str);
423 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
425 wrbuf_printf(wr, " ");
/* the data part starts right after the code bytes */
426 wrbuf_iconv_puts(wr, mt->iconv_cd,
427 s->code_data + using_code_len);
429 wrbuf_puts (wr, mt->endline_str);
431 case YAZ_MARC_CONTROLFIELD:
432 wrbuf_printf(wr, "%s ", n->u.controlfield.tag);
433 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
434 wrbuf_puts (wr, mt->endline_str);
436 case YAZ_MARC_COMMENT:
/* comment text followed by a closing parenthesis and newline
   NOTE(review): the line writing the opening part is not visible here */
438 wrbuf_iconv_write(wr, mt->iconv_cd,
439 n->u.comment, strlen(n->u.comment));
440 wrbuf_puts(wr, ")\n");
442 case YAZ_MARC_LEADER:
/* the leader is written without character set conversion */
443 wrbuf_printf(wr, "%s\n", n->u.leader);
/** \brief writes the current record in the output format selected on the handle
 *
 * Dispatches to the line, MARCXML, MarcXchange or ISO2709 writer and
 * returns that writer result.
 * NOTE(review): the switch expression and the first case label are not
 * visible in this view.
 */
449 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
454 return yaz_marc_write_line(mt, wr);
455 case YAZ_MARC_MARCXML:
456 return yaz_marc_write_marcxml(mt, wr);
457 case YAZ_MARC_XCHANGE:
458 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
459 case YAZ_MARC_ISO2709:
460 return yaz_marc_write_iso2709(mt, wr);
465 /** \brief common MARC XML/Xchange writer
467 \param wr WRBUF output
468 \param ns XMLNS for the elements
469 \param format record format (e.g. "MARC21")
470 \param type record type (e.g. "Bibliographic")
/* Shared writer for the XML based formats: emits one record element in the
   given namespace with leader, controlfield and datafield children.
   NOTE(review): part of the parameter list, the switch head and the return
   statements are not visible in this view. */
472 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
477 struct yaz_marc_node *n;
478 int identifier_length;
479 const char *leader = 0;
/* locate the leader node; the identifier length is read from it */
481 for (n = mt->nodes; n; n = n->next)
482 if (n->which == YAZ_MARC_LEADER)
484 leader = n->u.leader;
490 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* opening tag; format and type attributes are limited to 80 characters
   NOTE(review): ns, format and type are written without XML escaping */
493 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
495 wrbuf_printf(wr, " format=\"%.80s\"", format);
497 wrbuf_printf(wr, " type=\"%.80s\"", type);
498 wrbuf_printf(wr, ">\n");
499 for (n = mt->nodes; n; n = n->next)
501 struct yaz_marc_subfield *s;
504 case YAZ_MARC_DATAFIELD:
505 wrbuf_printf(wr, " <datafield tag=\"");
506 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
507 strlen(n->u.datafield.tag));
508 wrbuf_printf(wr, "\"");
509 if (n->u.datafield.indicator)
/* one attribute ind1, ind2, ... per indicator byte */
512 for (i = 0; n->u.datafield.indicator[i]; i++)
514 wrbuf_printf(wr, " ind%d=\"", i+1);
515 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
516 n->u.datafield.indicator+i, 1);
517 wrbuf_printf(wr, "\"");
520 wrbuf_printf(wr, ">\n");
521 for (s = n->u.datafield.subfields; s; s = s->next)
523 /* if identifier length is 2 (most MARCs),
524 the code is a single character .. However we've
525 seen multibyte codes, so see how big it really is */
526 size_t using_code_len =
527 (identifier_length != 2) ? identifier_length - 1
529 cdata_one_character(mt, s->code_data);
531 wrbuf_puts(wr, " <subfield code=\"");
532 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
533 s->code_data, using_code_len);
534 wrbuf_puts(wr, "\">");
/* the data part starts right after the code bytes */
535 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
536 s->code_data + using_code_len,
537 strlen(s->code_data + using_code_len));
538 wrbuf_puts(wr, "</subfield>\n");
540 wrbuf_printf(wr, " </datafield>\n");
542 case YAZ_MARC_CONTROLFIELD:
543 wrbuf_printf(wr, " <controlfield tag=\"");
544 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
545 strlen(n->u.controlfield.tag));
546 wrbuf_printf(wr, "\">");
/* NOTE(review): unlike every other text value in this writer the control
   field data goes through wrbuf_iconv_puts rather than the cdata variant;
   confirm whether markup characters in the data need escaping here. */
547 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
548 wrbuf_printf(wr, "</controlfield>\n");
550 case YAZ_MARC_COMMENT:
/* NOTE(review): the comment text is written verbatim; a double hyphen in it
   would presumably yield an invalid XML comment. */
551 wrbuf_printf(wr, "<!-- ");
552 wrbuf_puts(wr, n->u.comment);
553 wrbuf_printf(wr, " -->\n");
555 case YAZ_MARC_LEADER:
556 wrbuf_printf(wr, " <leader>");
557 wrbuf_iconv_write_cdata(wr,
558 0 /* no charset conversion for leader */,
559 n->u.leader, strlen(n->u.leader));
560 wrbuf_printf(wr, "</leader>\n");
563 wrbuf_puts(wr, "</record>\n");
/** \brief writes the current record as MARCXML (MARC21 slim namespace)
 *
 * The stored leader is modified first: position 9 is set to the letter a.
 * NOTE(review): the remaining arguments of the writer call are not visible
 * in this view.
 */
567 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
569 yaz_marc_modify_leader(mt, 9, "a");
570 return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
/** \brief writes the current record as MarcXchange
 *
 * Delegates to the shared XML writer with the MarcXchange namespace.
 * NOTE(review): the remaining parameters and call arguments are not visible
 * in this view.
 */
574 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
578 return yaz_marc_write_marcxml_ns(mt, wr,
579 "http://www.bs.dk/standards/MarcXchange",
/** \brief writes the current record in ISO2709 format to a WRBUF
 *
 * Works in two passes over the node list: the first builds the directory
 * and computes lengths, the second writes the field data. The record length
 * and base address in the output leader are recomputed; the other leader
 * bytes are taken from the stored leader.
 * NOTE(review): several declarations, the switch heads and the return
 * statements are not visible in this view.
 */
583 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
585 struct yaz_marc_node *n;
586 int indicator_length;
587 int identifier_length;
588 int length_data_entry;
590 int length_implementation;
592 const char *leader = 0;
593 WRBUF wr_dir, wr_head, wr_data_tmp;
/* locate the leader node */
596 for (n = mt->nodes; n; n = n->next)
597 if (n->which == YAZ_MARC_LEADER)
598 leader = n->u.leader;
/* read the field widths from the stored leader */
602 if (!atoi_n_check(leader+10, 1, &indicator_length))
604 if (!atoi_n_check(leader+11, 1, &identifier_length))
606 if (!atoi_n_check(leader+20, 1, &length_data_entry))
608 if (!atoi_n_check(leader+21, 1, &length_starting))
610 if (!atoi_n_check(leader+22, 1, &length_implementation))
/* first pass: build the directory in wr_dir; wr_data_tmp is scratch space
   used to measure the converted length of each value */
613 wr_data_tmp = wrbuf_alloc();
614 wr_dir = wrbuf_alloc();
615 for (n = mt->nodes; n; n = n->next)
618 struct yaz_marc_subfield *s;
621 case YAZ_MARC_DATAFIELD:
/* directory tag is limited to three characters */
622 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
623 data_length += indicator_length;
624 for (s = n->u.datafield.subfields; s; s = s->next)
626 wrbuf_rewind(wr_data_tmp);
627 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
/* one extra byte for the ISO2709_IDFS written in the second pass */
628 data_length += 1+wrbuf_len(wr_data_tmp);
632 case YAZ_MARC_CONTROLFIELD:
633 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
635 wrbuf_rewind(wr_data_tmp);
636 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
637 n->u.controlfield.data);
638 data_length += wrbuf_len(wr_data_tmp);
641 case YAZ_MARC_COMMENT:
643 case YAZ_MARC_LEADER:
/* zero padded length and starting position complete the directory entry */
648 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
649 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
650 data_offset += data_length;
653 /* mark end of directory */
654 wrbuf_putc(wr_dir, ISO2709_FS);
656 /* base address of data (comes after leader+directory) */
657 base_address = 24 + wrbuf_len(wr_dir);
659 wr_head = wrbuf_alloc();
661 /* write record length */
/* the extra byte accounts for the ISO2709_RS written at the very end */
662 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
663 /* from "original" leader */
664 wrbuf_write(wr_head, leader+5, 7);
665 /* base address of data */
666 wrbuf_printf(wr_head, "%05d", base_address);
667 /* from "original" leader */
668 wrbuf_write(wr_head, leader+17, 7);
/* output the 24 byte leader followed by the directory, then release the
   temporary buffers */
670 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
671 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
672 wrbuf_free(wr_head, 1);
673 wrbuf_free(wr_dir, 1);
674 wrbuf_free(wr_data_tmp, 1);
/* second pass: write the field data in the same order as the directory */
676 for (n = mt->nodes; n; n = n->next)
678 struct yaz_marc_subfield *s;
681 case YAZ_MARC_DATAFIELD:
682 wrbuf_printf(wr, "%.*s", indicator_length,
683 n->u.datafield.indicator);
684 for (s = n->u.datafield.subfields; s; s = s->next)
686 wrbuf_printf(wr, "%c", ISO2709_IDFS);
687 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
689 wrbuf_printf(wr, "%c", ISO2709_FS);
691 case YAZ_MARC_CONTROLFIELD:
692 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
693 wrbuf_printf(wr, "%c", ISO2709_FS);
695 case YAZ_MARC_COMMENT:
697 case YAZ_MARC_LEADER:
/* terminate the record */
701 wrbuf_printf(wr, "%c", ISO2709_RS);
/** \brief reads the subfield elements of one datafield from an XML sibling list
 *
 * For every subfield element the code attribute value and the text content
 * are concatenated and added with yaz_marc_add_subfield. Problems are
 * reported through yaz_marc_cprintf.
 * NOTE(review): the return statements and parts of the error handling are
 * not visible in this view.
 */
706 int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
708 for (; ptr; ptr = ptr->next)
710 if (ptr->type == XML_ELEMENT_NODE)
712 if (!strcmp((const char *) ptr->name, "subfield"))
714 size_t ctrl_data_len = 0;
715 char *ctrl_data_buf = 0;
716 const xmlNode *p = 0, *ptr_code = 0;
717 struct _xmlAttr *attr;
/* the only attribute accepted is code */
718 for (attr = ptr->properties; attr; attr = attr->next)
719 if (!strcmp((const char *)attr->name, "code"))
720 ptr_code = attr->children;
724 mt, "Bad attribute '%.80s' for 'subfield'",
731 mt, "Missing attribute 'code' for 'subfield'" );
734 if (ptr_code->type == XML_TEXT_NODE)
737 strlen((const char *)ptr_code->content);
742 mt, "Missing value for 'code' in 'subfield'" );
/* add the length of every text child to the total */
745 for (p = ptr->children; p ; p = p->next)
746 if (p->type == XML_TEXT_NODE)
747 ctrl_data_len += strlen((const char *)p->content);
/* build code followed by data in one buffer from the handle NMEM */
748 ctrl_data_buf = nmem_malloc(mt->nmem, ctrl_data_len+1);
749 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
750 for (p = ptr->children; p ; p = p->next)
751 if (p->type == XML_TEXT_NODE)
752 strcat(ctrl_data_buf, (const char *)p->content);
753 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
758 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
/** \brief reads the leader element from an XML sibling list
 *
 * The leader text must be exactly 24 characters long; it is then handed to
 * yaz_marc_read_leader.
 * NOTE(review): the return statements, the update of ptr_p and most
 * arguments of the yaz_marc_read_leader call are not visible in this view.
 */
766 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
768 int indicator_length;
769 int identifier_length;
771 int length_data_entry;
773 int length_implementation;
774 const char *leader = 0;
775 const xmlNode *ptr = *ptr_p;
/* look at element nodes only; other node types are skipped */
777 for(; ptr; ptr = ptr->next)
778 if (ptr->type == XML_ELEMENT_NODE)
780 if (!strcmp((const char *) ptr->name, "leader"))
782 xmlNode *p = ptr->children;
/* take the content of a text child as the leader string */
783 for(; p; p = p->next)
784 if (p->type == XML_TEXT_NODE)
785 leader = (const char *) p->content;
791 mt, "Expected element 'leader', got '%.80s'", ptr->name);
797 yaz_marc_cprintf(mt, "Missing element 'leader'");
800 if (strlen(leader) != 24)
/* NOTE(review): strlen returns size_t while the format uses %d; confirm and
   cast the argument or change the conversion specifier. */
802 yaz_marc_cprintf(mt, "Bad length %d of leader data."
803 " Must have length of 24 characters", strlen(leader));
806 yaz_marc_read_leader(mt, leader,
812 &length_implementation);
/** \brief reads controlfield and datafield elements from an XML sibling list
 *
 * Each controlfield becomes a control field node; each datafield becomes a
 * data field node whose children are read with yaz_marc_read_xml_subfields.
 * Problems are reported through yaz_marc_cprintf.
 * NOTE(review): the return statements and parts of the error handling are
 * not visible in this view.
 */
817 static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
819 for(; ptr; ptr = ptr->next)
820 if (ptr->type == XML_ELEMENT_NODE)
822 if (!strcmp((const char *) ptr->name, "controlfield"))
824 const xmlNode *ptr_tag = 0;
825 struct _xmlAttr *attr;
/* the only attribute accepted is tag */
826 for (attr = ptr->properties; attr; attr = attr->next)
827 if (!strcmp((const char *)attr->name, "tag"))
828 ptr_tag = attr->children;
832 mt, "Bad attribute '%.80s' for 'controlfield'",
839 mt, "Missing attribute 'tag' for 'controlfield'" );
842 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
844 else if (!strcmp((const char *) ptr->name, "datafield"))
846 char indstr[11]; /* 0(unused), 1,....9, + zero term */
847 const xmlNode *ptr_tag = 0;
848 struct _xmlAttr *attr;
/* NOTE(review): the body of this loop is not visible; presumably it clears
   indstr so that strlen on it below is well defined. */
850 for (i = 0; i<11; i++)
852 for (attr = ptr->properties; attr; attr = attr->next)
853 if (!strcmp((const char *)attr->name, "tag"))
854 ptr_tag = attr->children;
/* four character attribute names starting with ind carry an indicator; the
   fourth character selects the slot in indstr */
855 else if (strlen((const char *)attr->name) == 4 &&
856 !memcmp(attr->name, "ind", 3))
858 int no = atoi((const char *)attr->name+3);
860 && attr->children->type == XML_TEXT_NODE)
/* only the first byte of the attribute value is used */
861 indstr[no] = attr->children->content[0];
866 mt, "Bad attribute '%.80s' for 'datafield'",
873 mt, "Missing attribute 'tag' for 'datafield'" );
876 /* note that indstr[0] is unused so we use indstr[1..] */
877 yaz_marc_add_datafield_xml(mt, ptr_tag,
878 indstr+1, strlen(indstr+1));
880 if (yaz_marc_read_xml_subfields(mt, ptr->children))
886 "Expected element controlfield or datafield,"
887 " got %.80s", ptr->name);
/** \brief reads a MARC record from an XML node list
 *
 * Searches the sibling list for a record element, then reads the leader
 * and the fields.
 * \param mt handle
 * \param xmlnode first node to inspect; used as a const xmlNode pointer
 * NOTE(review): the return statements and the step into the children of
 * the record element are not visible in this view.
 */
894 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
896 const xmlNode *ptr = xmlnode;
897 for(; ptr; ptr = ptr->next)
898 if (ptr->type == XML_ELEMENT_NODE)
900 if (!strcmp((const char *) ptr->name, "record"))
905 mt, "Unknown element '%.80s' in MARC XML reader",
912 yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
915 /* ptr points to record node now */
917 if (yaz_marc_read_xml_leader(mt, &ptr))
/* the fields are read from the siblings that follow the leader position */
919 return yaz_marc_read_xml_fields(mt, ptr->next);
/* Second definition with the same name and signature as the one above.
   NOTE(review): presumably the fallback for builds without libxml2, selected
   by a preprocessor conditional; neither the conditional nor the body is
   visible in this view. */
922 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
/** \brief parses an ISO2709 record into the node list of the handle
 *
 * \param mt handle
 * \param buf record bytes
 * \param bsize size of buf, or -1 when unknown
 * \return the record length taken from the first five bytes on success
 * NOTE(review): the error return statements, several declarations and some
 * loop bodies are not visible in this view.
 */
928 int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
932 int indicator_length;
933 int identifier_length;
934 int end_of_directory;
936 int length_data_entry;
938 int length_implementation;
942 record_length = atoi_n (buf, 5);
/* NOTE(review): the test rejects lengths below 25 while the message text
   says 24; confirm which bound is intended. */
943 if (record_length < 25)
945 yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
948 /* bail out if bsize is known and record_length exceeds it */
949 if (bsize != -1 && record_length > bsize)
951 yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
952 record_length, bsize);
956 yaz_marc_cprintf(mt, "Record length %5d", record_length);
958 yaz_marc_read_leader(mt, buf,
964 &length_implementation);
966 /* First pass. determine length of directory & base of data */
967 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
969 /* length of directory entry */
970 int l = 3 + length_data_entry + length_starting;
971 if (entry_p + l >= record_length)
973 yaz_marc_cprintf(mt, "Directory offset %d: end of record."
974 " Missing FS char", entry_p);
979 yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s",
980 entry_p, buf+entry_p);
982 /* Check for digits in length info */
984 if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
988 /* Not all digits, so stop directory scan */
989 yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
990 " length and/or length starting", entry_p);
993 entry_p += 3 + length_data_entry + length_starting;
995 end_of_directory = entry_p;
/* a mismatch between leader base address and directory end is only reported */
996 if (base_address != entry_p+1)
998 yaz_marc_cprintf(mt, "Base address not at end of directory,"
999 " base %d, end %d", base_address, entry_p+1);
1002 /* Second pass. parse control - and datafields */
1003 for (entry_p = 24; entry_p != end_of_directory; )
1010 int identifier_flag = 0;
1011 int entry_p0 = entry_p;
/* directory entry layout: 3 byte tag, data length, starting position */
1013 memcpy (tag, buf+entry_p, 3);
1016 data_length = atoi_n(buf+entry_p, length_data_entry);
1017 entry_p += length_data_entry;
1018 data_offset = atoi_n(buf+entry_p, length_starting);
1019 entry_p += length_starting;
/* i is the absolute position of the field data inside buf */
1020 i = data_offset + base_address;
1021 end_offset = i+data_length-1;
1023 if (data_length <= 0 || data_offset < 0)
1028 yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
1030 tag, entry_p0, data_length, data_offset);
1032 if (end_offset >= record_length)
1034 yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
1035 entry_p0, end_offset, record_length);
/* decide whether the field carries subfields */
1039 if (memcmp (tag, "00", 2))
1040 identifier_flag = 1; /* if not 00X assume subfields */
1041 else if (indicator_length < 4 && indicator_length > 0)
1043 /* Danmarc 00X have subfields */
1044 if (buf[i + indicator_length] == ISO2709_IDFS)
1045 identifier_flag = 1;
1046 else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
1047 identifier_flag = 2;
1050 if (identifier_flag)
/* data field: indicators first, then subfields delimited by ISO2709_IDFS */
1053 i += identifier_flag-1;
1054 yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
1055 i += indicator_length;
1057 while (i < end_offset &&
1058 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
/* skip the delimiter byte; the subfield code starts right after it */
1060 int code_offset = i+1;
1063 while (i < end_offset &&
1064 buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
1065 buf[i] != ISO2709_FS)
1067 yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
/* control field: everything up to the next separator is the data */
1074 while (i < end_offset &&
1075 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1077 yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
1081 yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
1084 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1086 yaz_marc_cprintf(mt, "No separator at end of field length=%d",
1090 return record_length;
/** \brief decodes an ISO2709 record and writes it in the selected output format
 *
 * \param mt handle
 * \param buf record bytes
 * \param bsize size of buf, or -1 when unknown
 * \param wr output buffer
 * \return the value from yaz_marc_read_iso2709 on success, -1 on error
 * NOTE(review): the conditions guarding the returns are not visible in this
 * view.
 */
1093 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1095 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1098 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1100 return -1; /* error */
1101 return r; /* OK, return length > 0 */
/** \brief decodes an ISO2709 record into the WRBUF owned by the handle
 *
 * The result pointer refers to the internal buffer m_wr, which is rewound
 * at the start of every call, so the data is overwritten by the next call.
 * \param mt handle
 * \param buf record bytes
 * \param bsize size of buf, or -1 when unknown
 * \param result receives a pointer to the converted data
 * \param rsize receives the length of the converted data
 * NOTE(review): the guards around the two assignments and the return
 * statement are not visible in this view.
 */
1104 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1105 char **result, int *rsize)
1109 wrbuf_rewind(mt->m_wr);
1110 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1112 *result = wrbuf_buf(mt->m_wr);
1114 *rsize = wrbuf_len(mt->m_wr);
/* NOTE(review): body not visible in this view; presumably stores xmlmode as
   the output mode of the handle. */
1118 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
/* NOTE(review): body not visible in this view; presumably stores the debug
   level on the handle. */
1124 void yaz_marc_debug(yaz_marc_t mt, int level)
/* NOTE(review): body not visible in this view; presumably stores cd as the
   character set converter used by the writers (mt->iconv_cd). */
1130 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/** \brief overwrites part of the stored leader
 *
 * \param mt handle
 * \param off byte offset into the leader
 * \param str replacement bytes; strlen(str) bytes are copied, no terminator
 * NOTE(review): no check that off plus strlen(str) stays within the stored
 * leader is visible; callers must keep the write inside the leader.
 */
1135 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1137 struct yaz_marc_node *n;
1139 for (n = mt->nodes; n; n = n->next)
1140 if (n->which == YAZ_MARC_LEADER)
1142 leader = n->u.leader;
1143 memcpy(leader+off, str, strlen(str));
/** \brief one-shot decode using a temporary handle
 *
 * Creates a handle, decodes buf into wr and destroys the handle again.
 * NOTE(review): the lines applying debug and xml to the handle and the
 * return statement are not visible in this view.
 */
1149 int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
1151 yaz_marc_t mt = yaz_marc_create();
1156 r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr);
1157 yaz_marc_destroy(mt);
/** \brief decodes a record to a WRBUF; calls yaz_marc_decode with xml set to 0 */
1162 int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
1164 return yaz_marc_decode(buf, wr, debug, bsize, 0);
/** \brief decodes a record and writes the result to a stdio stream
 *
 * Uses a temporary handle; the converted data is collected in the handle
 * WRBUF and then written to outf with fwrite.
 * NOTE(review): the handling of debug, the guard around fwrite and the
 * return statement are not visible in this view.
 */
1168 int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
1170 yaz_marc_t mt = yaz_marc_create();
1174 r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr);
1178 fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf);
1179 yaz_marc_destroy(mt);
/** \brief like marc_display_exl with the buffer size given as unknown (-1) */
1184 int marc_display_ex (const char *buf, FILE *outf, int debug)
1186 return marc_display_exl (buf, outf, debug, -1);
/** \brief like marc_display_ex with debug set to 0 */
1190 int marc_display (const char *buf, FILE *outf)
1192 return marc_display_ex (buf, outf, 0);
1198 * indent-tabs-mode: nil
1200 * vim: shiftwidth=4 tabstop=8 expandtab