2 * Copyright (C) 1995-2006, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdisp.c,v 1.37 2006-12-13 11:25:17 adam Exp $
10 * \brief Implements MARC conversion utilities
25 #include <yaz/marcdisp.h>
26 #include <yaz/wrbuf.h>
27 #include <yaz/yaz-util.h>
28 #include <yaz/nmem_xml.h>
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
/* Forward declaration: yaz_marc_reset is defined further down in this file. */
35 static void yaz_marc_reset(yaz_marc_t mt);
37 /** \brief node types for yaz_marc_node */
38 enum YAZ_MARC_NODE_TYPE
/* Selects the controlfield member of a node. The code in this file also uses
   YAZ_MARC_DATAFIELD, YAZ_MARC_COMMENT and YAZ_MARC_LEADER as node types. */
41 YAZ_MARC_CONTROLFIELD,
46 /** \brief represents a data field */
47 struct yaz_marc_datafield {
/* head of the singly linked list of subfields (walked via ->next) */
50 struct yaz_marc_subfield *subfields;
53 /** \brief represents a control field */
/* The add_controlfield functions in this file fill in its tag and data. */
54 struct yaz_marc_controlfield {
59 /** \brief a comment node */
/* NOTE(review): the members of this struct are not visible in this excerpt. */
60 struct yaz_marc_comment {
64 /** \brief MARC node */
65 struct yaz_marc_node {
/* discriminator telling which per-type payload of the node is valid */
66 enum YAZ_MARC_NODE_TYPE which;
/* payload for data field nodes */
68 struct yaz_marc_datafield datafield;
/* payload for control field nodes */
69 struct yaz_marc_controlfield controlfield;
/* next node of the record, or null at the end of the list */
73 struct yaz_marc_node *next;
76 /** \brief represents a subfield */
77 struct yaz_marc_subfield {
/* next subfield of the same data field, or null for the last one */
79 struct yaz_marc_subfield *next;
82 /** \brief the internals of a yaz_marc_t handle */
/* head of the node list making up the current record */
92 struct yaz_marc_node *nodes;
/* where the next node gets linked in (see yaz_marc_add_node) */
93 struct yaz_marc_node **nodes_pp;
/* where the next subfield gets linked in (see yaz_marc_add_subfield) */
94 struct yaz_marc_subfield **subfield_pp;
/* Creates a MARC handle: allocates it with xmalloc, selects line output
   (YAZ_MARC_LINE), allocates the handle's own WRBUF, sets the default
   subfield and end-of-line strings and creates the NMEM used for nodes. */
97 yaz_marc_t yaz_marc_create(void)
99 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
100 mt->xml = YAZ_MARC_LINE;
102 mt->m_wr = wrbuf_alloc();
/* defaults used by the line writer; see yaz_marc_subfield_str and
   yaz_marc_endline_str for changing them */
105 strcpy(mt->subfield_str, " $");
106 strcpy(mt->endline_str, "\n");
108 mt->nmem = nmem_create();
/* Releases the resources owned by a handle: the NMEM holding the nodes,
   the handle's WRBUF and the leader specification string. */
113 void yaz_marc_destroy(yaz_marc_t mt)
117 nmem_destroy(mt->nmem);
118 wrbuf_free(mt->m_wr, 1);
119 xfree(mt->leader_spec);
/* Forward declaration: marc_exec_leader is defined near the end of this file. */
123 static int marc_exec_leader(const char *leader_spec, char *leader,
/* Allocates a new node from the handle's NMEM. After the call mt->nodes_pp
   points at the new node's next link, so a later node is appended behind it. */
127 struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
129 struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n));
132 mt->nodes_pp = &n->next;
/* Appends a comment node to the record. The comment text is duplicated into
   the handle's NMEM, so the caller keeps ownership of its own buffer. */
136 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
138 struct yaz_marc_node *n = yaz_marc_add_node(mt);
139 n->which = YAZ_MARC_COMMENT;
140 n->u.comment = nmem_strdup(mt->nmem, comment);
/* printf-style helper: formats a message into a local buffer and adds it to
   the record as a comment node.
   NOTE(review): the three formatting calls below look like alternatives
   selected by preprocessor conditionals that are not visible in this excerpt. */
143 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
/* NOTE(review): _vsnprintf does not NUL-terminate when the output fills the
   given count; confirm buf is terminated elsewhere. */
150 _vsnprintf(buf, sizeof(buf)-1, fmt, ap);
154 vsnprintf(buf, sizeof(buf), fmt, ap);
/* NOTE(review): vsprintf is unbounded and can overflow buf on long messages. */
156 vsprintf(buf, fmt, ap);
160 yaz_marc_add_comment(mt, buf);
/* Appends a leader node. The leader bytes are copied into the handle's NMEM
   and the handle's leader specification is then applied to that copy. */
164 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
166 struct yaz_marc_node *n = yaz_marc_add_node(mt);
167 n->which = YAZ_MARC_LEADER;
168 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
/* the result of marc_exec_leader is not checked here */
169 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
/* Appends a control field node holding copies of tag and of the first
   data_len bytes of data. It also builds a comment with a hex dump of up to
   the first 16 data bytes.
   NOTE(review): the condition guarding the hex dump is not visible here. */
172 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
173 const char *data, size_t data_len)
175 struct yaz_marc_node *n = yaz_marc_add_node(mt);
176 n->which = YAZ_MARC_CONTROLFIELD;
177 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
178 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
184 sprintf(msg, "controlfield:");
/* at most 16 bytes are dumped, each as two hex digits */
185 for (i = 0; i < 16 && i < data_len; i++)
186 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
188 sprintf(msg + strlen(msg), " ..");
189 yaz_marc_add_comment(mt, msg);
/* Appends a control field node whose tag and data are taken from libxml2
   nodes; both are extracted into the handle's NMEM by nmem_text_node_cdata. */
194 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
195 const xmlNode *ptr_data)
197 struct yaz_marc_node *n = yaz_marc_add_node(mt);
198 n->which = YAZ_MARC_CONTROLFIELD;
199 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
200 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/* Appends a data field node with copies of tag and of the first
   indicator_len bytes of indicator. The node starts with no subfields;
   subsequent yaz_marc_add_subfield calls attach to this field. */
204 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
205 const char *indicator, size_t indicator_len)
207 struct yaz_marc_node *n = yaz_marc_add_node(mt);
208 n->which = YAZ_MARC_DATAFIELD;
209 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
210 n->u.datafield.indicator =
211 nmem_strdupn(mt->nmem, indicator, indicator_len);
212 n->u.datafield.subfields = 0;
214 /* make subfield_pp the current (last one) */
215 mt->subfield_pp = &n->u.datafield.subfields;
/* Same as yaz_marc_add_datafield, except that the tag is extracted from a
   libxml2 node by nmem_text_node_cdata. */
219 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
220 const char *indicator, size_t indicator_len)
222 struct yaz_marc_node *n = yaz_marc_add_node(mt);
223 n->which = YAZ_MARC_DATAFIELD;
224 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
225 n->u.datafield.indicator =
226 nmem_strdupn(mt->nmem, indicator, indicator_len);
227 n->u.datafield.subfields = 0;
229 /* make subfield_pp the current (last one) */
230 mt->subfield_pp = &n->u.datafield.subfields;
/* Appends a subfield to the most recently added data field. code_data holds
   the subfield code immediately followed by the subfield data;
   code_data_len bytes of it are copied into the handle's NMEM.
   NOTE(review): the condition guarding the hex-dump comment is not visible. */
234 void yaz_marc_add_subfield(yaz_marc_t mt,
235 const char *code_data, size_t code_data_len)
242 sprintf(msg, "subfield:");
/* dump at most the first 16 bytes as hex */
243 for (i = 0; i < 16 && i < code_data_len; i++)
244 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
/* mark the dump as truncated when there was more data */
245 if (i < code_data_len)
246 sprintf(msg + strlen(msg), " ..");
247 yaz_marc_add_comment(mt, msg);
252 struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n));
253 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
255 /* mark subfield_pp to point to this one, so we append here next */
256 *mt->subfield_pp = n;
257 mt->subfield_pp = &n->next;
/* Converts the first size characters of buf to an integer in *val via
   atoi_n. Only the first character is checked for being a digit; the cast
   to unsigned char keeps isdigit well defined for bytes above 127.
   Callers treat a zero return as "not a number". */
261 static int atoi_n_check(const char *buf, int size, int *val)
263 if (!isdigit(*(const unsigned char *) buf))
265 *val = atoi_n(buf, size);
269 /** \brief reads the MARC 24 bytes leader and checks content
271 \param leader of the 24 byte leader
272 \param indicator_length indicator length
273 \param identifier_length identifier length
274 \param base_address base address
275 \param length_data_entry length of data entry
276 \param length_starting length of starting
277 \param length_implementation length of implementation defined data
279 static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c,
280 int *indicator_length,
281 int *identifier_length,
283 int *length_data_entry,
284 int *length_starting,
285 int *length_implementation)
/* work on a private copy of the 24 leader bytes */
289 memcpy(leader, leader_c, 24);
/* Each numeric leader position is validated below. Where a default is
   visible, it replaces a value that is not a digit. */
291 if (!atoi_n_check(leader+10, 1, indicator_length))
294 "Indicator length at offset 10 should hold a digit."
297 *indicator_length = 2;
299 if (!atoi_n_check(leader+11, 1, identifier_length))
302 "Identifier length at offset 11 should hold a digit."
305 *identifier_length = 2;
307 if (!atoi_n_check(leader+12, 5, base_address))
310 "Base address at offsets 12..16 should hold a number."
314 if (!atoi_n_check(leader+20, 1, length_data_entry))
317 "Length data entry at offset 20 should hold a digit."
319 *length_data_entry = 4;
322 if (!atoi_n_check(leader+21, 1, length_starting))
325 "Length starting at offset 21 should hold a digit."
327 *length_starting = 5;
330 if (!atoi_n_check(leader+22, 1, length_implementation))
333 "Length implementation at offset 22 should hold a digit."
335 *length_implementation = 0;
/* report the values in effect as comment nodes
   NOTE(review): the condition guarding these reports is not visible here */
341 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
342 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
343 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
344 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
345 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
346 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
/* the local leader copy becomes the record's leader node */
348 yaz_marc_add_leader(mt, leader, 24);
/* Sets the string written before each subfield by the line writer.
   s is truncated to fit mt->subfield_str; the explicit terminator is needed
   because strncpy does not terminate on truncation. */
351 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
353 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
354 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/* Sets the string written at the end of each field by the line writer.
   s is truncated to fit mt->endline_str; the explicit terminator is needed
   because strncpy does not terminate on truncation. */
357 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
359 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
360 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
363 /* try to guess how many bytes the identifier really is! */
/* Tries prefixes of 1..4 bytes of buf and returns the length of the first
   prefix that yaz_iconv converts without error. Returns 1 when no prefix
   converts, or when the length cannot be determined. */
364 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
369 for (i = 1; i<5; i++)
372 size_t outbytesleft = sizeof(outbuf);
374 const char *inp = buf;
376 size_t inbytesleft = i;
377 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
378 &outp, &outbytesleft);
/* (size_t) -1 is the error return of yaz_iconv as used here */
379 if (r != (size_t) (-1))
380 return i; /* got a complete sequence */
382 return 1; /* giving up */
384 return 1; /* we don't know */
/* Discards the current record: resets the handle's NMEM and makes the next
   added node the first node of the list again. */
387 static void yaz_marc_reset(yaz_marc_t mt)
389 nmem_reset(mt->nmem);
391 mt->nodes_pp = &mt->nodes;
/* Writes the comment nodes of the record to wr. This is the fallback of
   yaz_marc_write_mode.
   NOTE(review): handling of the other node types and the return values are
   not visible in this excerpt. */
395 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
397 struct yaz_marc_node *n;
398 int identifier_length;
399 const char *leader = 0;
/* locate the leader node */
401 for (n = mt->nodes; n; n = n->next)
402 if (n->which == YAZ_MARC_LEADER)
404 leader = n->u.leader;
/* leader offset 11 must hold the identifier length digit */
410 if (!atoi_n_check(leader+11, 1, &identifier_length))
413 for (n = mt->nodes; n; n = n->next)
417 case YAZ_MARC_COMMENT:
/* comment text goes through the handle's character set conversion */
418 wrbuf_iconv_write(wr, mt->iconv_cd,
419 n->u.comment, strlen(n->u.comment));
420 wrbuf_puts(wr, ")\n");
/* Writes the record to wr in line format: one line per field, with the
   handle's subfield_str before each subfield and endline_str after each
   field. Field content goes through the handle's iconv conversion. */
430 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
432 struct yaz_marc_node *n;
433 int identifier_length;
434 const char *leader = 0;
/* locate the leader node */
436 for (n = mt->nodes; n; n = n->next)
437 if (n->which == YAZ_MARC_LEADER)
439 leader = n->u.leader;
/* leader offset 11 must hold the identifier length digit */
445 if (!atoi_n_check(leader+11, 1, &identifier_length))
448 for (n = mt->nodes; n; n = n->next)
450 struct yaz_marc_subfield *s;
453 case YAZ_MARC_DATAFIELD:
/* tag and indicators first, then every subfield of the field */
454 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
455 n->u.datafield.indicator);
456 for (s = n->u.datafield.subfields; s; s = s->next)
458 /* if identifier length is 2 (most MARCs),
459 the code is a single character .. However we've
460 seen multibyte codes, so see how big it really is */
461 size_t using_code_len =
462 (identifier_length != 2) ? identifier_length - 1
464 cdata_one_character(mt, s->code_data);
466 wrbuf_puts (wr, mt->subfield_str);
467 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
469 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
/* the subfield data follows the code inside code_data */
470 wrbuf_iconv_puts(wr, mt->iconv_cd,
471 s->code_data + using_code_len);
472 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
475 wrbuf_puts (wr, mt->endline_str);
477 case YAZ_MARC_CONTROLFIELD:
478 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
479 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
480 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
481 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
483 wrbuf_puts (wr, mt->endline_str);
485 case YAZ_MARC_COMMENT:
487 wrbuf_iconv_write(wr, mt->iconv_cd,
488 n->u.comment, strlen(n->u.comment));
489 wrbuf_puts(wr, ")\n");
491 case YAZ_MARC_LEADER:
/* the leader is written as-is, without character set conversion */
492 wrbuf_printf(wr, "%s\n", n->u.leader);
/* Dispatches to the writer matching the handle's output mode and returns
   that writer's result.
   NOTE(review): the switch expression and the first case label are not
   visible here; presumably the switch is on mt->xml. */
498 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
503 return yaz_marc_write_line(mt, wr);
504 case YAZ_MARC_MARCXML:
505 return yaz_marc_write_marcxml(mt, wr);
506 case YAZ_MARC_XCHANGE:
507 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
508 case YAZ_MARC_ISO2709:
509 return yaz_marc_write_iso2709(mt, wr);
/* fallback writer */
511 return yaz_marc_write_check(mt, wr);
516 /** \brief common MARC XML/Xchange writer
518 \param wr WRBUF output
519 \param ns XMLNS for the elements
520 \param format record format (e.g. "MARC21")
521 \param type record type (e.g. "Bibliographic")
523 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
528 struct yaz_marc_node *n;
529 int identifier_length;
530 const char *leader = 0;
532 for (n = mt->nodes; n; n = n->next)
533 if (n->which == YAZ_MARC_LEADER)
535 leader = n->u.leader;
541 if (!atoi_n_check(leader+11, 1, &identifier_length))
544 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
546 wrbuf_printf(wr, " format=\"%.80s\"", format);
548 wrbuf_printf(wr, " type=\"%.80s\"", type);
549 wrbuf_printf(wr, ">\n");
550 for (n = mt->nodes; n; n = n->next)
552 struct yaz_marc_subfield *s;
556 case YAZ_MARC_DATAFIELD:
557 wrbuf_printf(wr, " <datafield tag=\"");
558 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
559 strlen(n->u.datafield.tag));
560 wrbuf_printf(wr, "\"");
561 if (n->u.datafield.indicator)
564 for (i = 0; n->u.datafield.indicator[i]; i++)
566 wrbuf_printf(wr, " ind%d=\"", i+1);
567 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
568 n->u.datafield.indicator+i, 1);
569 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
572 wrbuf_printf(wr, ">\n");
573 for (s = n->u.datafield.subfields; s; s = s->next)
575 /* if identifier length is 2 (most MARCs),
576 the code is a single character .. However we've
577 seen multibyte codes, so see how big it really is */
578 size_t using_code_len =
579 (identifier_length != 2) ? identifier_length - 1
581 cdata_one_character(mt, s->code_data);
583 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
584 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
585 s->code_data, using_code_len);
586 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
587 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
588 s->code_data + using_code_len,
589 strlen(s->code_data + using_code_len));
590 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
591 wrbuf_puts(wr, "\n");
593 wrbuf_printf(wr, " </datafield>\n");
595 case YAZ_MARC_CONTROLFIELD:
596 wrbuf_printf(wr, " <controlfield tag=\"");
597 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
598 strlen(n->u.controlfield.tag));
599 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
600 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
601 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
602 wrbuf_puts(wr, "\n");
604 case YAZ_MARC_COMMENT:
605 wrbuf_printf(wr, "<!-- ");
606 wrbuf_puts(wr, n->u.comment);
607 wrbuf_printf(wr, " -->\n");
609 case YAZ_MARC_LEADER:
610 wrbuf_printf(wr, " <leader>");
611 wrbuf_iconv_write_cdata(wr,
612 0 /* no charset conversion for leader */,
613 n->u.leader, strlen(n->u.leader));
614 wrbuf_printf(wr, "</leader>\n");
617 wrbuf_puts(wr, "</record>\n");
/* Writes the record as MARCXML (MARC21 slim namespace). When no leader
   specification is set, leader offset 9 is forced to "a" before writing. */
621 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
623 if (!mt->leader_spec)
624 yaz_marc_modify_leader(mt, 9, "a");
625 return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
/* Writes the record as MarcXchange by delegating to the common XML writer
   with the MarcXchange namespace. */
629 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
633 return yaz_marc_write_marcxml_ns(mt, wr,
634 "http://www.bs.dk/standards/MarcXchange",
/* Writes the record to wr as ISO2709. A first pass over the nodes builds the
   directory; each field's data length is measured by converting the field
   into a scratch WRBUF. The 24-byte leader and the directory are written
   next, and a second pass writes the field data itself. */
638 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
640 struct yaz_marc_node *n;
641 int indicator_length;
642 int identifier_length;
643 int length_data_entry;
645 int length_implementation;
647 const char *leader = 0;
648 WRBUF wr_dir, wr_head, wr_data_tmp;
/* locate the leader node */
651 for (n = mt->nodes; n; n = n->next)
652 if (n->which == YAZ_MARC_LEADER)
653 leader = n->u.leader;
/* the single-digit sizes at leader offsets 10, 11, 20, 21 and 22 are
   validated before any output is produced */
657 if (!atoi_n_check(leader+10, 1, &indicator_length))
659 if (!atoi_n_check(leader+11, 1, &identifier_length))
661 if (!atoi_n_check(leader+20, 1, &length_data_entry))
663 if (!atoi_n_check(leader+21, 1, &length_starting))
665 if (!atoi_n_check(leader+22, 1, &length_implementation))
668 wr_data_tmp = wrbuf_alloc();
669 wr_dir = wrbuf_alloc();
/* first pass: one directory entry (tag, length, offset) per field */
670 for (n = mt->nodes; n; n = n->next)
673 struct yaz_marc_subfield *s;
677 case YAZ_MARC_DATAFIELD:
678 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
679 data_length += indicator_length;
680 wrbuf_rewind(wr_data_tmp);
681 for (s = n->u.datafield.subfields; s; s = s->next)
683 /* write dummy IDFS + content */
684 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
685 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
687 /* write dummy FS (makes MARC-8 to become ASCII) */
688 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
/* length of the converted field, as it will be written in pass two */
689 data_length += wrbuf_len(wr_data_tmp);
691 case YAZ_MARC_CONTROLFIELD:
692 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
694 wrbuf_rewind(wr_data_tmp);
695 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
696 n->u.controlfield.data);
697 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
698 data_length += wrbuf_len(wr_data_tmp);
700 case YAZ_MARC_COMMENT:
702 case YAZ_MARC_LEADER:
/* zero-padded length and start offset, using the widths from the leader */
707 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
708 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
709 data_offset += data_length;
712 /* mark end of directory */
713 wrbuf_putc(wr_dir, ISO2709_FS);
715 /* base address of data (comes after leader+directory) */
716 base_address = 24 + wrbuf_len(wr_dir);
718 wr_head = wrbuf_alloc();
720 /* write record length */
/* NOTE(review): the +1 presumably accounts for the final record separator */
721 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
722 /* from "original" leader */
723 wrbuf_write(wr_head, leader+5, 7);
724 /* base address of data */
725 wrbuf_printf(wr_head, "%05d", base_address);
726 /* from "original" leader */
727 wrbuf_write(wr_head, leader+17, 7);
/* the rebuilt leader is 5 + 7 + 5 + 7 = 24 bytes */
729 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
730 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
731 wrbuf_free(wr_head, 1);
732 wrbuf_free(wr_dir, 1);
733 wrbuf_free(wr_data_tmp, 1);
/* second pass: the field data, in the same order as the directory */
735 for (n = mt->nodes; n; n = n->next)
737 struct yaz_marc_subfield *s;
741 case YAZ_MARC_DATAFIELD:
742 wrbuf_printf(wr, "%.*s", indicator_length,
743 n->u.datafield.indicator);
744 for (s = n->u.datafield.subfields; s; s = s->next)
746 wrbuf_putc(wr, ISO2709_IDFS);
747 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
748 /* write dummy blank - makes MARC-8 to become ASCII */
749 wrbuf_iconv_putchar(wr, mt->iconv_cd, ' ');
752 wrbuf_putc(wr, ISO2709_FS);
754 case YAZ_MARC_CONTROLFIELD:
755 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
756 /* write dummy blank - makes MARC-8 to become ASCII */
757 wrbuf_iconv_putchar(wr, mt->iconv_cd, ' ');
759 wrbuf_putc(wr, ISO2709_FS);
761 case YAZ_MARC_COMMENT:
763 case YAZ_MARC_LEADER:
/* record separator terminates the record */
767 wrbuf_printf(wr, "%c", ISO2709_RS);
/* Reads the 'subfield' elements in the sibling list starting at ptr and adds
   each one to the current data field. The 'code' attribute value and all
   text children are concatenated into one code+data string. Problems are
   reported as comment nodes through yaz_marc_cprintf. */
772 int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
774 for (; ptr; ptr = ptr->next)
776 if (ptr->type == XML_ELEMENT_NODE)
778 if (!strcmp((const char *) ptr->name, "subfield"))
780 size_t ctrl_data_len = 0;
781 char *ctrl_data_buf = 0;
782 const xmlNode *p = 0, *ptr_code = 0;
783 struct _xmlAttr *attr;
/* 'code' is the only attribute accepted on a subfield element */
784 for (attr = ptr->properties; attr; attr = attr->next)
785 if (!strcmp((const char *)attr->name, "code"))
786 ptr_code = attr->children;
790 mt, "Bad attribute '%.80s' for 'subfield'",
797 mt, "Missing attribute 'code' for 'subfield'" );
800 if (ptr_code->type == XML_TEXT_NODE)
803 strlen((const char *)ptr_code->content);
808 mt, "Missing value for 'code' in 'subfield'" );
/* first sum up the text lengths, then allocate and concatenate */
811 for (p = ptr->children; p ; p = p->next)
812 if (p->type == XML_TEXT_NODE)
813 ctrl_data_len += strlen((const char *)p->content);
814 ctrl_data_buf = nmem_malloc(mt->nmem, ctrl_data_len+1);
815 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
816 for (p = ptr->children; p ; p = p->next)
817 if (p->type == XML_TEXT_NODE)
818 strcat(ctrl_data_buf, (const char *)p->content);
819 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
824 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
832 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
834 int indicator_length;
835 int identifier_length;
837 int length_data_entry;
839 int length_implementation;
840 const char *leader = 0;
841 const xmlNode *ptr = *ptr_p;
843 for(; ptr; ptr = ptr->next)
844 if (ptr->type == XML_ELEMENT_NODE)
846 if (!strcmp((const char *) ptr->name, "leader"))
848 xmlNode *p = ptr->children;
849 for(; p; p = p->next)
850 if (p->type == XML_TEXT_NODE)
851 leader = (const char *) p->content;
857 mt, "Expected element 'leader', got '%.80s'", ptr->name);
863 yaz_marc_cprintf(mt, "Missing element 'leader'");
866 if (strlen(leader) != 24)
868 yaz_marc_cprintf(mt, "Bad length %d of leader data."
869 " Must have length of 24 characters", strlen(leader));
872 yaz_marc_read_leader(mt, leader,
878 &length_implementation);
/* Reads the 'controlfield' and 'datafield' elements in the sibling list
   starting at ptr and adds them to the record. Any other element is
   reported through yaz_marc_cprintf. */
883 static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
885 for(; ptr; ptr = ptr->next)
886 if (ptr->type == XML_ELEMENT_NODE)
888 if (!strcmp((const char *) ptr->name, "controlfield"))
890 const xmlNode *ptr_tag = 0;
891 struct _xmlAttr *attr;
/* 'tag' is the only attribute accepted on a controlfield */
892 for (attr = ptr->properties; attr; attr = attr->next)
893 if (!strcmp((const char *)attr->name, "tag"))
894 ptr_tag = attr->children;
898 mt, "Bad attribute '%.80s' for 'controlfield'",
905 mt, "Missing attribute 'tag' for 'controlfield'" );
908 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
910 else if (!strcmp((const char *) ptr->name, "datafield"))
912 char indstr[11]; /* 0(unused), 1,....9, + zero term */
913 const xmlNode *ptr_tag = 0;
914 struct _xmlAttr *attr;
916 for (i = 0; i<11; i++)
/* accepted attributes: 'tag' and four-character names starting with
   "ind", whose last character selects the indicator slot */
918 for (attr = ptr->properties; attr; attr = attr->next)
919 if (!strcmp((const char *)attr->name, "tag"))
920 ptr_tag = attr->children;
921 else if (strlen((const char *)attr->name) == 4 &&
922 !memcmp(attr->name, "ind", 3))
924 int no = atoi((const char *)attr->name+3);
926 && attr->children->type == XML_TEXT_NODE)
/* only the first character of the attribute value is used */
927 indstr[no] = attr->children->content[0];
932 mt, "Bad attribute '%.80s' for 'datafield'",
939 mt, "Missing attribute 'tag' for 'datafield'" );
942 /* note that indstr[0] is unused so we use indstr[1..] */
943 yaz_marc_add_datafield_xml(mt, ptr_tag,
944 indstr+1, strlen(indstr+1));
/* the children of the datafield element are its subfields */
946 if (yaz_marc_read_xml_subfields(mt, ptr->children))
952 "Expected element controlfield or datafield,"
953 " got %.80s", ptr->name);
/* Reads a MARC XML record from a libxml2 node list: finds the 'record'
   element, reads its leader, and then reads the fields that follow the
   leader. Returns the result of yaz_marc_read_xml_fields on that path. */
961 int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
/* the first element node encountered must be 'record' */
964 for(; ptr; ptr = ptr->next)
965 if (ptr->type == XML_ELEMENT_NODE)
967 if (!strcmp((const char *) ptr->name, "record"))
972 mt, "Unknown element '%.80s' in MARC XML reader",
979 yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
982 /* ptr points to record node now */
984 if (yaz_marc_read_xml_leader(mt, &ptr))
/* read_xml_leader leaves ptr at the leader; fields start at its sibling */
986 return yaz_marc_read_xml_fields(mt, ptr->next);
/* Parses an ISO2709 record from buf into the handle's node list.
   bsize is the buffer size, or -1 when unknown. On the visible success path
   the record length taken from the first five bytes is returned.
   Diagnostics are added to the record as comment nodes. */
992 int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
996 int indicator_length;
997 int identifier_length;
998 int end_of_directory;
1000 int length_data_entry;
1001 int length_starting;
1002 int length_implementation;
/* the record length is the first five characters of the record */
1006 record_length = atoi_n (buf, 5);
/* NOTE(review): the test is "< 25" but the message says "< 24" */
1007 if (record_length < 25)
1009 yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
1012 /* bail out if bsize is known and record_length is greater than that */
1013 if (bsize != -1 && record_length > bsize)
/* NOTE(review): this branch means record_length > bsize, yet the message
   prints "record_length < bsize" */
1015 yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
1016 record_length, bsize);
1020 yaz_marc_cprintf(mt, "Record length %5d", record_length);
1022 yaz_marc_read_leader(mt, buf,
1028 &length_implementation);
1030 /* First pass. determine length of directory & base of data */
1031 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
1033 /* length of directory entry */
1034 int l = 3 + length_data_entry + length_starting;
1035 if (entry_p + l >= record_length)
1037 yaz_marc_cprintf(mt, "Directory offset %d: end of record."
1038 " Missing FS char", entry_p);
1043 yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s",
1044 entry_p, buf+entry_p);
1046 /* Check for digits in length info */
1048 if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
1052 /* Not all digits, so stop directory scan */
1053 yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
1054 " length and/or length starting", entry_p);
1057 entry_p += 3 + length_data_entry + length_starting;
1059 end_of_directory = entry_p;
/* a mismatch between base address and directory end is only reported */
1060 if (base_address != entry_p+1)
1062 yaz_marc_cprintf(mt, "Base address not at end of directory,"
1063 " base %d, end %d", base_address, entry_p+1);
1066 /* Second pass. parse control - and datafields */
1067 for (entry_p = 24; entry_p != end_of_directory; )
1074 int identifier_flag = 0;
1075 int entry_p0 = entry_p;
/* directory entry layout: 3-byte tag, data length, starting offset */
1077 memcpy (tag, buf+entry_p, 3);
1080 data_length = atoi_n(buf+entry_p, length_data_entry);
1081 entry_p += length_data_entry;
1082 data_offset = atoi_n(buf+entry_p, length_starting);
1083 entry_p += length_starting;
/* i is the absolute position of the field data in buf */
1084 i = data_offset + base_address;
1085 end_offset = i+data_length-1;
1087 if (data_length <= 0 || data_offset < 0)
1092 yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
1094 tag, entry_p0, data_length, data_offset);
1096 if (end_offset >= record_length)
1098 yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
1099 entry_p0, end_offset, record_length);
1103 if (memcmp (tag, "00", 2))
1104 identifier_flag = 1; /* if not 00X assume subfields */
1105 else if (indicator_length < 4 && indicator_length > 0)
1107 /* Danmarc 00X have subfields */
1108 if (buf[i + indicator_length] == ISO2709_IDFS)
1109 identifier_flag = 1;
1110 else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
1111 identifier_flag = 2;
1114 if (identifier_flag)
/* identifier_flag 2 means the indicators start one byte later */
1117 i += identifier_flag-1;
1118 yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
1119 i += indicator_length;
/* one iteration per subfield until a field or record separator */
1121 while (i < end_offset &&
1122 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1124 int code_offset = i+1;
1127 while (i < end_offset &&
1128 buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
1129 buf[i] != ISO2709_FS)
1131 yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
/* control field: the data runs up to the next separator */
1138 while (i < end_offset &&
1139 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1141 yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
1145 yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
1148 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1150 yaz_marc_cprintf(mt, "No separator at end of field length=%d",
1154 return record_length;
/* Decodes an ISO2709 record from buf and writes it to wr in the handle's
   output mode. Returns the record length on success, -1 on error. */
1157 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1159 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1162 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1164 return -1; /* error */
1165 return r; /* OK, return length > 0 */
/* Decodes an ISO2709 record into the handle's own WRBUF. *result and *rsize
   refer to that WRBUF's buffer and length, so the result is owned by the
   handle and is overwritten by the next call (the WRBUF is rewound first). */
1168 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1169 char **result, int *rsize)
1173 wrbuf_rewind(mt->m_wr);
1174 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1176 *result = wrbuf_buf(mt->m_wr);
1178 *rsize = wrbuf_len(mt->m_wr);
/* NOTE(review): body not visible in this excerpt; presumably stores xmlmode
   as the handle's output mode (mt->xml). */
1182 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
/* NOTE(review): body not visible in this excerpt; presumably sets the
   handle's debug level. */
1188 void yaz_marc_debug(yaz_marc_t mt, int level)
/* NOTE(review): body not visible in this excerpt; presumably sets the iconv
   descriptor (mt->iconv_cd) used by the writers. */
1194 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1199 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1201 struct yaz_marc_node *n;
1203 for (n = mt->nodes; n; n = n->next)
1204 if (n->which == YAZ_MARC_LEADER)
1206 leader = n->u.leader;
1207 memcpy(leader+off, str, strlen(str));
/* One-shot decode: creates a temporary handle, decodes buf into wr and
   destroys the handle again.
   NOTE(review): the lines applying debug and xml to the handle are not
   visible in this excerpt. */
1213 int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
1215 yaz_marc_t mt = yaz_marc_create();
1220 r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr);
1221 yaz_marc_destroy(mt);
/* Decodes buf into wr by calling yaz_marc_decode with xml set to 0. */
1226 int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
1228 return yaz_marc_decode(buf, wr, debug, bsize, 0);
/* Decodes buf with a temporary handle and writes the result to outf.
   NOTE(review): the condition guarding the fwrite, and the handling of a
   null outf, are not visible in this excerpt. */
1232 int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
1234 yaz_marc_t mt = yaz_marc_create();
1238 r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr);
/* the return value of fwrite is not checked */
1242 fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf);
1243 yaz_marc_destroy(mt);
/* Same as marc_display_exl with an unknown buffer size (-1). */
1248 int marc_display_ex (const char *buf, FILE *outf, int debug)
1250 return marc_display_exl (buf, outf, debug, -1);
/* Same as marc_display_ex with debug set to 0. */
1254 int marc_display (const char *buf, FILE *outf)
1256 return marc_display_ex (buf, outf, 0);
/* Replaces the handle's leader specification. The previous specification is
   always freed and cleared first. The new one is validated by applying it
   to a scratch leader before a private copy is stored. */
1259 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1261 xfree(mt->leader_spec);
1262 mt->leader_spec = 0;
/* dry run against a dummy leader: only the result matters here */
1265 char dummy_leader[24];
1266 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1268 mt->leader_spec = xstrdup(leader_spec);
/* Applies a leader specification to leader, which holds size bytes. The
   specification is scanned as "pos=value" items separated by commas (see the
   sscanf pattern below). Callers treat a non-zero return as failure.
   NOTE(review): the loop structure, the handling of a null leader_spec and
   the numeric-value branch are not visible in this excerpt. */
1273 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1275 const char *cp = leader_spec;
1280 int no_read = 0, no = 0;
/* pos: leader offset; val: at most 20 characters up to the next comma;
   no_read: number of characters consumed by this item */
1282 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1283 if (no < 2 || no_read < 3)
/* the offset must lie inside the leader */
1285 if (pos < 0 || pos >= size)
/* quoted value: the text between the quotes is copied into the leader */
1290 const char *vp = strchr(val+1, '\'');
1296 if (len + pos > size)
1298 memcpy(leader + pos, val+1, len);
1300 else if (*val >= '0' && *val <= '9')
1320 * indent-tabs-mode: nil
1322 * vim: shiftwidth=4 tabstop=8 expandtab