2 * Copyright (C) 1995-2006, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdisp.c,v 1.35 2006-10-27 12:19:15 adam Exp $
10 * \brief Implements MARC conversion utilities
25 #include <yaz/marcdisp.h>
26 #include <yaz/wrbuf.h>
27 #include <yaz/yaz-util.h>
28 #include <yaz/nmem_xml.h>
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
35 static void yaz_marc_reset(yaz_marc_t mt);
37 /** \brief node types for yaz_marc_node */
38 enum YAZ_MARC_NODE_TYPE
41 YAZ_MARC_CONTROLFIELD,
46 /** \brief represents a data field */
47 struct yaz_marc_datafield {
50 struct yaz_marc_subfield *subfields;
53 /** \brief represents a control field */
54 struct yaz_marc_controlfield {
59 /** \brief a comment node */
60 struct yaz_marc_comment {
64 /** \brief MARC node */
65 struct yaz_marc_node {
66 enum YAZ_MARC_NODE_TYPE which;
68 struct yaz_marc_datafield datafield;
69 struct yaz_marc_controlfield controlfield;
73 struct yaz_marc_node *next;
76 /** \brief represents a subfield */
77 struct yaz_marc_subfield {
79 struct yaz_marc_subfield *next;
82 /** \brief the internals of a yaz_marc_t handle */
92 struct yaz_marc_node *nodes;
93 struct yaz_marc_node **nodes_pp;
94 struct yaz_marc_subfield **subfield_pp;
/* Create a MARC handle.  The handle is obtained with xmalloc() and, in the
   lines visible here, defaults to YAZ_MARC_LINE output, gets its own WRBUF
   (m_wr) and NMEM, and uses " $" / "\n" as subfield and end-of-line strings.
   NOTE(review): this listing omits part of the body; other members may be
   initialised on lines that are not shown. */
97 yaz_marc_t yaz_marc_create(void)
99 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
100 mt->xml = YAZ_MARC_LINE;
102 mt->m_wr = wrbuf_alloc();
105 strcpy(mt->subfield_str, " $");
106 strcpy(mt->endline_str, "\n");
108 mt->nmem = nmem_create();
/* Release the resources owned by a MARC handle: the NMEM that holds all
   nodes, the internal WRBUF (m_wr) and the leader specification string. */
113 void yaz_marc_destroy(yaz_marc_t mt)
117 nmem_destroy(mt->nmem);
118 wrbuf_free(mt->m_wr, 1);
119 xfree(mt->leader_spec);
123 static int marc_exec_leader(const char *leader_spec, char *leader,
/* Allocate a new node from the handle's NMEM and move the append cursor
   (nodes_pp) to the new node's next link, so that the following node is
   chained after this one.
   NOTE(review): the lines linking the node into the list are not visible
   in this listing. */
127 struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
129 struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n));
132 mt->nodes_pp = &n->next;
/* Append a comment node.  The text is duplicated into the handle's NMEM,
   so the caller keeps ownership of the comment argument. */
136 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
138 struct yaz_marc_node *n = yaz_marc_add_node(mt);
139 n->which = YAZ_MARC_COMMENT;
140 n->u.comment = nmem_strdup(mt->nmem, comment);
/* printf-style helper: format the message into a local buffer and store it
   as a comment node via yaz_marc_add_comment().
   Three formatting calls are visible; presumably exactly one is selected by
   preprocessor conditionals that this listing does not show.
   NOTE(review): the vsprintf() variant has no length limit on buf. */
143 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
150 _vsnprintf(buf, sizeof(buf)-1, fmt, ap);
154 vsnprintf(buf, sizeof(buf), fmt, ap);
156 vsprintf(buf, fmt, ap);
160 yaz_marc_add_comment(mt, buf);
/* Append a leader node holding a private copy of the first leader_len bytes
   of leader, then apply the handle's leader specification to that copy.
   NOTE(review): the result of marc_exec_leader() is not checked here. */
164 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
166 struct yaz_marc_node *n = yaz_marc_add_node(mt);
167 n->which = YAZ_MARC_LEADER;
168 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
169 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
/* Append a control field node.  tag is copied as a C string; data is copied
   as exactly data_len bytes.  Afterwards a "controlfield:" hex dump of at
   most the first 16 data bytes is built and stored as a comment.
   NOTE(review): the condition guarding the dump (presumably a debug flag)
   and the declaration of msg are not visible; the sprintf() calls rely on
   msg being large enough for the prefix, 16 " XX" groups and " ..". */
172 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
173 const char *data, size_t data_len)
175 struct yaz_marc_node *n = yaz_marc_add_node(mt);
176 n->which = YAZ_MARC_CONTROLFIELD;
177 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
178 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
184 sprintf(msg, "controlfield:");
185 for (i = 0; i < 16 && i < data_len; i++)
/* mask with 0xff so that a negative char is printed as two hex digits */
186 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
188 sprintf(msg + strlen(msg), " ..");
189 yaz_marc_add_comment(mt, msg);
/* Append a control field node whose tag and data are taken from libxml2
   nodes; both are converted with nmem_text_node_cdata() into the handle's
   NMEM. */
194 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
195 const xmlNode *ptr_data)
197 struct yaz_marc_node *n = yaz_marc_add_node(mt);
198 n->which = YAZ_MARC_CONTROLFIELD;
199 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
200 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/* Append a data field node with a copy of tag and of the first
   indicator_len bytes of indicator.  The subfield list starts out empty and
   the handle's subfield cursor is pointed at it, so later calls to
   yaz_marc_add_subfield() attach to this field. */
204 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
205 const char *indicator, size_t indicator_len)
207 struct yaz_marc_node *n = yaz_marc_add_node(mt);
208 n->which = YAZ_MARC_DATAFIELD;
209 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
210 n->u.datafield.indicator =
211 nmem_strdupn(mt->nmem, indicator, indicator_len);
212 n->u.datafield.subfields = 0;
214 /* make subfield_pp the current (last one) */
215 mt->subfield_pp = &n->u.datafield.subfields;
/* XML variant of yaz_marc_add_datafield(): the tag comes from a libxml2
   node (converted with nmem_text_node_cdata()), the indicator is still a
   plain byte buffer of indicator_len bytes.  The new field becomes the
   target for subsequently added subfields. */
219 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
220 const char *indicator, size_t indicator_len)
222 struct yaz_marc_node *n = yaz_marc_add_node(mt);
223 n->which = YAZ_MARC_DATAFIELD;
224 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
225 n->u.datafield.indicator =
226 nmem_strdupn(mt->nmem, indicator, indicator_len);
227 n->u.datafield.subfields = 0;
229 /* make subfield_pp the current (last one) */
230 mt->subfield_pp = &n->u.datafield.subfields;
/* Append a subfield to the data field most recently added.  code_data holds
   the subfield code immediately followed by its data, code_data_len bytes
   in total; it is copied into the handle's NMEM.
   Before that, a "subfield:" hex dump of at most 16 bytes is stored as a
   comment.
   NOTE(review): the condition guarding the dump (presumably a debug flag)
   and the declaration of msg are not visible in this listing. */
234 void yaz_marc_add_subfield(yaz_marc_t mt,
235 const char *code_data, size_t code_data_len)
242 sprintf(msg, "subfield:");
243 for (i = 0; i < 16 && i < code_data_len; i++)
244 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
/* more bytes than were dumped: mark the dump as truncated */
245 if (i < code_data_len)
246 sprintf(msg + strlen(msg), " ..");
247 yaz_marc_add_comment(mt, msg);
252 struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n));
253 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
255 /* mark subfield_pp to point to this one, so we append here next */
256 *mt->subfield_pp = n;
257 mt->subfield_pp = &n->next;
/* Convert size characters at buf to an integer stored in *val, after
   checking that the first character is a decimal digit.  Callers treat a
   zero result as failure.
   NOTE(review): only buf[0] is tested with isdigit(); the remaining
   size-1 characters are passed to atoi_n() unchecked. */
261 static int atoi_n_check(const char *buf, int size, int *val)
/* cast to unsigned char: isdigit() is undefined for negative char values */
263 if (!isdigit(*(const unsigned char *) buf))
265 *val = atoi_n(buf, size);
269 /** \brief reads the MARC 24 bytes leader and checks content
271 \param leader_c the 24 byte leader
272 \param indicator_length indicator length
273 \param identifier_length identifier length
274 \param base_address base address
275 \param length_data_entry length of data entry
276 \param length_starting length of starting
277 \param length_implementation length of implementation defined data */
279 static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c,
280 int *indicator_length,
281 int *identifier_length,
283 int *length_data_entry,
284 int *length_starting,
285 int *length_implementation)
/* work on a private 24 byte copy of the leader */
289 memcpy(leader, leader_c, 24);
/* Each leader position is parsed with atoi_n_check().  When the position
   does not start with a digit a message is produced and, as far as the
   visible lines show, the output is set to a fallback value (2, 2, 4, 5
   and 0 below).  NOTE(review): the fallback for the base address is not
   visible in this listing. */
291 if (!atoi_n_check(leader+10, 1, indicator_length))
294 "Indicator length at offset 10 should hold a digit."
297 *indicator_length = 2;
299 if (!atoi_n_check(leader+11, 1, identifier_length))
302 "Identifier length at offset 11 should hold a digit."
305 *identifier_length = 2;
307 if (!atoi_n_check(leader+12, 5, base_address))
310 "Base address at offsets 12..16 should hold a number."
314 if (!atoi_n_check(leader+20, 1, length_data_entry))
317 "Length data entry at offset 20 should hold a digit."
319 *length_data_entry = 4;
322 if (!atoi_n_check(leader+21, 1, length_starting))
325 "Length starting at offset 21 should hold a digit."
327 *length_starting = 5;
330 if (!atoi_n_check(leader+22, 1, length_implementation))
333 "Length implementation at offset 22 should hold a digit."
335 *length_implementation = 0;
/* record the decoded values as comment nodes */
341 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
342 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
343 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
344 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
345 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
346 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
/* finally store the local leader copy as the record's leader node */
348 yaz_marc_add_leader(mt, leader, 24);
/* Set the string written in front of each subfield in line mode.  The
   value is truncated to fit mt->subfield_str and always NUL terminated. */
351 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
353 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
/* strncpy() does not terminate on truncation, so do it explicitly */
354 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/* Set the string written at the end of each field in line mode.  The value
   is truncated to fit mt->endline_str and always NUL terminated. */
357 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
359 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
/* strncpy() does not terminate on truncation, so do it explicitly */
360 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
363 /* try to guess how many bytes the identifier really is! */
/* Feeds the first 1, 2, 3 and 4 bytes of buf to yaz_iconv() in turn and
   returns the first length that converts without error.  Returns 1 when no
   length succeeds or when the length cannot be determined.
   NOTE(review): the conditions selecting the two fallback returns are not
   visible in this listing. */
364 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
369 for (i = 1; i<5; i++)
372 size_t outbytesleft = sizeof(outbuf);
374 const char *inp = buf;
376 size_t inbytesleft = i;
377 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
378 &outp, &outbytesleft);
379 if (r != (size_t) (-1))
380 return i; /* got a complete sequence */
382 return 1; /* giving up */
384 return 1; /* we don't know */
/* Discard the current record: release everything allocated from the
   handle's NMEM and point the append cursor back at the head of the node
   list. */
387 static void yaz_marc_reset(yaz_marc_t mt)
389 nmem_reset(mt->nmem);
391 mt->nodes_pp = &mt->nodes;
/* Write the record in line format to wr.  The leader node is located first
   and the identifier length is read from leader position 11; the nodes are
   then written in list order. */
395 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
397 struct yaz_marc_node *n;
398 int identifier_length;
399 const char *leader = 0;
401 for (n = mt->nodes; n; n = n->next)
402 if (n->which == YAZ_MARC_LEADER)
404 leader = n->u.leader;
/* NOTE(review): no check that leader is non-null is visible before this
   use; confirm that one exists on the lines omitted from this listing. */
410 if (!atoi_n_check(leader+11, 1, &identifier_length))
413 for (n = mt->nodes; n; n = n->next)
415 struct yaz_marc_subfield *s;
/* data field: tag and indicator, then every subfield */
418 case YAZ_MARC_DATAFIELD:
419 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
420 n->u.datafield.indicator);
421 for (s = n->u.datafield.subfields; s; s = s->next)
423 /* if identifier length is 2 (most MARCs),
424 the code is a single character .. However we've
425 seen multibyte codes, so see how big it really is */
426 size_t using_code_len =
427 (identifier_length != 2) ? identifier_length - 1
429 cdata_one_character(mt, s->code_data);
/* separator, subfield code, blank, subfield data, blank */
431 wrbuf_puts (wr, mt->subfield_str);
432 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
434 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
435 wrbuf_iconv_puts(wr, mt->iconv_cd,
436 s->code_data + using_code_len);
437 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
440 wrbuf_puts (wr, mt->endline_str);
/* control field: tag, blank, data, blank */
442 case YAZ_MARC_CONTROLFIELD:
443 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
444 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
445 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
446 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
448 wrbuf_puts (wr, mt->endline_str);
/* comment: the text followed by ")" and a newline */
450 case YAZ_MARC_COMMENT:
452 wrbuf_iconv_write(wr, mt->iconv_cd,
453 n->u.comment, strlen(n->u.comment));
454 wrbuf_puts(wr, ")\n");
/* leader: written as is on its own line */
456 case YAZ_MARC_LEADER:
457 wrbuf_printf(wr, "%s\n", n->u.leader);
/* Write the record to wr using the writer that matches the handle's output
   mode: line format, MARCXML, MarcXchange or ISO2709.  The result of the
   selected writer is returned.
   NOTE(review): the switch expression and the first case label are not
   visible in this listing. */
463 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
468 return yaz_marc_write_line(mt, wr);
469 case YAZ_MARC_MARCXML:
470 return yaz_marc_write_marcxml(mt, wr);
471 case YAZ_MARC_XCHANGE:
472 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
473 case YAZ_MARC_ISO2709:
474 return yaz_marc_write_iso2709(mt, wr);
479 /** \brief common MARC XML/Xchange writer
481 \param wr WRBUF output
482 \param ns XMLNS for the elements
483 \param format record format (e.g. "MARC21")
484 \param type record type (e.g. "Bibliographic") */
486 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
491 struct yaz_marc_node *n;
492 int identifier_length;
493 const char *leader = 0;
495 for (n = mt->nodes; n; n = n->next)
496 if (n->which == YAZ_MARC_LEADER)
498 leader = n->u.leader;
504 if (!atoi_n_check(leader+11, 1, &identifier_length))
507 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
509 wrbuf_printf(wr, " format=\"%.80s\"", format);
511 wrbuf_printf(wr, " type=\"%.80s\"", type);
512 wrbuf_printf(wr, ">\n");
513 for (n = mt->nodes; n; n = n->next)
515 struct yaz_marc_subfield *s;
519 case YAZ_MARC_DATAFIELD:
520 wrbuf_printf(wr, " <datafield tag=\"");
521 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
522 strlen(n->u.datafield.tag));
523 wrbuf_printf(wr, "\"");
524 if (n->u.datafield.indicator)
527 for (i = 0; n->u.datafield.indicator[i]; i++)
529 wrbuf_printf(wr, " ind%d=\"", i+1);
530 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
531 n->u.datafield.indicator+i, 1);
532 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
535 wrbuf_printf(wr, ">\n");
536 for (s = n->u.datafield.subfields; s; s = s->next)
538 /* if identifier length is 2 (most MARCs),
539 the code is a single character .. However we've
540 seen multibyte codes, so see how big it really is */
541 size_t using_code_len =
542 (identifier_length != 2) ? identifier_length - 1
544 cdata_one_character(mt, s->code_data);
546 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
547 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
548 s->code_data, using_code_len);
549 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
550 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
551 s->code_data + using_code_len,
552 strlen(s->code_data + using_code_len));
553 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
554 wrbuf_puts(wr, "\n");
556 wrbuf_printf(wr, " </datafield>\n");
558 case YAZ_MARC_CONTROLFIELD:
559 wrbuf_printf(wr, " <controlfield tag=\"");
560 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
561 strlen(n->u.controlfield.tag));
562 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
/* NOTE(review): control field data is written with wrbuf_iconv_puts(),
   whereas the tag and the subfield data go through
   wrbuf_iconv_write_cdata(); confirm whether this content also needs
   the cdata writer. */
563 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
564 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
565 wrbuf_puts(wr, "\n");
567 case YAZ_MARC_COMMENT:
568 wrbuf_printf(wr, "<!-- ");
/* NOTE(review): the comment text is copied verbatim into the XML comment */
569 wrbuf_puts(wr, n->u.comment);
570 wrbuf_printf(wr, " -->\n");
572 case YAZ_MARC_LEADER:
573 wrbuf_printf(wr, " <leader>");
574 wrbuf_iconv_write_cdata(wr,
575 0 /* no charset conversion for leader */,
576 n->u.leader, strlen(n->u.leader));
577 wrbuf_printf(wr, "</leader>\n");
580 wrbuf_puts(wr, "</record>\n");
/* Write the record as MARCXML (namespace http://www.loc.gov/MARC21/slim).
   When no leader specification has been set on the handle, leader position
   9 is first overwritten with "a". */
584 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
586 if (!mt->leader_spec)
587 yaz_marc_modify_leader(mt, 9, "a");
588 return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
/* Write the record as MarcXchange by delegating to the common XML writer
   with the MarcXchange namespace. */
592 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
596 return yaz_marc_write_marcxml_ns(mt, wr,
597 "http://www.bs.dk/standards/MarcXchange",
/* Write the record in ISO2709 format to wr.
   Pass 1 builds the directory in wr_dir: every field is converted into the
   scratch buffer wr_data_tmp only to learn its converted length.
   A 24 byte leader is then assembled from the computed record length, the
   computed base address and bytes copied from the stored leader.
   Pass 2 writes the field data itself, followed by the record terminator. */
601 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
603 struct yaz_marc_node *n;
604 int indicator_length;
605 int identifier_length;
606 int length_data_entry;
608 int length_implementation;
610 const char *leader = 0;
611 WRBUF wr_dir, wr_head, wr_data_tmp;
614 for (n = mt->nodes; n; n = n->next)
615 if (n->which == YAZ_MARC_LEADER)
616 leader = n->u.leader;
/* field widths for the directory come from the stored leader */
620 if (!atoi_n_check(leader+10, 1, &indicator_length))
622 if (!atoi_n_check(leader+11, 1, &identifier_length))
624 if (!atoi_n_check(leader+20, 1, &length_data_entry))
626 if (!atoi_n_check(leader+21, 1, &length_starting))
628 if (!atoi_n_check(leader+22, 1, &length_implementation))
631 wr_data_tmp = wrbuf_alloc();
632 wr_dir = wrbuf_alloc();
633 for (n = mt->nodes; n; n = n->next)
636 struct yaz_marc_subfield *s;
640 case YAZ_MARC_DATAFIELD:
641 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
642 data_length += indicator_length;
643 wrbuf_rewind(wr_data_tmp);
644 for (s = n->u.datafield.subfields; s; s = s->next)
646 /* write dummy IDFS + content */
647 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
648 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
650 /* write dummy FS (makes MARC-8 to become ASCII) */
651 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
652 data_length += wrbuf_len(wr_data_tmp);
654 case YAZ_MARC_CONTROLFIELD:
655 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
657 wrbuf_rewind(wr_data_tmp);
658 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
659 n->u.controlfield.data);
660 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
661 data_length += wrbuf_len(wr_data_tmp);
663 case YAZ_MARC_COMMENT:
665 case YAZ_MARC_LEADER:
/* directory entry: zero padded field length, then zero padded offset */
670 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
671 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
672 data_offset += data_length;
675 /* mark end of directory */
676 wrbuf_putc(wr_dir, ISO2709_FS);
678 /* base address of data (comes after leader+directory) */
679 base_address = 24 + wrbuf_len(wr_dir);
681 wr_head = wrbuf_alloc();
683 /* write record length */
684 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
685 /* from "original" leader */
686 wrbuf_write(wr_head, leader+5, 7);
687 /* base address of data */
688 wrbuf_printf(wr_head, "%05d", base_address);
689 /* from "original" leader */
690 wrbuf_write(wr_head, leader+17, 7);
/* emit leader and directory; the scratch buffers are no longer needed */
692 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
693 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
694 wrbuf_free(wr_head, 1);
695 wrbuf_free(wr_dir, 1);
696 wrbuf_free(wr_data_tmp, 1);
698 for (n = mt->nodes; n; n = n->next)
700 struct yaz_marc_subfield *s;
704 case YAZ_MARC_DATAFIELD:
705 wrbuf_printf(wr, "%.*s", indicator_length,
706 n->u.datafield.indicator);
707 for (s = n->u.datafield.subfields; s; s = s->next)
709 wrbuf_putc(wr, ISO2709_IDFS);
710 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
711 /* write dummy blank - makes MARC-8 to become ASCII */
712 wrbuf_iconv_putchar(wr, mt->iconv_cd, ' ');
715 wrbuf_putc(wr, ISO2709_FS);
717 case YAZ_MARC_CONTROLFIELD:
718 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
719 /* write dummy blank - makes MARC-8 to become ASCII */
720 wrbuf_iconv_putchar(wr, mt->iconv_cd, ' ');
722 wrbuf_putc(wr, ISO2709_FS);
724 case YAZ_MARC_COMMENT:
726 case YAZ_MARC_LEADER:
/* record terminator */
730 wrbuf_printf(wr, "%c", ISO2709_RS);
/* Read the "subfield" child elements starting at ptr and add each one to
   the current data field.  The "code" attribute is mandatory; other
   attributes and other element names are reported through
   yaz_marc_cprintf().  For each subfield a buffer holding the code followed
   by all text children is built and passed to yaz_marc_add_subfield(). */
735 int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
737 for (; ptr; ptr = ptr->next)
739 if (ptr->type == XML_ELEMENT_NODE)
741 if (!strcmp((const char *) ptr->name, "subfield"))
743 size_t ctrl_data_len = 0;
744 char *ctrl_data_buf = 0;
745 const xmlNode *p = 0, *ptr_code = 0;
746 struct _xmlAttr *attr;
747 for (attr = ptr->properties; attr; attr = attr->next)
748 if (!strcmp((const char *)attr->name, "code"))
749 ptr_code = attr->children;
753 mt, "Bad attribute '%.80s' for 'subfield'",
760 mt, "Missing attribute 'code' for 'subfield'" );
763 if (ptr_code->type == XML_TEXT_NODE)
766 strlen((const char *)ptr_code->content);
771 mt, "Missing value for 'code' in 'subfield'" );
/* first pass over the children: total length of the text content */
774 for (p = ptr->children; p ; p = p->next)
775 if (p->type == XML_TEXT_NODE)
776 ctrl_data_len += strlen((const char *)p->content);
/* NOTE(review): the buffer must also hold the code copied below; this
   presumably relies on ctrl_data_len having been set to the code length
   on a line that is only partly visible above. */
777 ctrl_data_buf = nmem_malloc(mt->nmem, ctrl_data_len+1);
778 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
/* second pass: append the text content after the code */
779 for (p = ptr->children; p ; p = p->next)
780 if (p->type == XML_TEXT_NODE)
781 strcat(ctrl_data_buf, (const char *)p->content);
782 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
787 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
/* Locate the "leader" element among the siblings starting at *ptr_p, take
   its text content and hand it to yaz_marc_read_leader().  A missing
   leader, another element in its place, or a leader that is not exactly 24
   characters long is reported through yaz_marc_cprintf(). */
795 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
797 int indicator_length;
798 int identifier_length;
800 int length_data_entry;
802 int length_implementation;
803 const char *leader = 0;
804 const xmlNode *ptr = *ptr_p;
806 for(; ptr; ptr = ptr->next)
807 if (ptr->type == XML_ELEMENT_NODE)
809 if (!strcmp((const char *) ptr->name, "leader"))
811 xmlNode *p = ptr->children;
812 for(; p; p = p->next)
813 if (p->type == XML_TEXT_NODE)
814 leader = (const char *) p->content;
820 mt, "Expected element 'leader', got '%.80s'", ptr->name);
826 yaz_marc_cprintf(mt, "Missing element 'leader'");
829 if (strlen(leader) != 24)
/* NOTE(review): strlen() yields a size_t but the format below uses %d */
831 yaz_marc_cprintf(mt, "Bad length %d of leader data."
832 " Must have length of 24 characters", strlen(leader));
835 yaz_marc_read_leader(mt, leader,
841 &length_implementation);
/* Read the "controlfield" and "datafield" elements starting at ptr.
   Both require a "tag" attribute.  For data fields the attributes
   ind1..ind9 are collected into indstr and the subfields are read with
   yaz_marc_read_xml_subfields().  Unexpected attributes and elements are
   reported through yaz_marc_cprintf(). */
846 static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
848 for(; ptr; ptr = ptr->next)
849 if (ptr->type == XML_ELEMENT_NODE)
851 if (!strcmp((const char *) ptr->name, "controlfield"))
853 const xmlNode *ptr_tag = 0;
854 struct _xmlAttr *attr;
855 for (attr = ptr->properties; attr; attr = attr->next)
856 if (!strcmp((const char *)attr->name, "tag"))
857 ptr_tag = attr->children;
861 mt, "Bad attribute '%.80s' for 'controlfield'",
868 mt, "Missing attribute 'tag' for 'controlfield'" );
871 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
873 else if (!strcmp((const char *) ptr->name, "datafield"))
875 char indstr[11]; /* 0(unused), 1,....9, + zero term */
876 const xmlNode *ptr_tag = 0;
877 struct _xmlAttr *attr;
879 for (i = 0; i<11; i++)
881 for (attr = ptr->properties; attr; attr = attr->next)
882 if (!strcmp((const char *)attr->name, "tag"))
883 ptr_tag = attr->children;
/* a four character attribute name starting with "ind" is an indicator;
   its last character selects the slot in indstr */
884 else if (strlen((const char *)attr->name) == 4 &&
885 !memcmp(attr->name, "ind", 3))
887 int no = atoi((const char *)attr->name+3);
889 && attr->children->type == XML_TEXT_NODE)
890 indstr[no] = attr->children->content[0];
895 mt, "Bad attribute '%.80s' for 'datafield'",
902 mt, "Missing attribute 'tag' for 'datafield'" );
905 /* note that indstr[0] is unused so we use indstr[1..] */
906 yaz_marc_add_datafield_xml(mt, ptr_tag,
907 indstr+1, strlen(indstr+1));
909 if (yaz_marc_read_xml_subfields(mt, ptr->children))
915 "Expected element controlfield or datafield,"
916 " got %.80s", ptr->name);
/* Read a MARC record from a libxml2 node (passed as const void *).  The
   first element must be "record"; its leader is read with
   yaz_marc_read_xml_leader() and the elements after the leader with
   yaz_marc_read_xml_fields(), whose result is returned. */
923 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
925 const xmlNode *ptr = xmlnode;
926 for(; ptr; ptr = ptr->next)
927 if (ptr->type == XML_ELEMENT_NODE)
929 if (!strcmp((const char *) ptr->name, "record"))
934 mt, "Unknown element '%.80s' in MARC XML reader",
941 yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
944 /* ptr points to record node now */
946 if (yaz_marc_read_xml_leader(mt, &ptr))
948 return yaz_marc_read_xml_fields(mt, ptr->next);
951 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
/* Parse an ISO2709 record from buf into the handle's node list.
   bsize is the size of buf, or -1 when it is unknown.  On success the
   record length taken from the first five bytes is returned.
   The directory is scanned twice: once to find its end and validate the
   entries, once to create the control and data field nodes. */
957 int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
961 int indicator_length;
962 int identifier_length;
963 int end_of_directory;
965 int length_data_entry;
967 int length_implementation;
971 record_length = atoi_n (buf, 5);
/* NOTE(review): the test rejects lengths below 25 while the message
   says "< 24" */
972 if (record_length < 25)
974 yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
977 /* bail out if bsize is known and record_length exceeds it */
978 if (bsize != -1 && record_length > bsize)
/* NOTE(review): the message prints "record_length < bsize" although this
   branch is taken when record_length > bsize */
980 yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
981 record_length, bsize);
985 yaz_marc_cprintf(mt, "Record length %5d", record_length);
987 yaz_marc_read_leader(mt, buf,
993 &length_implementation);
995 /* First pass. determine length of directory & base of data */
996 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
998 /* length of directory entry */
999 int l = 3 + length_data_entry + length_starting;
1000 if (entry_p + l >= record_length)
1002 yaz_marc_cprintf(mt, "Directory offset %d: end of record."
1003 " Missing FS char", entry_p);
1008 yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s",
1009 entry_p, buf+entry_p);
1011 /* Check for digits in length info */
1013 if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
1017 /* Not all digits, so stop directory scan */
1018 yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
1019 " length and/or length starting", entry_p);
1022 entry_p += 3 + length_data_entry + length_starting;
1024 end_of_directory = entry_p;
1025 if (base_address != entry_p+1)
1027 yaz_marc_cprintf(mt, "Base address not at end of directory,"
1028 " base %d, end %d", base_address, entry_p+1);
1031 /* Second pass. parse control - and datafields */
1032 for (entry_p = 24; entry_p != end_of_directory; )
1039 int identifier_flag = 0;
1040 int entry_p0 = entry_p;
/* directory entry layout: 3 byte tag, field length, starting offset */
1042 memcpy (tag, buf+entry_p, 3);
1045 data_length = atoi_n(buf+entry_p, length_data_entry);
1046 entry_p += length_data_entry;
1047 data_offset = atoi_n(buf+entry_p, length_starting);
1048 entry_p += length_starting;
/* i is the first byte of the field, end_offset its last byte */
1049 i = data_offset + base_address;
1050 end_offset = i+data_length-1;
1052 if (data_length <= 0 || data_offset < 0)
1057 yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
1059 tag, entry_p0, data_length, data_offset);
1061 if (end_offset >= record_length)
1063 yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
1064 entry_p0, end_offset, record_length);
1068 if (memcmp (tag, "00", 2))
1069 identifier_flag = 1; /* if not 00X assume subfields */
1070 else if (indicator_length < 4 && indicator_length > 0)
1072 /* Danmarc 00X have subfields */
1073 if (buf[i + indicator_length] == ISO2709_IDFS)
1074 identifier_flag = 1;
1075 else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
1076 identifier_flag = 2;
1079 if (identifier_flag)
/* data field: indicators first, then subfields up to the next separator */
1082 i += identifier_flag-1;
1083 yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
1084 i += indicator_length;
1086 while (i < end_offset &&
1087 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1089 int code_offset = i+1;
1092 while (i < end_offset &&
1093 buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
1094 buf[i] != ISO2709_FS)
1096 yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
/* control field: everything up to the field or record separator */
1103 while (i < end_offset &&
1104 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1106 yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
1110 yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
1113 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1115 yaz_marc_cprintf(mt, "No separator at end of field length=%d",
1119 return record_length;
/* Decode an ISO2709 record from buf and write it to wr in the handle's
   output mode.  Returns the value of yaz_marc_read_iso2709() (the record
   length) on success and -1 when writing fails.
   NOTE(review): the handling of a failed read is on lines not visible in
   this listing. */
1122 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1124 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1127 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1129 return -1; /* error */
1130 return r; /* OK, return length > 0 */
/* Decode an ISO2709 record into the handle's internal WRBUF (m_wr), which
   is rewound first.  *result is set to the WRBUF's buffer and *rsize to its
   length; the storage belongs to the handle, not to the caller.
   NOTE(review): the conditions guarding the two assignments are not
   visible in this listing. */
1133 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1134 char **result, int *rsize)
1138 wrbuf_rewind(mt->m_wr);
1139 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1141 *result = wrbuf_buf(mt->m_wr);
1143 *rsize = wrbuf_len(mt->m_wr);
1147 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1153 void yaz_marc_debug(yaz_marc_t mt, int level)
1159 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/* Overwrite part of the record's leader: the characters of str (without
   its terminator) are copied to offset off of the leader node.
   NOTE(review): no check that off + strlen(str) stays within the leader is
   visible here; confirm that callers guarantee it. */
1164 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1166 struct yaz_marc_node *n;
1168 for (n = mt->nodes; n; n = n->next)
1169 if (n->which == YAZ_MARC_LEADER)
1171 leader = n->u.leader;
1172 memcpy(leader+off, str, strlen(str));
/* One-shot convenience wrapper: create a temporary handle, decode buf into
   wr with yaz_marc_decode_wrbuf() and destroy the handle again.
   NOTE(review): the lines applying the debug and xml arguments to the
   handle are not visible in this listing. */
1178 int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
1180 yaz_marc_t mt = yaz_marc_create();
1185 r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr);
1186 yaz_marc_destroy(mt);
/* Decode buf into wr by calling yaz_marc_decode() with xml set to 0. */
1191 int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
1193 return yaz_marc_decode(buf, wr, debug, bsize, 0);
/* Decode buf with a temporary handle and write the result to the stream
   outf.  The record is rendered into the handle's own WRBUF and then
   copied out with fwrite().
   NOTE(review): the fwrite() result is not checked on the visible line. */
1197 int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
1199 yaz_marc_t mt = yaz_marc_create();
1203 r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr);
1207 fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf);
1208 yaz_marc_destroy(mt);
/* Same as marc_display_exl() with an unknown buffer size (-1). */
1213 int marc_display_ex (const char *buf, FILE *outf, int debug)
1215 return marc_display_exl (buf, outf, debug, -1);
/* Same as marc_display_ex() with debug set to 0. */
1219 int marc_display (const char *buf, FILE *outf)
1221 return marc_display_ex (buf, outf, 0);
/* Replace the handle's leader specification.  The previous specification
   is freed first.  The new one is validated by applying it to a dummy
   24 byte leader with marc_exec_leader(), and a private copy is stored.
   NOTE(review): the branches for a null leader_spec and for a failed
   validation are not visible in this listing. */
1224 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1226 xfree(mt->leader_spec);
1227 mt->leader_spec = 0;
1230 char dummy_leader[24];
1231 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1233 mt->leader_spec = xstrdup(leader_spec);
/* Apply a leader specification to leader, a buffer of size bytes.
   Each item is scanned with sscanf() as "pos=value", where the value is at
   most 20 characters and ends at a comma.  The visible checks require pos
   to lie in [0, size) and a quoted value to fit within the buffer before it
   is copied to leader + pos.  A value starting with a digit is handled by a
   separate branch whose body is not visible in this listing. */
1238 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1240 const char *cp = leader_spec;
1245 int no_read = 0, no = 0;
1247 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1248 if (no < 2 || no_read < 3)
/* NOTE(review): pos appears to be an int (scanned with %d) and is compared
   with the size_t size here */
1250 if (pos < 0 || pos >= size)
/* quoted value: look for the closing quote after the opening one */
1255 const char *vp = strchr(val+1, '\'');
1261 if (len + pos > size)
1263 memcpy(leader + pos, val+1, len);
1265 else if (*val >= '0' && *val <= '9')
1285 * indent-tabs-mode: nil
1287 * vim: shiftwidth=4 tabstop=8 expandtab