1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
// States of the optional collection wrapper written around XML records.
// NOTE(review): the enumerators are not visible in this excerpt; no_collection,
// collection_first and collection_second are used further down and presumably
// are declared here -- confirm against the full file.
35 enum yaz_collection_state {
41 /** \brief node types for yaz_marc_node */
42 enum YAZ_MARC_NODE_TYPE
/* node carries a control field; code setting this value fills u.controlfield */
45 YAZ_MARC_CONTROLFIELD,
50 /** \brief represents a data field */
51 struct yaz_marc_datafield {
/* head of the singly linked list of subfields of this field (0 when empty) */
54 struct yaz_marc_subfield *subfields;
57 /** \brief represents a control field */
/* NOTE(review): the members are not visible in this excerpt; code below
   accesses .tag and .data on values of this type. */
58 struct yaz_marc_controlfield {
63 /** \brief a comment node */
/* NOTE(review): the members are not visible in this excerpt -- confirm the
   layout against the full file. */
64 struct yaz_marc_comment {
68 /** \brief MARC node */
69 struct yaz_marc_node {
/* discriminator: writers switch on this to decide which member of u to read */
70 enum YAZ_MARC_NODE_TYPE which;
/* payload for YAZ_MARC_DATAFIELD nodes */
72 struct yaz_marc_datafield datafield;
/* payload for YAZ_MARC_CONTROLFIELD nodes */
73 struct yaz_marc_controlfield controlfield;
/* next node of the record; the writer loops stop when this is 0 */
77 struct yaz_marc_node *next;
80 /** \brief represents a subfield */
81 struct yaz_marc_subfield {
/* next subfield of the same data field; 0 terminates the list */
83 struct yaz_marc_subfield *next;
86 /** \brief the internals of a yaz_marc_t handle */
/* NOTE(review): the struct header and several members are not visible in
   this excerpt. */
/* non-zero: XML output is built with libxml2 (see yaz_marc_write_marcxml_ns) */
93 int write_using_libxml2;
/* state of the collection wrapper, see yaz_marc_enable_collection */
94 enum yaz_collection_state enable_collection;
/* head of the list of nodes that make up the current record */
99 struct yaz_marc_node *nodes;
/* address of the link where the next node is attached (presumably the list
   tail; the store through it is not visible in this excerpt) */
100 struct yaz_marc_node **nodes_pp;
/* address of the link where the next subfield is attached */
101 struct yaz_marc_subfield **subfield_pp;
/** \brief allocates a MARC handle and sets its defaults

    Visible defaults: output format YAZ_MARC_LINE, libxml2 writing off,
    collection wrapping disabled, subfield prefix " $", end-of-line "\n".
    The handle owns a WRBUF (m_wr) and an NMEM pool (nmem).
    NOTE(review): further initialisation and the return statement are not
    visible in this excerpt.
*/
104 yaz_marc_t yaz_marc_create(void)
106 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
107 mt->output_format = YAZ_MARC_LINE;
109 mt->write_using_libxml2 = 0;
110 mt->enable_collection = no_collection;
111 mt->m_wr = wrbuf_alloc();
114 strcpy(mt->subfield_str, " $");
115 strcpy(mt->endline_str, "\n");
117 mt->nmem = nmem_create();
/** \brief releases the resources owned by a MARC handle

    Destroys the NMEM pool and the internal WRBUF and frees the leader spec.
    NOTE(review): a null check and the release of the handle itself are not
    visible in this excerpt.
*/
122 void yaz_marc_destroy(yaz_marc_t mt)
126 nmem_destroy(mt->nmem);
127 wrbuf_destroy(mt->m_wr);
128 xfree(mt->leader_spec);
/** \brief returns an NMEM for the handle
    NOTE(review): the body is not visible in this excerpt; presumably it
    returns mt->nmem -- confirm. */
132 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
/** \brief calls wrbuf_iconv_reset on wr with the handle's iconv descriptor

    The writers call this after each converted field or subfield value.
*/
137 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
139 wrbuf_iconv_reset(wr, mt->iconv_cd);
/* forward declaration; the definition appears near the end of this file.
   NOTE(review): the remainder of the parameter list is not visible here. */
142 static int marc_exec_leader(const char *leader_spec, char *leader,
/** \brief allocates a new node from the handle's NMEM pool

    After the call mt->nodes_pp refers to the new node's next link, so the
    following node is attached behind it.
    NOTE(review): linking the node into the list and the return statement
    are not visible in this excerpt.
*/
146 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
148 struct yaz_marc_node *n = (struct yaz_marc_node *)
149 nmem_malloc(mt->nmem, sizeof(*n));
152 mt->nodes_pp = &n->next;
/** \brief adds a control field whose tag and data come from libxml2 nodes

    \param mt handle
    \param ptr_tag XML node holding the tag text
    \param ptr_data XML node holding the field data

    Both values are obtained with nmem_text_node_cdata using the handle's
    NMEM pool.
*/
157 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
158 const xmlNode *ptr_data)
160 struct yaz_marc_node *n = yaz_marc_add_node(mt);
161 n->which = YAZ_MARC_CONTROLFIELD;
162 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
163 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief adds a control field with a caller-supplied tag string

    \param mt handle
    \param tag tag string
    \param ptr_data XML node holding the field data
*/
166 void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, const char *tag,
167 const xmlNode *ptr_data)
169 struct yaz_marc_node *n = yaz_marc_add_node(mt);
170 n->which = YAZ_MARC_CONTROLFIELD;
/* the tag pointer is stored as-is, not copied: it must stay valid for as
   long as the node is used */
171 n->u.controlfield.tag = tag;
172 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief adds a comment node to the record

    \param mt handle
    \param comment comment text; duplicated into the handle's NMEM pool
*/
178 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
180 struct yaz_marc_node *n = yaz_marc_add_node(mt);
181 n->which = YAZ_MARC_COMMENT;
182 n->u.comment = nmem_strdup(mt->nmem, comment);
/** \brief adds a comment node built from a printf-style format

    \param mt handle
    \param fmt format string, followed by its arguments

    The text is formatted into a local buffer with yaz_vsnprintf and then
    passed to yaz_marc_add_comment.
    NOTE(review): the buffer declaration and the va_list handling are not
    visible in this excerpt.
*/
185 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
191 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
192 yaz_marc_add_comment(mt, buf);
/** \brief returns a debug setting for the handle
    NOTE(review): the body is not visible in this excerpt -- confirm what is
    returned. */
196 int yaz_marc_get_debug(yaz_marc_t mt)
/** \brief adds a leader node to the record

    \param mt handle
    \param leader leader bytes
    \param leader_len number of bytes to copy from leader

    The leader is duplicated into the NMEM pool and then handed to
    marc_exec_leader together with the handle's leader_spec.
*/
201 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
203 struct yaz_marc_node *n = yaz_marc_add_node(mt);
204 n->which = YAZ_MARC_LEADER;
205 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
/* the return value of marc_exec_leader is ignored here */
206 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
/** \brief adds a control field from a tag string and a data buffer

    \param mt handle
    \param tag tag string (duplicated)
    \param data field data
    \param data_len number of bytes of data to copy

    The second half of the function builds a comment node with a hex dump of
    at most the first 16 data bytes.
    NOTE(review): the condition guarding the dump and the declaration of msg
    are not visible in this excerpt.
*/
209 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
210 const char *data, size_t data_len)
212 struct yaz_marc_node *n = yaz_marc_add_node(mt);
213 n->which = YAZ_MARC_CONTROLFIELD;
214 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
215 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
221 sprintf(msg, "controlfield:");
/* one " XX" group per byte, capped at 16 bytes */
222 for (i = 0; i < 16 && i < data_len; i++)
223 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
/* " .." marker; presumably only added when data was cut off -- confirm */
225 sprintf(msg + strlen(msg), " ..");
226 yaz_marc_add_comment(mt, msg);
/** \brief adds a data field with no subfields yet

    \param mt handle
    \param tag tag string (duplicated)
    \param indicator indicator characters
    \param indicator_len number of indicator bytes to copy

    Subsequent yaz_marc_add_subfield calls append to this field.
*/
230 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
231 const char *indicator, size_t indicator_len)
233 struct yaz_marc_node *n = yaz_marc_add_node(mt);
234 n->which = YAZ_MARC_DATAFIELD;
235 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
236 n->u.datafield.indicator =
237 nmem_strdupn(mt->nmem, indicator, indicator_len);
238 n->u.datafield.subfields = 0;
240 /* make subfield_pp the current (last one) */
241 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief adds a data field whose tag comes from a libxml2 node

    \param mt handle
    \param ptr_tag XML node holding the tag text
    \param indicator indicator characters
    \param indicator_len number of indicator bytes to copy

    Subsequent yaz_marc_add_subfield calls append to this field.
*/
245 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
246 const char *indicator, size_t indicator_len)
248 struct yaz_marc_node *n = yaz_marc_add_node(mt);
249 n->which = YAZ_MARC_DATAFIELD;
250 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
251 n->u.datafield.indicator =
252 nmem_strdupn(mt->nmem, indicator, indicator_len);
253 n->u.datafield.subfields = 0;
255 /* make subfield_pp the current (last one) */
256 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief adds a data field with a caller-supplied tag and no indicators

    \param mt handle
    \param tag_value tag string

    The indicator pointer starts out as 0; it can be set afterwards with
    yaz_marc_datafield_set_indicators.
    NOTE(review): the return statement is not visible in this excerpt;
    presumably the new node is returned.
*/
259 struct yaz_marc_node* yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value)
261 struct yaz_marc_node *n = yaz_marc_add_node(mt);
262 n->which = YAZ_MARC_DATAFIELD;
/* the tag pointer is stored as-is, not copied */
263 n->u.datafield.tag = tag_value;
264 n->u.datafield.indicator = 0;
265 n->u.datafield.subfields = 0;
267 /* make subfield_pp the current (last one) */
268 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief sets the indicator string of a data field node

    \param n data field node
    \param indicator indicator string; the pointer is stored, not copied
*/
272 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
274 n->u.datafield.indicator = indicator;
/** \brief appends a subfield to the most recently added data field

    \param mt handle
    \param code_data subfield code immediately followed by its data
    \param code_data_len number of bytes of code_data to copy

    The first part builds a comment node with a hex dump of at most the
    first 16 bytes.
    NOTE(review): the condition guarding the dump, the declaration of msg and
    any check on mt->subfield_pp are not visible in this excerpt.
*/
279 void yaz_marc_add_subfield(yaz_marc_t mt,
280 const char *code_data, size_t code_data_len)
287 sprintf(msg, "subfield:");
288 for (i = 0; i < 16 && i < code_data_len; i++)
289 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
/* mark the dump as truncated when more than 16 bytes were supplied */
290 if (i < code_data_len)
291 sprintf(msg + strlen(msg), " ..");
292 yaz_marc_add_comment(mt, msg);
297 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
298 nmem_malloc(mt->nmem, sizeof(*n));
299 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
301 /* mark subfield_pp to point to this one, so we append here next */
302 *mt->subfield_pp = n;
303 mt->subfield_pp = &n->next;
/** \brief parses the numeric parts of a 24-byte leader and adds the leader

    \param mt handle
    \param leader_c leader; 24 bytes are copied from it
    \param indicator_length receives the value at offset 10
    \param identifier_length receives the value at offset 11
    \param length_data_entry receives the value at offset 20
    \param length_starting receives the value at offset 21
    \param length_implementation receives the value at offset 22

    Each position is checked with atoi_n_check. The visible assignments
    provide fallback values (2, 2, 4, 5 and 0 in the order above).
    NOTE(review): the braces around the failure branches are not visible in
    this excerpt, so confirm the fallbacks only apply when a check fails. The
    base_address parameter used below (offsets 12..16) is declared on a line
    that is not visible here.
*/
307 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
308 int *indicator_length,
309 int *identifier_length,
311 int *length_data_entry,
312 int *length_starting,
313 int *length_implementation)
/* work on a private copy of the leader */
317 memcpy(leader, leader_c, 24);
319 if (!atoi_n_check(leader+10, 1, indicator_length))
322 "Indicator length at offset 10 should hold a digit."
325 *indicator_length = 2;
327 if (!atoi_n_check(leader+11, 1, identifier_length))
330 "Identifier length at offset 11 should hold a digit."
333 *identifier_length = 2;
335 if (!atoi_n_check(leader+12, 5, base_address))
338 "Base address at offsets 12..16 should hold a number."
342 if (!atoi_n_check(leader+20, 1, length_data_entry))
345 "Length data entry at offset 20 should hold a digit."
347 *length_data_entry = 4;
350 if (!atoi_n_check(leader+21, 1, length_starting))
353 "Length starting at offset 21 should hold a digit."
355 *length_starting = 5;
358 if (!atoi_n_check(leader+22, 1, length_implementation))
361 "Length implementation at offset 22 should hold a digit."
363 *length_implementation = 0;
/* record the resulting values as comment nodes */
369 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
370 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
371 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
372 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
373 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
374 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
376 yaz_marc_add_leader(mt, leader, 24);
/** \brief sets the subfield prefix string used by the line writer

    \param mt handle
    \param s new prefix; truncated to fit mt->subfield_str
*/
379 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
381 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
382 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/** \brief sets the end-of-line string used by the line writer

    \param mt handle
    \param s new end-of-line string; truncated to fit mt->endline_str
*/
385 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
387 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
388 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
391 /* try to guess how many bytes the identifier really is! */
/* Feeds the first 1, 2, 3 and 4 bytes of buf to yaz_iconv and returns the
   first length for which the conversion does not report an error; falls
   back to 1 otherwise.
   NOTE(review): the condition selecting between the two fallback returns is
   not visible in this excerpt (presumably whether an iconv descriptor is
   set). */
392 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
397 for (i = 1; i<5; i++)
400 size_t outbytesleft = sizeof(outbuf);
402 const char *inp = buf;
404 size_t inbytesleft = i;
405 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
406 &outp, &outbytesleft);
407 if (r != (size_t) (-1))
408 return i; /* got a complete sequence */
410 return 1; /* giving up */
412 return 1; /* we don't know */
/** \brief discards the current record so a new one can be built

    Resets the NMEM pool and points nodes_pp back at the list head.
    NOTE(review): further resets are not visible in this excerpt.
*/
415 void yaz_marc_reset(yaz_marc_t mt)
417 nmem_reset(mt->nmem);
419 mt->nodes_pp = &mt->nodes;
/** \brief writes the record's comment nodes to wr, one per line

    \param mt handle
    \param wr output buffer

    Locates the leader first and validates its identifier length digit.
    NOTE(review): return statements, the handling of a missing leader and the
    other switch cases are not visible in this excerpt.
*/
423 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
425 struct yaz_marc_node *n;
426 int identifier_length;
427 const char *leader = 0;
429 for (n = mt->nodes; n; n = n->next)
430 if (n->which == YAZ_MARC_LEADER)
432 leader = n->u.leader;
438 if (!atoi_n_check(leader+11, 1, &identifier_length))
441 for (n = mt->nodes; n; n = n->next)
445 case YAZ_MARC_COMMENT:
446 wrbuf_iconv_write(wr, mt->iconv_cd,
447 n->u.comment, strlen(n->u.comment));
448 wrbuf_puts(wr, "\n");
/** \brief returns the number of bytes occupied by a subfield code

    \param mt handle (supplies the iconv descriptor for the probe)
    \param data start of the subfield (code followed by data)
    \param identifier_length identifier length from the leader
*/
457 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
458 int identifier_length)
460 /* if identifier length is 2 (most MARCs) or less (probably an error),
461 the code is a single character .. However we've
462 seen multibyte codes, so see how big it really is */
463 if (identifier_length > 2)
464 return identifier_length - 1;
466 return cdata_one_character(mt, data);
/** \brief writes the record in line format

    \param mt handle
    \param wr output buffer

    Data fields are written as "tag indicators" followed by each subfield as
    subfield_str, code, a blank and the data; control fields as "tag data".
    Field lines end with endline_str. Comments are closed with ")" and the
    leader is written on its own line.
    NOTE(review): return statements, the handling of a missing leader and the
    text opening a comment line are not visible in this excerpt.
*/
469 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
471 struct yaz_marc_node *n;
472 int identifier_length;
473 const char *leader = 0;
475 for (n = mt->nodes; n; n = n->next)
476 if (n->which == YAZ_MARC_LEADER)
478 leader = n->u.leader;
484 if (!atoi_n_check(leader+11, 1, &identifier_length))
487 for (n = mt->nodes; n; n = n->next)
489 struct yaz_marc_subfield *s;
492 case YAZ_MARC_DATAFIELD:
/* NOTE(review): indicator is passed to %s; nodes created by
   yaz_marc_add_datafield_turbo_xml start with indicator == 0 -- confirm it
   is always set before this writer runs */
493 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
494 n->u.datafield.indicator);
495 for (s = n->u.datafield.subfields; s; s = s->next)
497 size_t using_code_len = get_subfield_len(mt, s->code_data,
500 wrbuf_puts (wr, mt->subfield_str);
501 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
503 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
504 wrbuf_iconv_puts(wr, mt->iconv_cd,
505 s->code_data + using_code_len);
506 marc_iconv_reset(mt, wr);
508 wrbuf_puts (wr, mt->endline_str);
510 case YAZ_MARC_CONTROLFIELD:
511 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
512 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
513 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
514 marc_iconv_reset(mt, wr);
515 wrbuf_puts (wr, mt->endline_str);
517 case YAZ_MARC_COMMENT:
519 wrbuf_iconv_write(wr, mt->iconv_cd,
520 n->u.comment, strlen(n->u.comment));
521 marc_iconv_reset(mt, wr);
522 wrbuf_puts(wr, ")\n");
524 case YAZ_MARC_LEADER:
525 wrbuf_printf(wr, "%s\n", n->u.leader);
528 wrbuf_puts(wr, "\n");
/** \brief writes the closing collection tag when a collection was opened

    \param mt handle
    \param wr output buffer

    Only acts when the collection state is collection_second, which the XML
    writers set once they have started a record inside a collection. The
    closing tag is written for the MARCXML, Turbo MARCXML and MarcXchange
    output formats.
    NOTE(review): return statements are not visible in this excerpt.
*/
532 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
534 if (mt->enable_collection == collection_second)
536 switch(mt->output_format)
538 case YAZ_MARC_MARCXML:
539 case YAZ_MARC_TMARCXML:
540 wrbuf_printf(wr, "</collection>\n");
542 case YAZ_MARC_XCHANGE:
543 wrbuf_printf(wr, "</collection>\n");
/** \brief enables the collection wrapper for XML output

    Sets the state to collection_first; the text XML writers emit the opening
    collection element while the state has this value.
*/
550 void yaz_marc_enable_collection(yaz_marc_t mt)
552 mt->enable_collection = collection_first;
/** \brief writes the record in the handle's current output format

    \param mt handle
    \param wr output buffer

    Dispatches on mt->output_format and returns the selected writer's result.
    NOTE(review): the case labels in front of the yaz_marc_write_line and
    yaz_marc_write_check calls, and any trailing return, are not visible in
    this excerpt.
*/
555 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
557 switch(mt->output_format)
560 return yaz_marc_write_line(mt, wr);
/* MARCXML and Turbo MARCXML share one entry point */
561 case YAZ_MARC_MARCXML:
562 case YAZ_MARC_TMARCXML:
563 return yaz_marc_write_marcxml(mt, wr);
564 case YAZ_MARC_XCHANGE:
565 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
566 case YAZ_MARC_ISO2709:
567 return yaz_marc_write_iso2709(mt, wr);
569 return yaz_marc_write_check(mt, wr);
/* XML element names used by yaz_marc_write_marcxml_ns2, indexed by its
   turbo flag: [0] is the MARCXML name, [1] the short Turbo MARCXML name. */
574 const char *collection_name[2] = { "collection", "collection"};
575 const char *record_name[2] = { "record", "r"};
576 const char *leader_name[2] = { "leader", "l"};
577 const char *controlfield_name[2]= { "controlfield", "c"};
578 const char *datafield_name[2] = { "datafield", "d"};
579 const char *subfield_name[2] = { "subfield", "s"};
582 /** \brief common MARC XML/Xchange writer
584 \param wr WRBUF output
585 \param ns XMLNS for the elements
586 \param format record format (e.g. "MARC21")
587 \param type record type (e.g. "Bibliographic")
// Text writer for MARCXML, MarcXchange and Turbo MARCXML that takes its
// element names from the name tables above, indexed by the turbo flag.
// When collection mode is enabled, the opening collection element (with the
// namespace) is written while the state is collection_first, and the state
// then becomes collection_second.
// NOTE(review): the rest of the parameter list, the conditions choosing
// between the attribute form and the turbo element-name form, and the return
// statements are not visible in this excerpt.
589 static int yaz_marc_write_marcxml_ns2(yaz_marc_t mt, WRBUF wr,
594 struct yaz_marc_node *n;
595 int identifier_length;
596 const char *leader = 0;
// 1 when the handle writes Turbo MARCXML, 0 otherwise; used as table index
598 int turbo = yaz_marc_get_write_format(mt) == YAZ_MARC_TMARCXML;
// locate the leader; its offset 11 holds the identifier length digit
600 for (n = mt->nodes; n; n = n->next)
601 if (n->which == YAZ_MARC_LEADER)
603 leader = n->u.leader;
609 if (!atoi_n_check(leader+11, 1, &identifier_length))
612 if (mt->enable_collection != no_collection)
614 if (mt->enable_collection == collection_first)
615 wrbuf_printf(wr, "<%s xmlns=\"%s\">\n", collection_name[turbo], ns);
616 mt->enable_collection = collection_second;
617 wrbuf_printf(wr, "<%s", record_name[turbo]);
// stand-alone record: the record element itself carries the namespace
621 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
// format and type attributes are limited to 80 characters each
624 wrbuf_printf(wr, " format=\"%.80s\"", format);
626 wrbuf_printf(wr, " type=\"%.80s\"", type);
627 wrbuf_printf(wr, ">\n");
628 for (n = mt->nodes; n; n = n->next)
630 struct yaz_marc_subfield *s;
634 case YAZ_MARC_DATAFIELD:
635 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
// attribute form: tag and indN attributes
637 wrbuf_printf(wr, " tag=\"");
638 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
639 strlen(n->u.datafield.tag));
640 wrbuf_printf(wr, "\"");
641 if (n->u.datafield.indicator)
644 for (i = 0; n->u.datafield.indicator[i]; i++)
646 wrbuf_printf(wr, " ind%d=\"", i+1);
647 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
648 n->u.datafield.indicator+i, 1);
649 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
652 wrbuf_printf(wr, ">\n");
// element-name form: the tag is appended to the element name and the
// indicators become iN child elements
654 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
655 strlen(n->u.datafield.tag));
657 wrbuf_printf(wr, ">\n");
658 if (n->u.datafield.indicator)
661 for (i = 0; n->u.datafield.indicator[i]; i++)
663 wrbuf_printf(wr, " <i%d>", i+1);
664 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
665 n->u.datafield.indicator+i, 1);
666 wrbuf_printf(wr, "</i%d>", i+1);
667 wrbuf_puts(wr, "\n");
671 for (s = n->u.datafield.subfields; s; s = s->next)
// number of leading bytes of code_data that form the subfield code
673 size_t using_code_len = get_subfield_len(mt, s->code_data,
675 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
677 wrbuf_printf(wr, " code=\"");
678 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
679 s->code_data, using_code_len);
680 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
682 // TODO check this. encode special characters.
683 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
684 s->code_data, using_code_len);
685 wrbuf_puts(wr, ">\n");
// subfield data follows the code bytes
687 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
688 s->code_data + using_code_len,
689 strlen(s->code_data + using_code_len));
690 marc_iconv_reset(mt, wr);
691 wrbuf_printf(wr, "</%s>", subfield_name[turbo]);
692 wrbuf_puts(wr, "\n");
694 wrbuf_printf(wr, " </%s>\n", datafield_name[turbo]);
696 case YAZ_MARC_CONTROLFIELD:
697 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
699 wrbuf_printf(wr, " tag=\"");
700 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
701 strlen(n->u.controlfield.tag));
702 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
705 //TODO convert special
706 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
707 strlen(n->u.controlfield.tag));
708 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
710 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
711 n->u.controlfield.data,
712 strlen(n->u.controlfield.data));
713 marc_iconv_reset(mt, wr);
714 wrbuf_printf(wr, "</%s>", controlfield_name[turbo]);
715 wrbuf_puts(wr, "\n");
717 case YAZ_MARC_COMMENT:
// the comment text is written verbatim, without conversion or escaping
718 wrbuf_printf(wr, "<!-- ");
719 wrbuf_puts(wr, n->u.comment);
720 wrbuf_printf(wr, " -->\n");
722 case YAZ_MARC_LEADER:
723 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
724 wrbuf_iconv_write_cdata(wr,
725 0 /* no charset conversion for leader */,
726 n->u.leader, strlen(n->u.leader));
727 wrbuf_printf(wr, " </%s>", leader_name[turbo]);
730 wrbuf_printf(wr, "</%s", record_name[turbo]);
/** \brief text writer for MARCXML / MarcXchange with fixed element names

    Writes record, leader, controlfield, datafield and subfield elements
    into wr. Field and subfield values go through the handle's iconv
    descriptor with XML escaping (wrbuf_iconv_write_cdata); the leader is
    written without character set conversion. When collection mode is
    enabled, the opening collection element is written while the state is
    collection_first, and the state then becomes collection_second.
    NOTE(review): the rest of the parameter list, the conditions around the
    format and type attributes and the return statements are not visible in
    this excerpt.
*/
734 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
739 struct yaz_marc_node *n;
740 int identifier_length;
741 const char *leader = 0;
/* locate the leader; its offset 11 holds the identifier length digit */
743 for (n = mt->nodes; n; n = n->next)
744 if (n->which == YAZ_MARC_LEADER)
746 leader = n->u.leader;
752 if (!atoi_n_check(leader+11, 1, &identifier_length))
755 if (mt->enable_collection != no_collection)
757 if (mt->enable_collection == collection_first)
758 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
759 mt->enable_collection = collection_second;
760 wrbuf_printf(wr, "<record");
/* stand-alone record: the record element itself carries the namespace */
764 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
/* format and type attributes are limited to 80 characters each */
767 wrbuf_printf(wr, " format=\"%.80s\"", format);
769 wrbuf_printf(wr, " type=\"%.80s\"", type);
770 wrbuf_printf(wr, ">\n");
771 for (n = mt->nodes; n; n = n->next)
773 struct yaz_marc_subfield *s;
777 case YAZ_MARC_DATAFIELD:
778 wrbuf_printf(wr, " <datafield tag=\"");
779 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
780 strlen(n->u.datafield.tag));
781 wrbuf_printf(wr, "\"");
782 if (n->u.datafield.indicator)
/* one indN attribute per indicator character, numbered from 1 */
785 for (i = 0; n->u.datafield.indicator[i]; i++)
787 wrbuf_printf(wr, " ind%d=\"", i+1);
788 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
789 n->u.datafield.indicator+i, 1);
790 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
793 wrbuf_printf(wr, ">\n");
794 for (s = n->u.datafield.subfields; s; s = s->next)
/* number of leading bytes of code_data that form the subfield code */
796 size_t using_code_len = get_subfield_len(mt, s->code_data,
798 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
799 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
800 s->code_data, using_code_len);
801 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
802 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
803 s->code_data + using_code_len,
804 strlen(s->code_data + using_code_len));
805 marc_iconv_reset(mt, wr);
806 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
807 wrbuf_puts(wr, "\n");
809 wrbuf_printf(wr, " </datafield>\n");
811 case YAZ_MARC_CONTROLFIELD:
812 wrbuf_printf(wr, " <controlfield tag=\"");
813 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
814 strlen(n->u.controlfield.tag));
815 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
816 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
817 n->u.controlfield.data,
818 strlen(n->u.controlfield.data));
820 marc_iconv_reset(mt, wr);
821 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
822 wrbuf_puts(wr, "\n");
824 case YAZ_MARC_COMMENT:
/* the comment text is written verbatim, without conversion or escaping */
825 wrbuf_printf(wr, "<!-- ");
826 wrbuf_puts(wr, n->u.comment);
827 wrbuf_printf(wr, " -->\n");
829 case YAZ_MARC_LEADER:
830 wrbuf_printf(wr, " <leader>");
831 wrbuf_iconv_write_cdata(wr,
832 0 /* no charset conversion for leader */,
833 n->u.leader, strlen(n->u.leader));
834 wrbuf_printf(wr, "</leader>\n");
837 wrbuf_puts(wr, "</record>\n");
/** \brief writes an XML record, via libxml2 or via the text writer

    When mt->write_using_libxml2 is set, a node tree is built with
    yaz_marc_write_xml (MARCXML) or yaz_marc_write_turbo_xml (otherwise),
    placed in a new document, dumped to memory and copied into wr. Otherwise
    the record is written by yaz_marc_write_marcxml_ns1.
    NOTE(review): the rest of the parameter list, the error handling and the
    release of the libxml2 document and dump buffer are not visible in this
    excerpt -- confirm that both are freed.
*/
842 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
847 if (mt->write_using_libxml2)
853 if (yaz_marc_get_write_format(mt) == YAZ_MARC_MARCXML)
854 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
855 else // Check for Turbo XML
856 ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type);
860 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
863 xmlDocSetRootElement(doc, root_ptr);
864 xmlDocDumpMemory(doc, &buf_out, &len_out);
866 wrbuf_write(wr, (const char *) buf_out, len_out);
/* text writer path */
877 return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
/** \brief writes the record as MARCXML or Turbo MARCXML

    \param mt handle
    \param wr output buffer

    Uses the namespace "http://www.loc.gov/MARC21/slim", or the Index Data
    turboxml namespace when the output format is YAZ_MARC_TMARCXML.
    NOTE(review): name_space is declared after a statement and points at a
    string literal through a non-const pointer; consider moving it to the top
    of the block as const char *. The trailing arguments of the final call
    are not visible in this excerpt.
*/
880 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
882 /* set leader 09 to 'a' for UNICODE */
883 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
/* only done when the caller has not installed a leader spec */
884 if (!mt->leader_spec)
885 yaz_marc_modify_leader(mt, 9, "a");
886 char *name_space = "http://www.loc.gov/MARC21/slim";
887 if (mt->output_format == YAZ_MARC_TMARCXML)
888 name_space = "http://www.indexdata.com/MARC21/turboxml";
889 return yaz_marc_write_marcxml_ns(mt, wr, name_space,
/** \brief writes the record as MarcXchange

    Delegates to yaz_marc_write_marcxml_ns with the namespace
    "info:lc/xmlns/marcxchange-v1".
    NOTE(review): the remaining parameters and call arguments are not visible
    in this excerpt; yaz_marc_write_mode passes four arguments (mt, wr, 0, 0)
    and labels the last two as format and type.
*/
893 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
897 return yaz_marc_write_marcxml_ns(mt, wr,
898 "info:lc/xmlns/marcxchange-v1",
904 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
907 struct yaz_marc_subfield *s;
908 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
910 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
911 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
914 //TODO consider if safe
917 strncpy(field + 1, n->u.datafield.tag, 3);
919 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
921 if (n->u.datafield.indicator)
924 for (i = 0; n->u.datafield.indicator[i]; i++)
929 ind_val[0] = n->u.datafield.indicator[i];
932 sprintf(ind_str, "ind%d", i+1);
933 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
936 sprintf(ind_str, "i%d", i+1);
937 xmlNewTextChild(ptr, ns_record, BAD_CAST ind_str, BAD_CAST ind_val);
941 WRBUF subfield_name = wrbuf_alloc();
942 for (s = n->u.datafield.subfields; s; s = s->next)
944 xmlNode *ptr_subfield;
945 size_t using_code_len = get_subfield_len(mt, s->code_data,
947 wrbuf_rewind(wr_cdata);
948 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
949 marc_iconv_reset(mt, wr_cdata);
952 ptr_subfield = xmlNewTextChild(
954 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
955 wrbuf_rewind(wr_cdata);
956 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
957 xmlNewProp(ptr_subfield, BAD_CAST "code",
958 BAD_CAST wrbuf_cstr(wr_cdata));
960 else { // Turbo format
961 wrbuf_rewind(subfield_name);
962 wrbuf_puts(subfield_name, "s");
963 // TODO Map special codes to something possible for XML ELEMENT names
964 if ((s->code_data[0] >= '0' && s->code_data[0] <= '9') ||
965 (s->code_data[0] >= 'a' && s->code_data[0] <= 'z') ||
966 (s->code_data[0] >= 'A' && s->code_data[0] <= 'Z'))
968 wrbuf_iconv_write(subfield_name, mt->iconv_cd,s->code_data, using_code_len);
971 char buffer[2*using_code_len + 1];
973 for (index = 0; index < using_code_len; index++) {
974 sprintf(buffer + 2*index, "%02X", (unsigned char) s->code_data[index] & 0xFF);
976 buffer[2*(index+1)] = 0;
977 wrbuf_puts(subfield_name, "-");
978 wrbuf_puts(subfield_name, buffer);
979 yaz_log(YLOG_WARN, "Using numeric value in element name: %s", buffer);
981 ptr_subfield = xmlNewTextChild(ptr, ns_record,
982 BAD_CAST wrbuf_cstr(subfield_name),
983 BAD_CAST wrbuf_cstr(wr_cdata));
986 wrbuf_destroy(subfield_name);
/** \brief builds a libxml2 tree for the record in Turbo MARCXML layout

    \param mt handle
    \param root_ptr receives the created record element (named "r")

    Data fields are added by add_marc_datafield_turbo_xml. Control field
    data is converted through the handle's iconv descriptor into a scratch
    WRBUF, which is destroyed at the end. Comments become XML comment nodes.
    NOTE(review): the rest of the parameter list, the conditions choosing
    between the "controlfield" element and the short element name, and the
    return statements are not visible in this excerpt.
*/
989 int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
994 struct yaz_marc_node *n;
995 int identifier_length;
996 const char *leader = 0;
1000 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
/* locate the leader; its offset 11 holds the identifier length digit */
1001 for (n = mt->nodes; n; n = n->next)
1002 if (n->which == YAZ_MARC_LEADER)
1004 leader = n->u.leader;
1010 if (!atoi_n_check(leader+11, 1, &identifier_length))
1013 wr_cdata = wrbuf_alloc();
1015 record_ptr = xmlNewNode(0, BAD_CAST "r");
1016 *root_ptr = record_ptr;
/* declare the namespace on the record element and make it the element's own */
1018 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1019 xmlSetNs(record_ptr, ns_record);
1022 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1024 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1025 for (n = mt->nodes; n; n = n->next)
1027 struct yaz_marc_subfield *s;
1032 case YAZ_MARC_DATAFIELD:
1033 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
1035 case YAZ_MARC_CONTROLFIELD:
1036 wrbuf_rewind(wr_cdata);
1037 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1038 marc_iconv_reset(mt, wr_cdata);
1041 ptr = xmlNewTextChild(record_ptr, ns_record,
1042 BAD_CAST "controlfield",
1043 BAD_CAST wrbuf_cstr(wr_cdata));
1044 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1047 // TODO required iconv?
/* at most three tag characters are copied behind the first byte of field */
1050 strncpy(field + 1, n->u.controlfield.tag, 3);
1052 ptr = xmlNewTextChild(record_ptr, ns_record,
1054 BAD_CAST wrbuf_cstr(wr_cdata));
1058 case YAZ_MARC_COMMENT:
1059 ptr = xmlNewComment(BAD_CAST n->u.comment);
1060 xmlAddChild(record_ptr, ptr);
1062 case YAZ_MARC_LEADER:
1064 char *field = "leader";
1067 xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
1068 BAD_CAST n->u.leader);
1073 wrbuf_destroy(wr_cdata);
/** \brief builds a libxml2 tree for the record in MARCXML layout

    \param mt handle
    \param root_ptr receives the created "record" element

    Creates leader, controlfield, datafield and subfield elements under the
    record element. Subfield and control field content is converted through
    the handle's iconv descriptor into a scratch WRBUF, which is destroyed at
    the end. Comments become XML comment nodes.
    NOTE(review): the rest of the parameter list, the conditions around the
    format and type attributes and the return statements are not visible in
    this excerpt.
*/
1078 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
1083 struct yaz_marc_node *n;
1084 int identifier_length;
1085 const char *leader = 0;
1086 xmlNode *record_ptr;
/* locate the leader; its offset 11 holds the identifier length digit */
1090 for (n = mt->nodes; n; n = n->next)
1091 if (n->which == YAZ_MARC_LEADER)
1093 leader = n->u.leader;
1099 if (!atoi_n_check(leader+11, 1, &identifier_length))
1102 wr_cdata = wrbuf_alloc();
1104 record_ptr = xmlNewNode(0, BAD_CAST "record");
1105 *root_ptr = record_ptr;
/* declare the namespace on the record element and make it the element's own */
1107 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1108 xmlSetNs(record_ptr, ns_record);
1111 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1113 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1114 for (n = mt->nodes; n; n = n->next)
1116 struct yaz_marc_subfield *s;
1121 case YAZ_MARC_DATAFIELD:
1122 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1123 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1124 if (n->u.datafield.indicator)
/* one indN attribute per indicator character, numbered from 1 */
1127 for (i = 0; n->u.datafield.indicator[i]; i++)
1132 sprintf(ind_str, "ind%d", i+1);
1133 ind_val[0] = n->u.datafield.indicator[i];
1135 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1138 for (s = n->u.datafield.subfields; s; s = s->next)
1140 xmlNode *ptr_subfield;
/* number of leading bytes of code_data that form the subfield code */
1141 size_t using_code_len = get_subfield_len(mt, s->code_data,
1143 wrbuf_rewind(wr_cdata);
1144 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1145 s->code_data + using_code_len);
1146 marc_iconv_reset(mt, wr_cdata);
1147 ptr_subfield = xmlNewTextChild(
1149 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
/* wr_cdata is reused for the converted subfield code */
1151 wrbuf_rewind(wr_cdata);
1152 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1153 s->code_data, using_code_len);
1154 xmlNewProp(ptr_subfield, BAD_CAST "code",
1155 BAD_CAST wrbuf_cstr(wr_cdata));
1158 case YAZ_MARC_CONTROLFIELD:
1159 wrbuf_rewind(wr_cdata);
1160 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1161 marc_iconv_reset(mt, wr_cdata);
1163 ptr = xmlNewTextChild(record_ptr, ns_record,
1164 BAD_CAST "controlfield",
1165 BAD_CAST wrbuf_cstr(wr_cdata));
1167 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1169 case YAZ_MARC_COMMENT:
1170 ptr = xmlNewComment(BAD_CAST n->u.comment);
1171 xmlAddChild(record_ptr, ptr);
1173 case YAZ_MARC_LEADER:
1174 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1175 BAD_CAST n->u.leader);
1179 wrbuf_destroy(wr_cdata);
/** \brief writes the record in ISO2709 form

    \param mt handle
    \param wr output buffer

    Works in two passes over the nodes. The first pass converts each field
    into a scratch buffer only to measure its length and builds the
    directory (tag, field length, start offset). The second pass writes the
    field data itself. In between, the 24-byte header is assembled from the
    computed record length, leader bytes 5..11, the computed base address and
    leader bytes 17..23.
    NOTE(review): return statements, the handling of a missing leader and the
    declaration of base_address are not visible in this excerpt.
*/
1188 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1190 struct yaz_marc_node *n;
1191 int indicator_length;
1192 int identifier_length;
1193 int length_data_entry;
1194 int length_starting;
1195 int length_implementation;
1196 int data_offset = 0;
1197 const char *leader = 0;
1198 WRBUF wr_dir, wr_head, wr_data_tmp;
1201 for (n = mt->nodes; n; n = n->next)
1202 if (n->which == YAZ_MARC_LEADER)
1203 leader = n->u.leader;
/* the single-digit leader positions are validated before any buffer is
   allocated */
1207 if (!atoi_n_check(leader+10, 1, &indicator_length))
1209 if (!atoi_n_check(leader+11, 1, &identifier_length))
1211 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1213 if (!atoi_n_check(leader+21, 1, &length_starting))
1215 if (!atoi_n_check(leader+22, 1, &length_implementation))
1218 wr_data_tmp = wrbuf_alloc();
1219 wr_dir = wrbuf_alloc();
/* pass 1: measure every field and build the directory */
1220 for (n = mt->nodes; n; n = n->next)
1222 int data_length = 0;
1223 struct yaz_marc_subfield *s;
1227 case YAZ_MARC_DATAFIELD:
1228 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1229 data_length += indicator_length;
1230 wrbuf_rewind(wr_data_tmp);
1231 for (s = n->u.datafield.subfields; s; s = s->next)
1233 /* write dummy IDFS + content */
1234 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1235 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1236 marc_iconv_reset(mt, wr_data_tmp);
1238 /* write dummy FS (makes MARC-8 to become ASCII) */
1239 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1240 marc_iconv_reset(mt, wr_data_tmp);
1241 data_length += wrbuf_len(wr_data_tmp);
1243 case YAZ_MARC_CONTROLFIELD:
1244 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1246 wrbuf_rewind(wr_data_tmp);
1247 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1248 n->u.controlfield.data);
1249 marc_iconv_reset(mt, wr_data_tmp);
1250 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1251 marc_iconv_reset(mt, wr_data_tmp);
1252 data_length += wrbuf_len(wr_data_tmp);
1254 case YAZ_MARC_COMMENT:
1256 case YAZ_MARC_LEADER:
/* directory entry: zero-padded field length and start offset, using the
   widths given in the leader */
1261 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1262 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1263 data_offset += data_length;
1266 /* mark end of directory */
1267 wrbuf_putc(wr_dir, ISO2709_FS);
1269 /* base address of data (comes after leader+directory) */
1270 base_address = 24 + wrbuf_len(wr_dir);
1272 wr_head = wrbuf_alloc();
1274 /* write record length */
/* the extra 1 accounts for the record separator written at the very end */
1275 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1276 /* from "original" leader */
1277 wrbuf_write(wr_head, leader+5, 7);
1278 /* base address of data */
1279 wrbuf_printf(wr_head, "%05d", base_address);
1280 /* from "original" leader */
1281 wrbuf_write(wr_head, leader+17, 7);
1283 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1284 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1285 wrbuf_destroy(wr_head);
1286 wrbuf_destroy(wr_dir);
1287 wrbuf_destroy(wr_data_tmp);
/* pass 2: write the field data */
1289 for (n = mt->nodes; n; n = n->next)
1291 struct yaz_marc_subfield *s;
1295 case YAZ_MARC_DATAFIELD:
/* NOTE(review): indicator is passed to %.*s; nodes created by
   yaz_marc_add_datafield_turbo_xml start with indicator == 0 -- confirm it
   is always set before this writer runs */
1296 wrbuf_printf(wr, "%.*s", indicator_length,
1297 n->u.datafield.indicator);
1298 for (s = n->u.datafield.subfields; s; s = s->next)
1300 wrbuf_putc(wr, ISO2709_IDFS);
1301 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1302 marc_iconv_reset(mt, wr);
1304 wrbuf_putc(wr, ISO2709_FS);
1306 case YAZ_MARC_CONTROLFIELD:
1307 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1308 marc_iconv_reset(mt, wr);
1309 wrbuf_putc(wr, ISO2709_FS);
1311 case YAZ_MARC_COMMENT:
1313 case YAZ_MARC_LEADER:
1317 wrbuf_printf(wr, "%c", ISO2709_RS);
/** \brief decodes an ISO2709 buffer and writes it in the current output format

    \param mt handle
    \param buf input buffer
    \param bsize size of the input buffer
    \param wr output buffer
    \returns the value from yaz_marc_read_iso2709 on success, -1 on error

    NOTE(review): the conditions in front of the two returns are not visible
    in this excerpt.
*/
1322 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1324 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1327 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1329 return -1; /* error */
1330 return r; /* OK, return length > 0 */
/** \brief decodes an ISO2709 buffer into the handle's internal WRBUF

    \param mt handle
    \param buf input buffer
    \param bsize size of the input buffer
    \param result receives a pointer to the converted text
    \param rsize receives the length of the converted text

    The output lives in mt->m_wr, which is rewound at the start of every
    call, so *result is overwritten by the next call on the same handle.
    NOTE(review): null checks on result and rsize and the return statement
    are not visible in this excerpt.
*/
1333 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1334 const char **result, size_t *rsize)
1338 wrbuf_rewind(mt->m_wr);
1339 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1341 *result = wrbuf_cstr(mt->m_wr);
1343 *rsize = wrbuf_len(mt->m_wr);
/** \brief sets the input format stored in the handle */
1347 void yaz_marc_set_read_format(yaz_marc_t mt, int format)
1350 mt->input_format = format;
/** \brief returns the input format stored in the handle */
1353 int yaz_marc_get_read_format(yaz_marc_t mt)
1356 return mt->input_format;
/** \brief sets the output format of the handle

    Selecting YAZ_MARC_TMARCXML also switches the handle to libxml2-based
    writing.
*/
1361 void yaz_marc_set_write_format(yaz_marc_t mt, int format)
1364 mt->output_format = format;
1365 // Force using libxml2
1366 if (mt->output_format == YAZ_MARC_TMARCXML)
1367 mt->write_using_libxml2 = 1;
/** \brief returns the output format stored in the handle */
1371 int yaz_marc_get_write_format(yaz_marc_t mt)
1374 return mt->output_format;
1380 * Deprecated, use yaz_marc_set_write_format
/* thin wrapper: passes xmlmode on as the output format */
1382 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1384 yaz_marc_set_write_format(mt, xmlmode);
/** \brief sets a debug level on the handle
    NOTE(review): the body is not visible in this excerpt -- confirm what is
    stored. */
1389 void yaz_marc_debug(yaz_marc_t mt, int level)
/** \brief supplies an iconv descriptor to the handle
    NOTE(review): the body is not visible in this excerpt; presumably it
    stores cd in mt->iconv_cd -- confirm. */
1395 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/** \brief returns the iconv descriptor stored in the handle */
1400 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1402 return mt->iconv_cd;
/** \brief overwrites part of the record's leader in place

    \param mt handle
    \param off byte offset into the leader
    \param str bytes to copy; strlen(str) bytes are written

    NOTE(review): no bounds check against the leader length is visible here,
    so off + strlen(str) must stay within the leader. Whether the loop stops
    after the first leader node is not visible in this excerpt.
*/
1405 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1407 struct yaz_marc_node *n;
1409 for (n = mt->nodes; n; n = n->next)
1410 if (n->which == YAZ_MARC_LEADER)
1412 leader = n->u.leader;
1413 memcpy(leader+off, str, strlen(str));
/** \brief installs a leader spec that is applied to leaders added later

    \param mt handle
    \param leader_spec spec string in the form accepted by marc_exec_leader

    Any previous spec is freed first. The new spec is tried against a dummy
    24-byte leader before a copy of it is stored.
    NOTE(review): the null check on leader_spec and the return statements are
    not visible in this excerpt.
*/
1418 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1420 xfree(mt->leader_spec);
1421 mt->leader_spec = 0;
1424 char dummy_leader[24];
/* a non-zero result from marc_exec_leader means the spec was rejected */
1425 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1427 mt->leader_spec = xstrdup(leader_spec);
/** \brief applies a leader spec to a leader buffer

    \param leader_spec spec text, parsed as position=value items
    \param leader leader buffer that is modified in place
    \param size size of the leader buffer

    Each item is read with sscanf as a decimal position, '=' and a value of
    up to 20 characters that stops at a comma. A position outside
    [0, size) is rejected, and so is a quoted value that would extend past
    the end of the buffer.
    NOTE(review): the loop, the handling of numeric values and the return
    statements are not visible in this excerpt.
*/
1432 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1434 const char *cp = leader_spec;
1439 int no_read = 0, no = 0;
1441 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
/* both fields must convert and at least three characters must be consumed */
1442 if (no < 2 || no_read < 3)
1444 if (pos < 0 || (size_t) pos >= size)
/* locate the closing quote of a quoted value */
1449 const char *vp = strchr(val+1, '\'');
1455 if (len + pos > size)
1457 memcpy(leader + pos, val+1, len);
/* value starting with a digit */
1459 else if (*val >= '0' && *val <= '9')
/** \brief maps a format name to a YAZ_MARC_ constant

    \param arg one of "marc", "marcxml", "tmarcxml", "marcxchange", "line"

    NOTE(review): the initial value of mode (the result for an unknown name)
    and the return statement are not visible in this excerpt.
*/
1475 int yaz_marc_decode_formatstr(const char *arg)
1478 if (!strcmp(arg, "marc"))
1479 mode = YAZ_MARC_ISO2709;
1480 if (!strcmp(arg, "marcxml"))
1481 mode = YAZ_MARC_MARCXML;
1482 if (!strcmp(arg, "tmarcxml"))
1483 mode = YAZ_MARC_TMARCXML;
1484 if (!strcmp(arg, "marcxchange"))
1485 mode = YAZ_MARC_XCHANGE;
1486 if (!strcmp(arg, "line"))
1487 mode = YAZ_MARC_LINE;
/** \brief selects whether XML output is produced through libxml2

    \param mt handle
    \param enable stored in mt->write_using_libxml2
*/
1491 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1493 mt->write_using_libxml2 = enable;
/** \brief returns non-zero when the output format is YAZ_MARC_TMARCXML */
1496 int yaz_marc_is_turbo_format(yaz_marc_t mt)
1498 return mt->output_format == YAZ_MARC_TMARCXML;
1505 * c-file-style: "Stroustrup"
1506 * indent-tabs-mode: nil
1508 * vim: shiftwidth=4 tabstop=8 expandtab