1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
35 enum yaz_collection_state {
41 /** \brief node types for yaz_marc_node */
42 enum YAZ_MARC_NODE_TYPE
45 YAZ_MARC_CONTROLFIELD,
50 /** \brief represents a data field */
51 struct yaz_marc_datafield {
54 struct yaz_marc_subfield *subfields;
57 /** \brief represents a control field */
58 struct yaz_marc_controlfield {
63 /** \brief a comment node */
64 struct yaz_marc_comment {
68 /** \brief MARC node */
69 struct yaz_marc_node {
70 enum YAZ_MARC_NODE_TYPE which;
72 struct yaz_marc_datafield datafield;
73 struct yaz_marc_controlfield controlfield;
77 struct yaz_marc_node *next;
80 /** \brief represents a subfield */
81 struct yaz_marc_subfield {
83 struct yaz_marc_subfield *next;
86 /** \brief the internals of a yaz_marc_t handle */
93 int write_using_libxml2;
94 enum yaz_collection_state enable_collection;
99 struct yaz_marc_node *nodes;
100 struct yaz_marc_node **nodes_pp;
101 struct yaz_marc_subfield **subfield_pp;
104 yaz_marc_t yaz_marc_create(void)
106 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
107 mt->output_format = YAZ_MARC_LINE;
109 mt->write_using_libxml2 = 0;
110 mt->enable_collection = no_collection;
111 mt->m_wr = wrbuf_alloc();
114 strcpy(mt->subfield_str, " $");
115 strcpy(mt->endline_str, "\n");
117 mt->nmem = nmem_create();
122 void yaz_marc_destroy(yaz_marc_t mt)
126 nmem_destroy(mt->nmem);
127 wrbuf_destroy(mt->m_wr);
128 xfree(mt->leader_spec);
132 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
137 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
139 wrbuf_iconv_reset(wr, mt->iconv_cd);
142 static int marc_exec_leader(const char *leader_spec, char *leader,
146 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
148 struct yaz_marc_node *n = (struct yaz_marc_node *)
149 nmem_malloc(mt->nmem, sizeof(*n));
152 mt->nodes_pp = &n->next;
157 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
158 const xmlNode *ptr_data)
160 struct yaz_marc_node *n = yaz_marc_add_node(mt);
161 n->which = YAZ_MARC_CONTROLFIELD;
162 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
163 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
166 void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, char *tag,
167 const xmlNode *ptr_data)
169 struct yaz_marc_node *n = yaz_marc_add_node(mt);
170 n->which = YAZ_MARC_CONTROLFIELD;
171 n->u.controlfield.tag = tag;
172 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
178 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
180 struct yaz_marc_node *n = yaz_marc_add_node(mt);
181 n->which = YAZ_MARC_COMMENT;
182 n->u.comment = nmem_strdup(mt->nmem, comment);
185 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
191 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
192 yaz_marc_add_comment(mt, buf);
196 int yaz_marc_get_debug(yaz_marc_t mt)
201 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
203 struct yaz_marc_node *n = yaz_marc_add_node(mt);
204 n->which = YAZ_MARC_LEADER;
205 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
206 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
209 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
210 const char *data, size_t data_len)
212 struct yaz_marc_node *n = yaz_marc_add_node(mt);
213 n->which = YAZ_MARC_CONTROLFIELD;
214 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
215 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
221 sprintf(msg, "controlfield:");
222 for (i = 0; i < 16 && i < data_len; i++)
223 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
225 sprintf(msg + strlen(msg), " ..");
226 yaz_marc_add_comment(mt, msg);
230 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
231 const char *indicator, size_t indicator_len)
233 struct yaz_marc_node *n = yaz_marc_add_node(mt);
234 n->which = YAZ_MARC_DATAFIELD;
235 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
236 n->u.datafield.indicator =
237 nmem_strdupn(mt->nmem, indicator, indicator_len);
238 n->u.datafield.subfields = 0;
240 /* make subfield_pp the current (last one) */
241 mt->subfield_pp = &n->u.datafield.subfields;
245 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
246 const char *indicator, size_t indicator_len)
248 struct yaz_marc_node *n = yaz_marc_add_node(mt);
249 n->which = YAZ_MARC_DATAFIELD;
250 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
251 n->u.datafield.indicator =
252 nmem_strdupn(mt->nmem, indicator, indicator_len);
253 n->u.datafield.subfields = 0;
255 /* make subfield_pp the current (last one) */
256 mt->subfield_pp = &n->u.datafield.subfields;
259 struct yaz_marc_node* yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value)
261 struct yaz_marc_node *n = yaz_marc_add_node(mt);
262 n->which = YAZ_MARC_DATAFIELD;
263 n->u.datafield.tag = tag_value;
264 n->u.datafield.indicator = 0;
265 n->u.datafield.subfields = 0;
267 /* make subfield_pp the current (last one) */
268 mt->subfield_pp = &n->u.datafield.subfields;
272 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
274 n->u.datafield.indicator = indicator;
279 void yaz_marc_add_subfield(yaz_marc_t mt,
280 const char *code_data, size_t code_data_len)
287 sprintf(msg, "subfield:");
288 for (i = 0; i < 16 && i < code_data_len; i++)
289 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
290 if (i < code_data_len)
291 sprintf(msg + strlen(msg), " ..");
292 yaz_marc_add_comment(mt, msg);
297 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
298 nmem_malloc(mt->nmem, sizeof(*n));
299 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
301 /* mark subfield_pp to point to this one, so we append here next */
302 *mt->subfield_pp = n;
303 mt->subfield_pp = &n->next;
307 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
308 int *indicator_length,
309 int *identifier_length,
311 int *length_data_entry,
312 int *length_starting,
313 int *length_implementation)
317 memcpy(leader, leader_c, 24);
319 if (!atoi_n_check(leader+10, 1, indicator_length))
322 "Indicator length at offset 10 should hold a digit."
325 *indicator_length = 2;
327 if (!atoi_n_check(leader+11, 1, identifier_length))
330 "Identifier length at offset 11 should hold a digit."
333 *identifier_length = 2;
335 if (!atoi_n_check(leader+12, 5, base_address))
338 "Base address at offsets 12..16 should hold a number."
342 if (!atoi_n_check(leader+20, 1, length_data_entry))
345 "Length data entry at offset 20 should hold a digit."
347 *length_data_entry = 4;
350 if (!atoi_n_check(leader+21, 1, length_starting))
353 "Length starting at offset 21 should hold a digit."
355 *length_starting = 5;
358 if (!atoi_n_check(leader+22, 1, length_implementation))
361 "Length implementation at offset 22 should hold a digit."
363 *length_implementation = 0;
369 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
370 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
371 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
372 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
373 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
374 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
376 yaz_marc_add_leader(mt, leader, 24);
379 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
381 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
382 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
385 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
387 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
388 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
391 /* try to guess how many bytes the identifier really is! */
392 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
397 for (i = 1; i<5; i++)
400 size_t outbytesleft = sizeof(outbuf);
402 const char *inp = buf;
404 size_t inbytesleft = i;
405 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
406 &outp, &outbytesleft);
407 if (r != (size_t) (-1))
408 return i; /* got a complete sequence */
410 return 1; /* giving up */
412 return 1; /* we don't know */
415 void yaz_marc_reset(yaz_marc_t mt)
417 nmem_reset(mt->nmem);
419 mt->nodes_pp = &mt->nodes;
423 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
425 struct yaz_marc_node *n;
426 int identifier_length;
427 const char *leader = 0;
429 for (n = mt->nodes; n; n = n->next)
430 if (n->which == YAZ_MARC_LEADER)
432 leader = n->u.leader;
438 if (!atoi_n_check(leader+11, 1, &identifier_length))
441 for (n = mt->nodes; n; n = n->next)
445 case YAZ_MARC_COMMENT:
446 wrbuf_iconv_write(wr, mt->iconv_cd,
447 n->u.comment, strlen(n->u.comment));
448 wrbuf_puts(wr, "\n");
457 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
458 int identifier_length)
460 /* if identifier length is 2 (most MARCs) or less (probably an error),
461 the code is a single character .. However we've
462 seen multibyte codes, so see how big it really is */
463 if (identifier_length > 2)
464 return identifier_length - 1;
466 return cdata_one_character(mt, data);
469 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
471 struct yaz_marc_node *n;
472 int identifier_length;
473 const char *leader = 0;
475 for (n = mt->nodes; n; n = n->next)
476 if (n->which == YAZ_MARC_LEADER)
478 leader = n->u.leader;
484 if (!atoi_n_check(leader+11, 1, &identifier_length))
487 for (n = mt->nodes; n; n = n->next)
489 struct yaz_marc_subfield *s;
492 case YAZ_MARC_DATAFIELD:
493 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
494 n->u.datafield.indicator);
495 for (s = n->u.datafield.subfields; s; s = s->next)
497 size_t using_code_len = get_subfield_len(mt, s->code_data,
500 wrbuf_puts (wr, mt->subfield_str);
501 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
503 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
504 wrbuf_iconv_puts(wr, mt->iconv_cd,
505 s->code_data + using_code_len);
506 marc_iconv_reset(mt, wr);
508 wrbuf_puts (wr, mt->endline_str);
510 case YAZ_MARC_CONTROLFIELD:
511 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
512 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
513 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
514 marc_iconv_reset(mt, wr);
515 wrbuf_puts (wr, mt->endline_str);
517 case YAZ_MARC_COMMENT:
519 wrbuf_iconv_write(wr, mt->iconv_cd,
520 n->u.comment, strlen(n->u.comment));
521 marc_iconv_reset(mt, wr);
522 wrbuf_puts(wr, ")\n");
524 case YAZ_MARC_LEADER:
525 wrbuf_printf(wr, "%s\n", n->u.leader);
528 wrbuf_puts(wr, "\n");
532 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
534 if (mt->enable_collection == collection_second)
536 switch(mt->output_format)
538 case YAZ_MARC_MARCXML:
539 case YAZ_MARC_TMARCXML:
540 wrbuf_printf(wr, "</collection>\n");
542 case YAZ_MARC_XCHANGE:
543 wrbuf_printf(wr, "</collection>\n");
550 void yaz_marc_enable_collection(yaz_marc_t mt)
552 mt->enable_collection = collection_first;
555 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
557 switch(mt->output_format)
560 return yaz_marc_write_line(mt, wr);
561 case YAZ_MARC_MARCXML:
562 case YAZ_MARC_TMARCXML:
563 return yaz_marc_write_marcxml(mt, wr);
564 case YAZ_MARC_XCHANGE:
565 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
566 case YAZ_MARC_ISO2709:
567 return yaz_marc_write_iso2709(mt, wr);
569 return yaz_marc_write_check(mt, wr);
574 const char *collection_name[2] = { "collection", "collection"};
575 const char *record_name[2] = { "record", "r"};
576 const char *leader_name[2] = { "leader", "l"};
577 const char *controlfield_name[2]= { "controlfield", "c"};
578 const char *datafield_name[2] = { "datafield", "d"};
579 const char *subfield_name[2] = { "subfield", "s"};
582 /** \brief common MARC XML/Xchange writer
584 \param wr WRBUF output
585 \param ns XMLNS for the elements
586 \param format record format (e.g. "MARC21")
587 \param type record type (e.g. "Bibliographic")
589 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
594 struct yaz_marc_node *n;
595 int identifier_length;
596 const char *leader = 0;
598 int turbo = yaz_marc_get_write_format(mt) == YAZ_MARC_TMARCXML;
600 for (n = mt->nodes; n; n = n->next)
601 if (n->which == YAZ_MARC_LEADER)
603 leader = n->u.leader;
609 if (!atoi_n_check(leader+11, 1, &identifier_length))
612 if (mt->enable_collection != no_collection)
614 if (mt->enable_collection == collection_first)
615 wrbuf_printf(wr, "<%s xmlns=\"%s\">\n", collection_name[turbo], ns);
616 mt->enable_collection = collection_second;
617 wrbuf_printf(wr, "<%s", record_name[turbo]);
621 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
624 wrbuf_printf(wr, " format=\"%.80s\"", format);
626 wrbuf_printf(wr, " type=\"%.80s\"", type);
627 wrbuf_printf(wr, ">\n");
628 for (n = mt->nodes; n; n = n->next)
630 struct yaz_marc_subfield *s;
634 case YAZ_MARC_DATAFIELD:
636 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
638 wrbuf_printf(wr, " tag=\"");
639 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
640 strlen(n->u.datafield.tag));
641 wrbuf_printf(wr, "\"");
642 if (n->u.datafield.indicator)
645 for (i = 0; n->u.datafield.indicator[i]; i++)
647 wrbuf_printf(wr, " ind%d=\"", i+1);
648 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
649 n->u.datafield.indicator+i, 1);
650 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
653 wrbuf_printf(wr, ">\n");
656 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
657 strlen(n->u.datafield.tag));
659 wrbuf_printf(wr, ">\n");
660 if (n->u.datafield.indicator)
663 for (i = 0; n->u.datafield.indicator[i]; i++)
665 wrbuf_printf(wr, " <i%d>", i+1);
666 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
667 n->u.datafield.indicator+i, 1);
668 wrbuf_printf(wr, "</i%d>", i+1);
669 wrbuf_puts(wr, "\n");
673 for (s = n->u.datafield.subfields; s; s = s->next)
675 size_t using_code_len = get_subfield_len(mt, s->code_data,
677 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
679 wrbuf_printf(wr, " code=\"");
680 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
681 s->code_data, using_code_len);
682 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
684 // TODO check this. encode special characters.
685 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
686 s->code_data, using_code_len);
689 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
690 s->code_data + using_code_len,
691 strlen(s->code_data + using_code_len));
692 marc_iconv_reset(mt, wr);
693 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
694 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
695 s->code_data, using_code_len);
696 wrbuf_puts(wr, ">\n");
698 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
700 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
701 strlen(n->u.datafield.tag));
702 wrbuf_printf(wr, ">\n", datafield_name[turbo]);
704 case YAZ_MARC_CONTROLFIELD:
705 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
707 wrbuf_printf(wr, " tag=\"");
708 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
709 strlen(n->u.controlfield.tag));
710 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
713 //TODO convert special
714 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
715 strlen(n->u.controlfield.tag));
716 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
718 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
719 n->u.controlfield.data,
720 strlen(n->u.controlfield.data));
721 marc_iconv_reset(mt, wr);
722 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
723 //TODO convert special
724 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
725 strlen(n->u.controlfield.tag));
726 wrbuf_puts(wr, ">\n");
728 case YAZ_MARC_COMMENT:
729 wrbuf_printf(wr, "<!-- ");
730 wrbuf_puts(wr, n->u.comment);
731 wrbuf_printf(wr, " -->\n");
733 case YAZ_MARC_LEADER:
734 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
735 wrbuf_iconv_write_cdata(wr,
736 0 /* no charset conversion for leader */,
737 n->u.leader, strlen(n->u.leader));
738 wrbuf_printf(wr, " </%s>", leader_name[turbo]);
741 wrbuf_printf(wr, "</%s>", record_name[turbo]);
745 static int yaz_marc_write_marcxml_ns2(yaz_marc_t mt, WRBUF wr,
750 struct yaz_marc_node *n;
751 int identifier_length;
752 const char *leader = 0;
754 for (n = mt->nodes; n; n = n->next)
755 if (n->which == YAZ_MARC_LEADER)
757 leader = n->u.leader;
763 if (!atoi_n_check(leader+11, 1, &identifier_length))
766 if (mt->enable_collection != no_collection)
768 if (mt->enable_collection == collection_first)
769 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
770 mt->enable_collection = collection_second;
771 wrbuf_printf(wr, "<record");
775 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
778 wrbuf_printf(wr, " format=\"%.80s\"", format);
780 wrbuf_printf(wr, " type=\"%.80s\"", type);
781 wrbuf_printf(wr, ">\n");
782 for (n = mt->nodes; n; n = n->next)
784 struct yaz_marc_subfield *s;
788 case YAZ_MARC_DATAFIELD:
789 wrbuf_printf(wr, " <datafield tag=\"");
790 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
791 strlen(n->u.datafield.tag));
792 wrbuf_printf(wr, "\"");
793 if (n->u.datafield.indicator)
796 for (i = 0; n->u.datafield.indicator[i]; i++)
798 wrbuf_printf(wr, " ind%d=\"", i+1);
799 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
800 n->u.datafield.indicator+i, 1);
801 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
804 wrbuf_printf(wr, ">\n");
805 for (s = n->u.datafield.subfields; s; s = s->next)
807 size_t using_code_len = get_subfield_len(mt, s->code_data,
809 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
810 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
811 s->code_data, using_code_len);
812 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
813 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
814 s->code_data + using_code_len,
815 strlen(s->code_data + using_code_len));
816 marc_iconv_reset(mt, wr);
817 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
818 wrbuf_puts(wr, "\n");
820 wrbuf_printf(wr, " </datafield>\n");
822 case YAZ_MARC_CONTROLFIELD:
823 wrbuf_printf(wr, " <controlfield tag=\"");
824 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
825 strlen(n->u.controlfield.tag));
826 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
827 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
828 n->u.controlfield.data,
829 strlen(n->u.controlfield.data));
831 marc_iconv_reset(mt, wr);
832 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
833 wrbuf_puts(wr, "\n");
835 case YAZ_MARC_COMMENT:
836 wrbuf_printf(wr, "<!-- ");
837 wrbuf_puts(wr, n->u.comment);
838 wrbuf_printf(wr, " -->\n");
840 case YAZ_MARC_LEADER:
841 wrbuf_printf(wr, " <leader>");
842 wrbuf_iconv_write_cdata(wr,
843 0 /* no charset conversion for leader */,
844 n->u.leader, strlen(n->u.leader));
845 wrbuf_printf(wr, "</leader>\n");
848 wrbuf_puts(wr, "</record>\n");
853 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
858 if (mt->write_using_libxml2)
864 if (yaz_marc_get_write_format(mt) == YAZ_MARC_MARCXML)
865 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
866 else // Check for Turbo XML
867 ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type);
871 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
874 xmlDocSetRootElement(doc, root_ptr);
875 xmlDocDumpMemory(doc, &buf_out, &len_out);
877 wrbuf_write(wr, (const char *) buf_out, len_out);
888 return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
891 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
893 /* set leader 09 to 'a' for UNICODE */
894 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
895 if (!mt->leader_spec)
896 yaz_marc_modify_leader(mt, 9, "a");
897 char *name_space = "http://www.loc.gov/MARC21/slim";
898 if (mt->output_format == YAZ_MARC_TMARCXML)
899 name_space = "http://www.indexdata.com/MARC21/turboxml";
900 return yaz_marc_write_marcxml_ns(mt, wr, name_space,
904 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
908 return yaz_marc_write_marcxml_ns(mt, wr,
909 "info:lc/xmlns/marcxchange-v1",
915 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
918 struct yaz_marc_subfield *s;
919 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
921 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
922 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
925 //TODO consider if safe
928 strncpy(field + 1, n->u.datafield.tag, 3);
930 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
932 if (n->u.datafield.indicator)
935 for (i = 0; n->u.datafield.indicator[i]; i++)
940 ind_val[0] = n->u.datafield.indicator[i];
943 sprintf(ind_str, "ind%d", i+1);
944 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
947 sprintf(ind_str, "i%d", i+1);
948 xmlNewTextChild(ptr, ns_record, BAD_CAST ind_str, BAD_CAST ind_val);
952 WRBUF subfield_name = wrbuf_alloc();
953 for (s = n->u.datafield.subfields; s; s = s->next)
955 xmlNode *ptr_subfield;
956 size_t using_code_len = get_subfield_len(mt, s->code_data,
958 wrbuf_rewind(wr_cdata);
959 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
960 marc_iconv_reset(mt, wr_cdata);
963 ptr_subfield = xmlNewTextChild(
965 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
966 wrbuf_rewind(wr_cdata);
967 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
968 xmlNewProp(ptr_subfield, BAD_CAST "code",
969 BAD_CAST wrbuf_cstr(wr_cdata));
971 else { // Turbo format
972 wrbuf_rewind(subfield_name);
973 wrbuf_puts(subfield_name, "s");
974 // TODO Map special codes to something possible for XML ELEMENT names
975 if ((s->code_data[0] >= '0' && s->code_data[0] <= '9') ||
976 (s->code_data[0] >= 'a' && s->code_data[0] <= 'z') ||
977 (s->code_data[0] >= 'A' && s->code_data[0] <= 'Z'))
979 wrbuf_iconv_write(subfield_name, mt->iconv_cd,s->code_data, using_code_len);
982 char buffer[2*using_code_len + 1];
984 for (index = 0; index < using_code_len; index++) {
985 sprintf(buffer + 2*index, "%02X", (unsigned char) s->code_data[index] & 0xFF);
987 buffer[2*(index+1)] = 0;
988 wrbuf_puts(subfield_name, "-");
989 wrbuf_puts(subfield_name, buffer);
990 yaz_log(YLOG_WARN, "Using numeric value in element name: %s", buffer);
992 ptr_subfield = xmlNewTextChild(ptr, ns_record,
993 BAD_CAST wrbuf_cstr(subfield_name),
994 BAD_CAST wrbuf_cstr(wr_cdata));
997 wrbuf_destroy(subfield_name);
1000 int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
1005 struct yaz_marc_node *n;
1006 int identifier_length;
1007 const char *leader = 0;
1008 xmlNode *record_ptr;
1011 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
1012 for (n = mt->nodes; n; n = n->next)
1013 if (n->which == YAZ_MARC_LEADER)
1015 leader = n->u.leader;
1021 if (!atoi_n_check(leader+11, 1, &identifier_length))
1024 wr_cdata = wrbuf_alloc();
1026 record_ptr = xmlNewNode(0, BAD_CAST "r");
1027 *root_ptr = record_ptr;
1029 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1030 xmlSetNs(record_ptr, ns_record);
1033 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1035 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1036 for (n = mt->nodes; n; n = n->next)
1038 struct yaz_marc_subfield *s;
1043 case YAZ_MARC_DATAFIELD:
1044 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
1046 case YAZ_MARC_CONTROLFIELD:
1047 wrbuf_rewind(wr_cdata);
1048 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1049 marc_iconv_reset(mt, wr_cdata);
1052 ptr = xmlNewTextChild(record_ptr, ns_record,
1053 BAD_CAST "controlfield",
1054 BAD_CAST wrbuf_cstr(wr_cdata));
1055 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1058 // TODO required iconv?
1061 strncpy(field + 1, n->u.controlfield.tag, 3);
1063 ptr = xmlNewTextChild(record_ptr, ns_record,
1065 BAD_CAST wrbuf_cstr(wr_cdata));
1069 case YAZ_MARC_COMMENT:
1070 ptr = xmlNewComment(BAD_CAST n->u.comment);
1071 xmlAddChild(record_ptr, ptr);
1073 case YAZ_MARC_LEADER:
1075 char *field = "leader";
1078 xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
1079 BAD_CAST n->u.leader);
1084 wrbuf_destroy(wr_cdata);
1089 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
1094 struct yaz_marc_node *n;
1095 int identifier_length;
1096 const char *leader = 0;
1097 xmlNode *record_ptr;
1101 for (n = mt->nodes; n; n = n->next)
1102 if (n->which == YAZ_MARC_LEADER)
1104 leader = n->u.leader;
1110 if (!atoi_n_check(leader+11, 1, &identifier_length))
1113 wr_cdata = wrbuf_alloc();
1115 record_ptr = xmlNewNode(0, BAD_CAST "record");
1116 *root_ptr = record_ptr;
1118 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1119 xmlSetNs(record_ptr, ns_record);
1122 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1124 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1125 for (n = mt->nodes; n; n = n->next)
1127 struct yaz_marc_subfield *s;
1132 case YAZ_MARC_DATAFIELD:
1133 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1134 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1135 if (n->u.datafield.indicator)
1138 for (i = 0; n->u.datafield.indicator[i]; i++)
1143 sprintf(ind_str, "ind%d", i+1);
1144 ind_val[0] = n->u.datafield.indicator[i];
1146 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1149 for (s = n->u.datafield.subfields; s; s = s->next)
1151 xmlNode *ptr_subfield;
1152 size_t using_code_len = get_subfield_len(mt, s->code_data,
1154 wrbuf_rewind(wr_cdata);
1155 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1156 s->code_data + using_code_len);
1157 marc_iconv_reset(mt, wr_cdata);
1158 ptr_subfield = xmlNewTextChild(
1160 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
1162 wrbuf_rewind(wr_cdata);
1163 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1164 s->code_data, using_code_len);
1165 xmlNewProp(ptr_subfield, BAD_CAST "code",
1166 BAD_CAST wrbuf_cstr(wr_cdata));
1169 case YAZ_MARC_CONTROLFIELD:
1170 wrbuf_rewind(wr_cdata);
1171 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1172 marc_iconv_reset(mt, wr_cdata);
1174 ptr = xmlNewTextChild(record_ptr, ns_record,
1175 BAD_CAST "controlfield",
1176 BAD_CAST wrbuf_cstr(wr_cdata));
1178 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1180 case YAZ_MARC_COMMENT:
1181 ptr = xmlNewComment(BAD_CAST n->u.comment);
1182 xmlAddChild(record_ptr, ptr);
1184 case YAZ_MARC_LEADER:
1185 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1186 BAD_CAST n->u.leader);
1190 wrbuf_destroy(wr_cdata);
1199 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1201 struct yaz_marc_node *n;
1202 int indicator_length;
1203 int identifier_length;
1204 int length_data_entry;
1205 int length_starting;
1206 int length_implementation;
1207 int data_offset = 0;
1208 const char *leader = 0;
1209 WRBUF wr_dir, wr_head, wr_data_tmp;
1212 for (n = mt->nodes; n; n = n->next)
1213 if (n->which == YAZ_MARC_LEADER)
1214 leader = n->u.leader;
1218 if (!atoi_n_check(leader+10, 1, &indicator_length))
1220 if (!atoi_n_check(leader+11, 1, &identifier_length))
1222 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1224 if (!atoi_n_check(leader+21, 1, &length_starting))
1226 if (!atoi_n_check(leader+22, 1, &length_implementation))
1229 wr_data_tmp = wrbuf_alloc();
1230 wr_dir = wrbuf_alloc();
1231 for (n = mt->nodes; n; n = n->next)
1233 int data_length = 0;
1234 struct yaz_marc_subfield *s;
1238 case YAZ_MARC_DATAFIELD:
1239 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1240 data_length += indicator_length;
1241 wrbuf_rewind(wr_data_tmp);
1242 for (s = n->u.datafield.subfields; s; s = s->next)
1244 /* write dummy IDFS + content */
1245 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1246 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1247 marc_iconv_reset(mt, wr_data_tmp);
1249 /* write dummy FS (makes MARC-8 to become ASCII) */
1250 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1251 marc_iconv_reset(mt, wr_data_tmp);
1252 data_length += wrbuf_len(wr_data_tmp);
1254 case YAZ_MARC_CONTROLFIELD:
1255 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1257 wrbuf_rewind(wr_data_tmp);
1258 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1259 n->u.controlfield.data);
1260 marc_iconv_reset(mt, wr_data_tmp);
1261 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1262 marc_iconv_reset(mt, wr_data_tmp);
1263 data_length += wrbuf_len(wr_data_tmp);
1265 case YAZ_MARC_COMMENT:
1267 case YAZ_MARC_LEADER:
1272 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1273 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1274 data_offset += data_length;
1277 /* mark end of directory */
1278 wrbuf_putc(wr_dir, ISO2709_FS);
1280 /* base address of data (comes after leader+directory) */
1281 base_address = 24 + wrbuf_len(wr_dir);
1283 wr_head = wrbuf_alloc();
1285 /* write record length */
1286 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1287 /* from "original" leader */
1288 wrbuf_write(wr_head, leader+5, 7);
1289 /* base address of data */
1290 wrbuf_printf(wr_head, "%05d", base_address);
1291 /* from "original" leader */
1292 wrbuf_write(wr_head, leader+17, 7);
1294 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1295 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1296 wrbuf_destroy(wr_head);
1297 wrbuf_destroy(wr_dir);
1298 wrbuf_destroy(wr_data_tmp);
1300 for (n = mt->nodes; n; n = n->next)
1302 struct yaz_marc_subfield *s;
1306 case YAZ_MARC_DATAFIELD:
1307 wrbuf_printf(wr, "%.*s", indicator_length,
1308 n->u.datafield.indicator);
1309 for (s = n->u.datafield.subfields; s; s = s->next)
1311 wrbuf_putc(wr, ISO2709_IDFS);
1312 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1313 marc_iconv_reset(mt, wr);
1315 wrbuf_putc(wr, ISO2709_FS);
1317 case YAZ_MARC_CONTROLFIELD:
1318 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1319 marc_iconv_reset(mt, wr);
1320 wrbuf_putc(wr, ISO2709_FS);
1322 case YAZ_MARC_COMMENT:
1324 case YAZ_MARC_LEADER:
1328 wrbuf_printf(wr, "%c", ISO2709_RS);
1333 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1335 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1338 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1340 return -1; /* error */
1341 return r; /* OK, return length > 0 */
1344 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1345 const char **result, size_t *rsize)
1349 wrbuf_rewind(mt->m_wr);
1350 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1352 *result = wrbuf_cstr(mt->m_wr);
1354 *rsize = wrbuf_len(mt->m_wr);
1358 void yaz_marc_set_read_format(yaz_marc_t mt, int format)
1361 mt->input_format = format;
1364 int yaz_marc_get_read_format(yaz_marc_t mt)
1367 return mt->input_format;
1372 void yaz_marc_set_write_format(yaz_marc_t mt, int format)
1375 mt->output_format = format;
1377 // Force using libxml2
1378 if (mt->output_format == YAZ_MARC_TMARCXML)
1379 mt->write_using_libxml2 = 1;
1384 int yaz_marc_get_write_format(yaz_marc_t mt)
1387 return mt->output_format;
1393 * Deprecated, use yaz_marc_set_write_format
1395 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1397 yaz_marc_set_write_format(mt, xmlmode);
1402 void yaz_marc_debug(yaz_marc_t mt, int level)
1408 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1413 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1415 return mt->iconv_cd;
1418 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1420 struct yaz_marc_node *n;
1422 for (n = mt->nodes; n; n = n->next)
1423 if (n->which == YAZ_MARC_LEADER)
1425 leader = n->u.leader;
1426 memcpy(leader+off, str, strlen(str));
1431 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1433 xfree(mt->leader_spec);
1434 mt->leader_spec = 0;
1437 char dummy_leader[24];
1438 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1440 mt->leader_spec = xstrdup(leader_spec);
1445 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1447 const char *cp = leader_spec;
1452 int no_read = 0, no = 0;
1454 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1455 if (no < 2 || no_read < 3)
1457 if (pos < 0 || (size_t) pos >= size)
1462 const char *vp = strchr(val+1, '\'');
1468 if (len + pos > size)
1470 memcpy(leader + pos, val+1, len);
1472 else if (*val >= '0' && *val <= '9')
1488 int yaz_marc_decode_formatstr(const char *arg)
1491 if (!strcmp(arg, "marc"))
1492 mode = YAZ_MARC_ISO2709;
1493 if (!strcmp(arg, "marcxml"))
1494 mode = YAZ_MARC_MARCXML;
1495 if (!strcmp(arg, "tmarcxml"))
1496 mode = YAZ_MARC_TMARCXML;
1497 if (!strcmp(arg, "marcxchange"))
1498 mode = YAZ_MARC_XCHANGE;
1499 if (!strcmp(arg, "line"))
1500 mode = YAZ_MARC_LINE;
1504 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1506 mt->write_using_libxml2 = enable;
1509 int yaz_marc_is_turbo_format(yaz_marc_t mt)
1511 return mt->output_format == YAZ_MARC_TMARCXML;
1518 * c-file-style: "Stroustrup"
1519 * indent-tabs-mode: nil
1521 * vim: shiftwidth=4 tabstop=8 expandtab