1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
/* State of the optional collection wrapper; the enumerators used in this
   file are no_collection, collection_first and collection_second. */
35 enum yaz_collection_state {
41 /** \brief node types for yaz_marc_node (the value kept in its "which" member) */
42 enum YAZ_MARC_NODE_TYPE
45 YAZ_MARC_CONTROLFIELD,
50 /** \brief represents a data field */
51 struct yaz_marc_datafield {
/* head of the singly linked list of subfields belonging to this field */
54 struct yaz_marc_subfield *subfields;
57 /** \brief represents a control field (accessed in this file through its tag and data members) */
58 struct yaz_marc_controlfield {
63 /** \brief a comment node */
/* NOTE(review): no member of this struct is visible in this excerpt; the
   writers in this file reach comment text through yaz_marc_node's u.comment. */
64 struct yaz_marc_comment {
68 /** \brief MARC node */
69 struct yaz_marc_node {
/* selects which of the payload members below is valid */
70 enum YAZ_MARC_NODE_TYPE which;
/* payload when which == YAZ_MARC_DATAFIELD */
72 struct yaz_marc_datafield datafield;
/* payload when which == YAZ_MARC_CONTROLFIELD */
73 struct yaz_marc_controlfield controlfield;
/* next node of the record, 0 at the end of the list */
77 struct yaz_marc_node *next;
80 /** \brief represents a subfield */
81 struct yaz_marc_subfield {
/* next subfield of the same data field, 0 at the end of the list */
83 struct yaz_marc_subfield *next;
86 /** \brief the internals of a yaz_marc_t handle */
/* non-zero: XML output is built as a libxml2 tree instead of being written as text */
93 int write_using_libxml2;
/* collection wrapper state, see yaz_marc_enable_collection */
94 enum yaz_collection_state enable_collection;
/* head of the node list of the current record */
99 struct yaz_marc_node *nodes;
/* link slot that receives the next node added */
100 struct yaz_marc_node **nodes_pp;
/* link slot that receives the next subfield added */
101 struct yaz_marc_subfield **subfield_pp;
/* Allocates a MARC handle with xmalloc and sets the defaults visible here:
   line output format, libxml2 writing off, no collection wrapper, a fresh
   output WRBUF and NMEM, and the default subfield / end-of-line strings.
   Further initialisation and the return statement are elided from this
   excerpt. */
104 yaz_marc_t yaz_marc_create(void)
106 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
107 mt->output_format = YAZ_MARC_LINE;
109 mt->write_using_libxml2 = 0;
110 mt->enable_collection = no_collection;
111 mt->m_wr = wrbuf_alloc();
114 strcpy(mt->subfield_str, " $");
115 strcpy(mt->endline_str, "\n");
117 mt->nmem = nmem_create();
/* Releases what the handle owns: the NMEM pool (and with it all nodes),
   the output WRBUF and the leader_spec string. */
122 void yaz_marc_destroy(yaz_marc_t mt)
126 nmem_destroy(mt->nmem);
127 wrbuf_destroy(mt->m_wr);
128 xfree(mt->leader_spec);
/* Accessor for the handle's NMEM. The body is not visible in this excerpt
   (presumably returns mt->nmem -- confirm). */
132 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
137 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
139 wrbuf_iconv_reset(wr, mt->iconv_cd);
/* Forward declaration; the definition is near the end of this file. */
142 static int marc_exec_leader(const char *leader_spec, char *leader,
/* Allocates a new node from the handle's NMEM and makes nodes_pp point at
   the new node's "next" slot, so the following node is appended after it.
   The statements that link the node in and return it are elided from this
   excerpt. */
146 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
148 struct yaz_marc_node *n = (struct yaz_marc_node *)
149 nmem_malloc(mt->nmem, sizeof(*n));
152 mt->nodes_pp = &n->next;
157 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
158 const xmlNode *ptr_data)
160 struct yaz_marc_node *n = yaz_marc_add_node(mt);
161 n->which = YAZ_MARC_CONTROLFIELD;
162 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
163 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
166 void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, char *tag,
167 const xmlNode *ptr_data)
169 struct yaz_marc_node *n = yaz_marc_add_node(mt);
170 n->which = YAZ_MARC_CONTROLFIELD;
171 n->u.controlfield.tag = tag;
172 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
178 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
180 struct yaz_marc_node *n = yaz_marc_add_node(mt);
181 n->which = YAZ_MARC_COMMENT;
182 n->u.comment = nmem_strdup(mt->nmem, comment);
/* printf-style helper: formats the message into a local buffer with
   yaz_vsnprintf and appends it to the record as a comment node. The buffer
   declaration and va_list handling are elided from this excerpt. */
185 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
191 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
192 yaz_marc_add_comment(mt, buf);
/* Accessor for the handle's debug setting; the body is not visible in this
   excerpt. */
196 int yaz_marc_get_debug(yaz_marc_t mt)
201 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
203 struct yaz_marc_node *n = yaz_marc_add_node(mt);
204 n->which = YAZ_MARC_LEADER;
205 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
206 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
/* Appends a control field; tag and the first data_len bytes of data are
   copied into the handle's NMEM. The lines below the copy build a
   "controlfield:" comment holding a hex dump of at most the first 16 data
   bytes. The condition guarding that dump, the declarations of i and msg,
   and the guard in front of the " .." suffix are elided from this excerpt. */
209 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
210 const char *data, size_t data_len)
212 struct yaz_marc_node *n = yaz_marc_add_node(mt);
213 n->which = YAZ_MARC_CONTROLFIELD;
214 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
215 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
221 sprintf(msg, "controlfield:");
222 for (i = 0; i < 16 && i < data_len; i++)
223 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
225 sprintf(msg + strlen(msg), " ..");
226 yaz_marc_add_comment(mt, msg);
230 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
231 const char *indicator, size_t indicator_len)
233 struct yaz_marc_node *n = yaz_marc_add_node(mt);
234 n->which = YAZ_MARC_DATAFIELD;
235 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
236 n->u.datafield.indicator =
237 nmem_strdupn(mt->nmem, indicator, indicator_len);
238 n->u.datafield.subfields = 0;
240 /* make subfield_pp the current (last one) */
241 mt->subfield_pp = &n->u.datafield.subfields;
245 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
246 const char *indicator, size_t indicator_len)
248 struct yaz_marc_node *n = yaz_marc_add_node(mt);
249 n->which = YAZ_MARC_DATAFIELD;
250 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
251 n->u.datafield.indicator =
252 nmem_strdupn(mt->nmem, indicator, indicator_len);
253 n->u.datafield.subfields = 0;
255 /* make subfield_pp the current (last one) */
256 mt->subfield_pp = &n->u.datafield.subfields;
/* Appends a data field with a caller-supplied tag string. The tag pointer is
   stored as is (not copied) and the indicator starts out as 0; it can be
   filled in afterwards with yaz_marc_datafield_set_indicators. The return
   statement is elided from this excerpt. */
259 struct yaz_marc_node* yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value)
261 struct yaz_marc_node *n = yaz_marc_add_node(mt);
262 n->which = YAZ_MARC_DATAFIELD;
263 n->u.datafield.tag = tag_value;
264 n->u.datafield.indicator = 0;
265 n->u.datafield.subfields = 0;
267 /* make subfield_pp the current (last one) */
268 mt->subfield_pp = &n->u.datafield.subfields;
272 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
274 n->u.datafield.indicator = indicator;
/* Appends a subfield (code immediately followed by data, code_data_len bytes
   in total) to the current data field via subfield_pp. The first part builds
   a "subfield:" comment with a hex dump of at most the first 16 bytes; the
   condition guarding that dump and some declarations are elided from this
   excerpt. */
279 void yaz_marc_add_subfield(yaz_marc_t mt,
280 const char *code_data, size_t code_data_len)
287 sprintf(msg, "subfield:");
288 for (i = 0; i < 16 && i < code_data_len; i++)
289 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
290 if (i < code_data_len)
291 sprintf(msg + strlen(msg), " ..");
292 yaz_marc_add_comment(mt, msg);
297 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
298 nmem_malloc(mt->nmem, sizeof(*n));
299 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
301 /* mark subfield_pp to point to this one, so we append here next */
302 *mt->subfield_pp = n;
303 mt->subfield_pp = &n->next;
/* Parses the numeric parts of a 24-byte leader into the caller's output
   variables and appends the leader to the record.
   Offsets read: 10 (indicator length), 11 (identifier length), 12..16 (base
   address), 20 (length data entry), 21 (length starting), 22 (length
   implementation). When atoi_n_check rejects a value, the visible code
   assigns a default (2, 2, 4, 5 and 0 respectively for the single-digit
   items; the base-address handling is elided). The yaz_marc_cprintf calls
   record the parsed values as comments. */
307 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
308 int *indicator_length,
309 int *identifier_length,
311 int *length_data_entry,
312 int *length_starting,
313 int *length_implementation)
/* work on a local copy of the 24 leader bytes */
317 memcpy(leader, leader_c, 24);
319 if (!atoi_n_check(leader+10, 1, indicator_length))
322 "Indicator length at offset 10 should hold a digit."
325 *indicator_length = 2;
327 if (!atoi_n_check(leader+11, 1, identifier_length))
330 "Identifier length at offset 11 should hold a digit."
333 *identifier_length = 2;
335 if (!atoi_n_check(leader+12, 5, base_address))
338 "Base address at offsets 12..16 should hold a number."
342 if (!atoi_n_check(leader+20, 1, length_data_entry))
345 "Length data entry at offset 20 should hold a digit."
347 *length_data_entry = 4;
350 if (!atoi_n_check(leader+21, 1, length_starting))
353 "Length starting at offset 21 should hold a digit."
355 *length_starting = 5;
358 if (!atoi_n_check(leader+22, 1, length_implementation))
361 "Length implementation at offset 22 should hold a digit."
363 *length_implementation = 0;
369 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
370 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
371 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
372 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
373 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
374 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
376 yaz_marc_add_leader(mt, leader, 24);
379 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
381 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
382 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
385 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
387 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
388 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
391 /* try to guess how many bytes the identifier really is! */
/* Feeds the first 1, 2, 3 and 4 bytes of buf to yaz_iconv in turn and
   returns the first length that converts without error; returns 1 when no
   length works. The condition selecting between the iconv probing and the
   final "we don't know" return is elided from this excerpt. */
392 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
397 for (i = 1; i<5; i++)
400 size_t outbytesleft = sizeof(outbuf);
402 const char *inp = buf;
404 size_t inbytesleft = i;
405 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
406 &outp, &outbytesleft);
407 if (r != (size_t) (-1))
408 return i; /* got a complete sequence */
410 return 1; /* giving up */
412 return 1; /* we don't know */
/* Drops the current record: resets the NMEM that holds all nodes and points
   nodes_pp back at the list head. Other resets are elided from this
   excerpt. */
415 void yaz_marc_reset(yaz_marc_t mt)
417 nmem_reset(mt->nmem);
419 mt->nodes_pp = &mt->nodes;
/* "Check" output: locates the leader node, validates the identifier length
   digit at leader offset 11, then walks the nodes. Of the per-node cases only
   YAZ_MARC_COMMENT is visible in this excerpt: each comment is written
   through iconv, followed by a newline. Return statements are elided. */
423 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
425 struct yaz_marc_node *n;
426 int identifier_length;
427 const char *leader = 0;
429 for (n = mt->nodes; n; n = n->next)
430 if (n->which == YAZ_MARC_LEADER)
432 leader = n->u.leader;
438 if (!atoi_n_check(leader+11, 1, &identifier_length))
441 for (n = mt->nodes; n; n = n->next)
445 case YAZ_MARC_COMMENT:
446 wrbuf_iconv_write(wr, mt->iconv_cd,
447 n->u.comment, strlen(n->u.comment));
448 wrbuf_puts(wr, "\n");
457 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
458 int identifier_length)
460 /* if identifier length is 2 (most MARCs) or less (probably an error),
461 the code is a single character .. However we've
462 seen multibyte codes, so see how big it really is */
463 if (identifier_length > 2)
464 return identifier_length - 1;
466 return cdata_one_character(mt, data);
/* Line-format output. Locates the leader, validates the identifier length
   digit at leader offset 11, then writes one line per node:
   - data field: tag, indicator, then each subfield as subfield_str + code +
     " " + data, ended by endline_str;
   - control field: tag, " ", data, endline_str;
   - comment: text followed by ")\n" (the opening part is elided here);
   - leader: the leader string and a newline.
   A final newline ends the record. Return statements are elided. */
469 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
471 struct yaz_marc_node *n;
472 int identifier_length;
473 const char *leader = 0;
475 for (n = mt->nodes; n; n = n->next)
476 if (n->which == YAZ_MARC_LEADER)
478 leader = n->u.leader;
484 if (!atoi_n_check(leader+11, 1, &identifier_length))
487 for (n = mt->nodes; n; n = n->next)
489 struct yaz_marc_subfield *s;
492 case YAZ_MARC_DATAFIELD:
/* NOTE(review): indicator is passed to %s unchecked; nodes created by
   yaz_marc_add_datafield_turbo_xml start with indicator == 0 -- confirm
   such nodes cannot reach this writer without indicators set. */
493 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
494 n->u.datafield.indicator);
495 for (s = n->u.datafield.subfields; s; s = s->next)
497 size_t using_code_len = get_subfield_len(mt, s->code_data,
500 wrbuf_puts (wr, mt->subfield_str);
501 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
503 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
504 wrbuf_iconv_puts(wr, mt->iconv_cd,
505 s->code_data + using_code_len);
506 marc_iconv_reset(mt, wr);
508 wrbuf_puts (wr, mt->endline_str);
510 case YAZ_MARC_CONTROLFIELD:
511 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
512 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
513 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
514 marc_iconv_reset(mt, wr);
515 wrbuf_puts (wr, mt->endline_str);
517 case YAZ_MARC_COMMENT:
519 wrbuf_iconv_write(wr, mt->iconv_cd,
520 n->u.comment, strlen(n->u.comment));
521 marc_iconv_reset(mt, wr);
522 wrbuf_puts(wr, ")\n");
524 case YAZ_MARC_LEADER:
525 wrbuf_printf(wr, "%s\n", n->u.leader);
528 wrbuf_puts(wr, "\n");
/* Closes the collection wrapper: once a collection has been opened
   (enable_collection == collection_second) writes "</collection>" for
   MARCXML, Turbo MARCXML and MarcXchange output. Handling of other formats
   and the return value are elided from this excerpt. */
532 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
534 if (mt->enable_collection == collection_second)
536 switch(mt->output_format)
538 case YAZ_MARC_MARCXML:
539 case YAZ_MARC_TMARCXML:
540 wrbuf_printf(wr, "</collection>\n");
542 case YAZ_MARC_XCHANGE:
543 wrbuf_printf(wr, "</collection>\n");
550 void yaz_marc_enable_collection(yaz_marc_t mt)
552 mt->enable_collection = collection_first;
/* Dispatches to the writer matching mt->output_format: line, MARCXML /
   Turbo MARCXML, MarcXchange (without format and type attributes), ISO2709
   or check. Some case labels and the fall-through return are elided from
   this excerpt. */
555 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
557 switch(mt->output_format)
560 return yaz_marc_write_line(mt, wr);
561 case YAZ_MARC_MARCXML:
562 case YAZ_MARC_TMARCXML:
563 return yaz_marc_write_marcxml(mt, wr);
564 case YAZ_MARC_XCHANGE:
565 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
566 case YAZ_MARC_ISO2709:
567 return yaz_marc_write_iso2709(mt, wr);
569 return yaz_marc_write_check(mt, wr);
/* XML element names used by the text writer, indexed by its "turbo" flag:
   [0] regular MARCXML / MarcXchange names, [1] Turbo MARCXML names. */
const char *collection_name[] = {
    "collection", "collection"
};
const char *record_name[] = {
    "record", "r"
};
const char *leader_name[] = {
    "leader", "l"
};
const char *controlfield_name[] = {
    "controlfield", "c"
};
const char *datafield_name[] = {
    "datafield", "d"
};
const char *subfield_name[] = {
    "subfield", "s"
};
582 /** \brief common MARC XML/Xchange writer
584 \param wr WRBUF output
585 \param ns XMLNS for the elements
586 \param format record format (e.g. "MARC21")
587 \param type record type (e.g. "Bibliographic")
/* Text-based XML writer shared by MARCXML, Turbo MARCXML and MarcXchange.
   "turbo" is 1 when the write format is YAZ_MARC_TMARCXML and selects the
   short element names from the *_name tables. In the regular branch tag,
   indicators and subfield code are written as attributes; in the turbo
   branch the tag / code text is appended to the element name and indicators
   become <iN> child elements. Many lines (branch conditions, braces, return
   statements) are elided from this excerpt. */
589 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
594 struct yaz_marc_node *n;
595 int identifier_length;
596 const char *leader = 0;
598 int turbo = yaz_marc_get_write_format(mt) == YAZ_MARC_TMARCXML;
600 for (n = mt->nodes; n; n = n->next)
601 if (n->which == YAZ_MARC_LEADER)
603 leader = n->u.leader;
609 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* collection mode: the wrapper element carries the namespace and is written
   only once, before the first record */
612 if (mt->enable_collection != no_collection)
614 if (mt->enable_collection == collection_first) {
615 wrbuf_printf(wr, "<%s xmlns=\"%s\">\n", collection_name[turbo], ns);
616 mt->enable_collection = collection_second;
618 wrbuf_printf(wr, "<%s", record_name[turbo]);
622 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
625 wrbuf_printf(wr, " format=\"%.80s\"", format);
627 wrbuf_printf(wr, " type=\"%.80s\"", type);
628 wrbuf_printf(wr, ">\n");
629 for (n = mt->nodes; n; n = n->next)
631 struct yaz_marc_subfield *s;
635 case YAZ_MARC_DATAFIELD:
637 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
639 wrbuf_printf(wr, " tag=\"");
640 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
641 strlen(n->u.datafield.tag));
642 wrbuf_printf(wr, "\"");
643 if (n->u.datafield.indicator)
646 for (i = 0; n->u.datafield.indicator[i]; i++)
648 wrbuf_printf(wr, " ind%d=\"", i+1);
649 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
650 n->u.datafield.indicator+i, 1);
651 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
654 wrbuf_printf(wr, ">\n");
657 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
658 strlen(n->u.datafield.tag));
660 wrbuf_printf(wr, ">\n");
661 if (n->u.datafield.indicator)
664 for (i = 0; n->u.datafield.indicator[i]; i++)
666 wrbuf_printf(wr, " <i%d>", i+1);
667 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
668 n->u.datafield.indicator+i, 1);
669 wrbuf_printf(wr, "</i%d>", i+1);
670 wrbuf_puts(wr, "\n");
674 for (s = n->u.datafield.subfields; s; s = s->next)
676 size_t using_code_len = get_subfield_len(mt, s->code_data,
678 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
680 wrbuf_printf(wr, " code=\"");
681 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
682 s->code_data, using_code_len);
683 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
685 // TODO check this. encode special characters.
686 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
687 s->code_data, using_code_len);
690 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
691 s->code_data + using_code_len,
692 strlen(s->code_data + using_code_len));
693 marc_iconv_reset(mt, wr);
694 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
696 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
697 s->code_data, using_code_len);
698 wrbuf_puts(wr, ">\n");
700 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
703 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
704 strlen(n->u.datafield.tag));
/* NOTE(review): the format string on the next line has no conversion, so
   its datafield_name[turbo] argument is never used -- looks like a leftover
   that can be dropped; confirm. */
705 wrbuf_printf(wr, ">\n", datafield_name[turbo]);
707 case YAZ_MARC_CONTROLFIELD:
708 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
710 wrbuf_printf(wr, " tag=\"");
711 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
712 strlen(n->u.controlfield.tag));
713 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
716 //TODO convert special
717 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
718 strlen(n->u.controlfield.tag));
719 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
721 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
722 n->u.controlfield.data,
723 strlen(n->u.controlfield.data));
724 marc_iconv_reset(mt, wr);
725 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
726 //TODO convert special
728 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
729 strlen(n->u.controlfield.tag));
730 wrbuf_puts(wr, ">\n");
732 case YAZ_MARC_COMMENT:
733 wrbuf_printf(wr, "<!-- ");
734 wrbuf_puts(wr, n->u.comment);
735 wrbuf_printf(wr, " -->\n");
737 case YAZ_MARC_LEADER:
738 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
739 wrbuf_iconv_write_cdata(wr,
740 0 /* no charset conversion for leader */,
741 n->u.leader, strlen(n->u.leader));
742 wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
745 wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
/* Text-based XML writer with fixed (non-turbo) element names: record,
   leader, controlfield, datafield and subfield, with tag / indN / code
   written as attributes. Locates the leader and validates the identifier
   length digit at leader offset 11 before writing. Some lines (braces,
   conditions, return statements) are elided from this excerpt. */
749 static int yaz_marc_write_marcxml_ns2(yaz_marc_t mt, WRBUF wr,
754 struct yaz_marc_node *n;
755 int identifier_length;
756 const char *leader = 0;
758 for (n = mt->nodes; n; n = n->next)
759 if (n->which == YAZ_MARC_LEADER)
761 leader = n->u.leader;
767 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* collection mode: the <collection> element carries the namespace and is
   written only before the first record */
770 if (mt->enable_collection != no_collection)
772 if (mt->enable_collection == collection_first)
773 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
774 mt->enable_collection = collection_second;
775 wrbuf_printf(wr, "<record");
779 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
782 wrbuf_printf(wr, " format=\"%.80s\"", format);
784 wrbuf_printf(wr, " type=\"%.80s\"", type);
785 wrbuf_printf(wr, ">\n");
786 for (n = mt->nodes; n; n = n->next)
788 struct yaz_marc_subfield *s;
792 case YAZ_MARC_DATAFIELD:
793 wrbuf_printf(wr, " <datafield tag=\"");
794 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
795 strlen(n->u.datafield.tag));
796 wrbuf_printf(wr, "\"");
797 if (n->u.datafield.indicator)
/* one indN attribute per indicator byte, numbered from 1 */
800 for (i = 0; n->u.datafield.indicator[i]; i++)
802 wrbuf_printf(wr, " ind%d=\"", i+1);
803 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
804 n->u.datafield.indicator+i, 1);
805 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
808 wrbuf_printf(wr, ">\n");
809 for (s = n->u.datafield.subfields; s; s = s->next)
811 size_t using_code_len = get_subfield_len(mt, s->code_data,
813 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
814 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
815 s->code_data, using_code_len);
816 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
817 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
818 s->code_data + using_code_len,
819 strlen(s->code_data + using_code_len));
820 marc_iconv_reset(mt, wr);
821 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
822 wrbuf_puts(wr, "\n");
824 wrbuf_printf(wr, " </datafield>\n");
826 case YAZ_MARC_CONTROLFIELD:
827 wrbuf_printf(wr, " <controlfield tag=\"");
828 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
829 strlen(n->u.controlfield.tag));
830 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
831 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
832 n->u.controlfield.data,
833 strlen(n->u.controlfield.data));
835 marc_iconv_reset(mt, wr);
836 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
837 wrbuf_puts(wr, "\n");
839 case YAZ_MARC_COMMENT:
840 wrbuf_printf(wr, "<!-- ");
841 wrbuf_puts(wr, n->u.comment);
842 wrbuf_printf(wr, " -->\n");
844 case YAZ_MARC_LEADER:
845 wrbuf_printf(wr, " <leader>");
846 wrbuf_iconv_write_cdata(wr,
847 0 /* no charset conversion for leader */,
848 n->u.leader, strlen(n->u.leader));
849 wrbuf_printf(wr, "</leader>\n");
852 wrbuf_puts(wr, "</record>\n");
/* Chooses the XML back end. With write_using_libxml2 set, a libxml2 tree is
   built (yaz_marc_write_xml for MARCXML, otherwise the turbo builder), put
   into a new document, dumped to memory and copied into wr; otherwise the
   text writer yaz_marc_write_marcxml_ns1 is used. Error handling and
   clean-up lines are elided from this excerpt. */
857 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
862 if (mt->write_using_libxml2)
868 if (yaz_marc_get_write_format(mt) == YAZ_MARC_MARCXML)
869 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
870 else // Check for Turbo XML
871 ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type);
875 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
878 xmlDocSetRootElement(doc, root_ptr);
879 xmlDocDumpMemory(doc, &buf_out, &len_out);
881 wrbuf_write(wr, (const char *) buf_out, len_out);
892 return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
/* Writes the record as MARCXML, or as Turbo MARCXML when the output format
   is YAZ_MARC_TMARCXML, selecting the matching namespace. When no
   leader_spec is configured, leader position 9 is first set to 'a'. The
   remaining arguments of the final call are elided from this excerpt. */
895 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
897 /* set leader 09 to 'a' for UNICODE */
898 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
899 if (!mt->leader_spec)
900 yaz_marc_modify_leader(mt, 9, "a");
901 char *name_space = "http://www.loc.gov/MARC21/slim";
902 if (mt->output_format == YAZ_MARC_TMARCXML)
903 name_space = "http://www.indexdata.com/MARC21/turboxml";
904 return yaz_marc_write_marcxml_ns(mt, wr, name_space,
/* Writes the record as MarcXchange by calling the shared XML writer with the
   "info:lc/xmlns/marcxchange-v1" namespace. The remaining parameters and
   call arguments are elided from this excerpt. */
908 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
912 return yaz_marc_write_marcxml_ns(mt, wr,
913 "info:lc/xmlns/marcxchange-v1",
919 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
922 struct yaz_marc_subfield *s;
923 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
925 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
926 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
929 //TODO consider if safe
932 strncpy(field + 1, n->u.datafield.tag, 3);
934 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
936 if (n->u.datafield.indicator)
939 for (i = 0; n->u.datafield.indicator[i]; i++)
944 ind_val[0] = n->u.datafield.indicator[i];
947 sprintf(ind_str, "ind%d", i+1);
948 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
951 sprintf(ind_str, "i%d", i+1);
952 xmlNewTextChild(ptr, ns_record, BAD_CAST ind_str, BAD_CAST ind_val);
956 WRBUF subfield_name = wrbuf_alloc();
957 for (s = n->u.datafield.subfields; s; s = s->next)
959 xmlNode *ptr_subfield;
960 size_t using_code_len = get_subfield_len(mt, s->code_data,
962 wrbuf_rewind(wr_cdata);
963 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
964 marc_iconv_reset(mt, wr_cdata);
967 ptr_subfield = xmlNewTextChild(
969 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
970 wrbuf_rewind(wr_cdata);
971 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
972 xmlNewProp(ptr_subfield, BAD_CAST "code",
973 BAD_CAST wrbuf_cstr(wr_cdata));
975 else { // Turbo format
976 wrbuf_rewind(subfield_name);
977 wrbuf_puts(subfield_name, "s");
978 // TODO Map special codes to something possible for XML ELEMENT names
979 if ((s->code_data[0] >= '0' && s->code_data[0] <= '9') ||
980 (s->code_data[0] >= 'a' && s->code_data[0] <= 'z') ||
981 (s->code_data[0] >= 'A' && s->code_data[0] <= 'Z'))
983 wrbuf_iconv_write(subfield_name, mt->iconv_cd,s->code_data, using_code_len);
986 char buffer[2*using_code_len + 1];
988 for (index = 0; index < using_code_len; index++) {
989 sprintf(buffer + 2*index, "%02X", (unsigned char) s->code_data[index] & 0xFF);
991 buffer[2*(index+1)] = 0;
992 wrbuf_puts(subfield_name, "-");
993 wrbuf_puts(subfield_name, buffer);
994 yaz_log(YLOG_WARN, "Using numeric value in element name: %s", buffer);
996 ptr_subfield = xmlNewTextChild(ptr, ns_record,
997 BAD_CAST wrbuf_cstr(subfield_name),
998 BAD_CAST wrbuf_cstr(wr_cdata));
1001 wrbuf_destroy(subfield_name);
/* Builds a libxml2 tree for the record and stores its root element ("r") in
   *root_ptr. Data fields are delegated to add_marc_datafield_turbo_xml;
   control fields, comments and the leader are added here. Locates the leader
   and validates the identifier length digit at leader offset 11 first. Many
   lines (declarations, branch conditions, return statements) are elided from
   this excerpt. */
1004 int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
1009 struct yaz_marc_node *n;
1010 int identifier_length;
1011 const char *leader = 0;
1012 xmlNode *record_ptr;
1015 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
1016 for (n = mt->nodes; n; n = n->next)
1017 if (n->which == YAZ_MARC_LEADER)
1019 leader = n->u.leader;
1025 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for iconv-converted field data; destroyed at the end */
1028 wr_cdata = wrbuf_alloc();
1030 record_ptr = xmlNewNode(0, BAD_CAST "r");
1031 *root_ptr = record_ptr;
1033 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1034 xmlSetNs(record_ptr, ns_record);
1037 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1039 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1040 for (n = mt->nodes; n; n = n->next)
1042 struct yaz_marc_subfield *s;
1047 case YAZ_MARC_DATAFIELD:
1048 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
1050 case YAZ_MARC_CONTROLFIELD:
1051 wrbuf_rewind(wr_cdata);
1052 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1053 marc_iconv_reset(mt, wr_cdata);
1056 ptr = xmlNewTextChild(record_ptr, ns_record,
1057 BAD_CAST "controlfield",
1058 BAD_CAST wrbuf_cstr(wr_cdata));
1059 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1062 // TODO required iconv?
/* at most three tag bytes are copied into the element name buffer */
1065 strncpy(field + 1, n->u.controlfield.tag, 3);
1067 ptr = xmlNewTextChild(record_ptr, ns_record,
1069 BAD_CAST wrbuf_cstr(wr_cdata));
1073 case YAZ_MARC_COMMENT:
1074 ptr = xmlNewComment(BAD_CAST n->u.comment);
1075 xmlAddChild(record_ptr, ptr);
1077 case YAZ_MARC_LEADER:
1079 char *field = "leader";
1082 xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
1083 BAD_CAST n->u.leader);
1088 wrbuf_destroy(wr_cdata);
/* Builds a libxml2 tree for the record in regular MARCXML shape and stores
   its root element ("record") in *root_ptr: datafield elements with tag and
   indN attributes and subfield children with a code attribute, controlfield
   elements with a tag attribute, comments, and a leader element. Locates the
   leader and validates the identifier length digit at leader offset 11
   first. Some lines (declarations, conditions, return statements) are elided
   from this excerpt. */
1093 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
1098 struct yaz_marc_node *n;
1099 int identifier_length;
1100 const char *leader = 0;
1101 xmlNode *record_ptr;
1105 for (n = mt->nodes; n; n = n->next)
1106 if (n->which == YAZ_MARC_LEADER)
1108 leader = n->u.leader;
1114 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for iconv-converted text; destroyed at the end */
1117 wr_cdata = wrbuf_alloc();
1119 record_ptr = xmlNewNode(0, BAD_CAST "record");
1120 *root_ptr = record_ptr;
1122 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1123 xmlSetNs(record_ptr, ns_record);
1126 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1128 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1129 for (n = mt->nodes; n; n = n->next)
1131 struct yaz_marc_subfield *s;
1136 case YAZ_MARC_DATAFIELD:
1137 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1138 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1139 if (n->u.datafield.indicator)
/* one indN attribute per indicator byte, numbered from 1 */
1142 for (i = 0; n->u.datafield.indicator[i]; i++)
1147 sprintf(ind_str, "ind%d", i+1);
1148 ind_val[0] = n->u.datafield.indicator[i];
1150 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1153 for (s = n->u.datafield.subfields; s; s = s->next)
1155 xmlNode *ptr_subfield;
1156 size_t using_code_len = get_subfield_len(mt, s->code_data,
1158 wrbuf_rewind(wr_cdata);
1159 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1160 s->code_data + using_code_len);
1161 marc_iconv_reset(mt, wr_cdata);
1162 ptr_subfield = xmlNewTextChild(
1164 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
/* wr_cdata is reused for the subfield code once the text node exists */
1166 wrbuf_rewind(wr_cdata);
1167 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1168 s->code_data, using_code_len);
1169 xmlNewProp(ptr_subfield, BAD_CAST "code",
1170 BAD_CAST wrbuf_cstr(wr_cdata));
1173 case YAZ_MARC_CONTROLFIELD:
1174 wrbuf_rewind(wr_cdata);
1175 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1176 marc_iconv_reset(mt, wr_cdata);
1178 ptr = xmlNewTextChild(record_ptr, ns_record,
1179 BAD_CAST "controlfield",
1180 BAD_CAST wrbuf_cstr(wr_cdata));
1182 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1184 case YAZ_MARC_COMMENT:
1185 ptr = xmlNewComment(BAD_CAST n->u.comment);
1186 xmlAddChild(record_ptr, ptr);
1188 case YAZ_MARC_LEADER:
1189 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1190 BAD_CAST n->u.leader);
1194 wrbuf_destroy(wr_cdata);
/* Serialises the record as ISO2709 into wr.
   1. Reads indicator, identifier, data-entry, starting-position and
      implementation lengths from leader offsets 10, 11, 20, 21 and 22.
   2. First pass: builds the directory in wr_dir, measuring each field by
      converting it through iconv into the scratch buffer wr_data_tmp.
   3. Writes the 24-byte head (record length, leader bytes 5..11, base
      address, leader bytes 17..23) followed by the directory.
   4. Second pass: writes the field data with the real separators, then the
      record separator.
   Some lines (declarations, switch headers, return statements) are elided
   from this excerpt. */
1203 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1205 struct yaz_marc_node *n;
1206 int indicator_length;
1207 int identifier_length;
1208 int length_data_entry;
1209 int length_starting;
1210 int length_implementation;
1211 int data_offset = 0;
1212 const char *leader = 0;
1213 WRBUF wr_dir, wr_head, wr_data_tmp;
/* no break here: the last leader node in the list is the one used */
1216 for (n = mt->nodes; n; n = n->next)
1217 if (n->which == YAZ_MARC_LEADER)
1218 leader = n->u.leader;
1222 if (!atoi_n_check(leader+10, 1, &indicator_length))
1224 if (!atoi_n_check(leader+11, 1, &identifier_length))
1226 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1228 if (!atoi_n_check(leader+21, 1, &length_starting))
1230 if (!atoi_n_check(leader+22, 1, &length_implementation))
1233 wr_data_tmp = wrbuf_alloc();
1234 wr_dir = wrbuf_alloc();
1235 for (n = mt->nodes; n; n = n->next)
1237 int data_length = 0;
1238 struct yaz_marc_subfield *s;
1242 case YAZ_MARC_DATAFIELD:
1243 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1244 data_length += indicator_length;
1245 wrbuf_rewind(wr_data_tmp);
1246 for (s = n->u.datafield.subfields; s; s = s->next)
1248 /* write dummy IDFS + content */
1249 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1250 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1251 marc_iconv_reset(mt, wr_data_tmp);
1253 /* write dummy FS (makes MARC-8 to become ASCII) */
1254 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1255 marc_iconv_reset(mt, wr_data_tmp);
1256 data_length += wrbuf_len(wr_data_tmp);
1258 case YAZ_MARC_CONTROLFIELD:
1259 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1261 wrbuf_rewind(wr_data_tmp);
1262 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1263 n->u.controlfield.data);
1264 marc_iconv_reset(mt, wr_data_tmp);
1265 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1266 marc_iconv_reset(mt, wr_data_tmp);
1267 data_length += wrbuf_len(wr_data_tmp);
1269 case YAZ_MARC_COMMENT:
1271 case YAZ_MARC_LEADER:
/* directory entry: field length, then start offset, zero-padded to the
   widths given by the leader */
1276 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1277 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1278 data_offset += data_length;
1281 /* mark end of directory */
1282 wrbuf_putc(wr_dir, ISO2709_FS);
1284 /* base address of data (comes after leader+directory) */
1285 base_address = 24 + wrbuf_len(wr_dir);
1287 wr_head = wrbuf_alloc();
1289 /* write record length */
1290 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1291 /* from "original" leader */
1292 wrbuf_write(wr_head, leader+5, 7);
1293 /* base address of data */
1294 wrbuf_printf(wr_head, "%05d", base_address);
1295 /* from "original" leader */
1296 wrbuf_write(wr_head, leader+17, 7);
1298 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1299 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1300 wrbuf_destroy(wr_head);
1301 wrbuf_destroy(wr_dir);
1302 wrbuf_destroy(wr_data_tmp);
1304 for (n = mt->nodes; n; n = n->next)
1306 struct yaz_marc_subfield *s;
1310 case YAZ_MARC_DATAFIELD:
1311 wrbuf_printf(wr, "%.*s", indicator_length,
1312 n->u.datafield.indicator);
1313 for (s = n->u.datafield.subfields; s; s = s->next)
1315 wrbuf_putc(wr, ISO2709_IDFS);
1316 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1317 marc_iconv_reset(mt, wr);
1319 wrbuf_putc(wr, ISO2709_FS);
1321 case YAZ_MARC_CONTROLFIELD:
1322 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1323 marc_iconv_reset(mt, wr);
1324 wrbuf_putc(wr, ISO2709_FS);
1326 case YAZ_MARC_COMMENT:
1328 case YAZ_MARC_LEADER:
1332 wrbuf_printf(wr, "%c", ISO2709_RS);
/* Reads an ISO2709 record from buf and writes it to wr in the handle's
   current output format. Per the inline comments, returns -1 on error and
   otherwise the value from yaz_marc_read_iso2709 (a length > 0). The
   conditions in front of the returns are elided from this excerpt. */
1337 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1339 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1342 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1344 return -1; /* error */
1345 return r; /* OK, return length > 0 */
/* Like yaz_marc_decode_wrbuf, but decodes into the handle's own WRBUF
   (rewound first) and hands back a pointer to its contents and their length.
   The returned pointer refers to mt->m_wr, which this function rewinds on
   every call. Guards and the return statement are elided from this
   excerpt. */
1348 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1349 const char **result, size_t *rsize)
1353 wrbuf_rewind(mt->m_wr);
1354 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1356 *result = wrbuf_cstr(mt->m_wr);
1358 *rsize = wrbuf_len(mt->m_wr);
/* Stores the input (read) format in the handle. Lines between the signature
   and the assignment are elided from this excerpt. */
1362 void yaz_marc_set_read_format(yaz_marc_t mt, int format)
1365 mt->input_format = format;
/* Returns the handle's input (read) format. Lines around the return are
   elided from this excerpt. */
1368 int yaz_marc_get_read_format(yaz_marc_t mt)
1371 return mt->input_format;
/* Stores the output (write) format in the handle. Selecting
   YAZ_MARC_TMARCXML also switches write_using_libxml2 on. Some lines are
   elided from this excerpt. */
1376 void yaz_marc_set_write_format(yaz_marc_t mt, int format)
1379 mt->output_format = format;
1381 // Force using libxml2
1382 if (mt->output_format == YAZ_MARC_TMARCXML)
1383 mt->write_using_libxml2 = 1;
/* Returns the handle's output (write) format. Lines around the return are
   elided from this excerpt. */
1388 int yaz_marc_get_write_format(yaz_marc_t mt)
1391 return mt->output_format;
1397 * Deprecated, use yaz_marc_set_write_format
1399 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1401 yaz_marc_set_write_format(mt, xmlmode);
/* Sets the handle's debug level; the body is not visible in this excerpt. */
1406 void yaz_marc_debug(yaz_marc_t mt, int level)
/* Sets the iconv descriptor used for character conversion; the body is not
   visible in this excerpt (presumably assigns mt->iconv_cd -- confirm). */
1412 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1417 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1419 return mt->iconv_cd;
/* Overwrites part of the record's leader in place: copies strlen(str) bytes
   of str to offset off of the leader node's string (the terminating NUL of
   str is not copied). Some lines are elided from this excerpt.
   NOTE(review): no check is visible that off + strlen(str) stays within the
   stored leader -- confirm callers guarantee it. */
1422 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1424 struct yaz_marc_node *n;
1426 for (n = mt->nodes; n; n = n->next)
1427 if (n->which == YAZ_MARC_LEADER)
1429 leader = n->u.leader;
1430 memcpy(leader+off, str, strlen(str));
/* Replaces the handle's leader specification. The old spec is freed and
   cleared first; the new one is tried against a 24-byte dummy leader with
   marc_exec_leader and then stored as a copy. The surrounding conditions and
   return statements are elided from this excerpt. */
1435 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1437 xfree(mt->leader_spec);
1438 mt->leader_spec = 0;
1441 char dummy_leader[24];
1442 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1444 mt->leader_spec = xstrdup(leader_spec);
/* Applies a leader specification to a leader buffer of "size" bytes. Each
   item is scanned as "<pos>=<value>", with the value running up to the next
   comma (at most 20 characters). A position outside [0, size) is checked
   for, and for quoted values the text after the opening quote is copied to
   leader + pos, guarded by a len + pos > size check. The loop structure, the
   numeric-value branch and all return statements are elided from this
   excerpt. */
1449 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1451 const char *cp = leader_spec;
1456 int no_read = 0, no = 0;
1458 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1459 if (no < 2 || no_read < 3)
1461 if (pos < 0 || (size_t) pos >= size)
/* look for the closing quote of a quoted value */
1466 const char *vp = strchr(val+1, '\'');
1472 if (len + pos > size)
1474 memcpy(leader + pos, val+1, len);
1476 else if (*val >= '0' && *val <= '9')
/* Maps a format name to its YAZ_MARC_* constant: "marc" -> ISO2709,
   "marcxml" -> MARCXML, "tmarcxml" -> TMARCXML, "marcxchange" -> XCHANGE,
   "line" -> LINE. The declaration (and initial value) of mode and the return
   statement are elided from this excerpt. */
1492 int yaz_marc_decode_formatstr(const char *arg)
1495 if (!strcmp(arg, "marc"))
1496 mode = YAZ_MARC_ISO2709;
1497 if (!strcmp(arg, "marcxml"))
1498 mode = YAZ_MARC_MARCXML;
1499 if (!strcmp(arg, "tmarcxml"))
1500 mode = YAZ_MARC_TMARCXML;
1501 if (!strcmp(arg, "marcxchange"))
1502 mode = YAZ_MARC_XCHANGE;
1503 if (!strcmp(arg, "line"))
1504 mode = YAZ_MARC_LINE;
1508 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1510 mt->write_using_libxml2 = enable;
1513 int yaz_marc_is_turbo_format(yaz_marc_t mt)
1515 return mt->output_format == YAZ_MARC_TMARCXML;
1522 * c-file-style: "Stroustrup"
1523 * indent-tabs-mode: nil
1525 * vim: shiftwidth=4 tabstop=8 expandtab