1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
/** \brief state of the optional collection wrapper for XML output
    The code in this file uses the values no_collection, collection_first
    and collection_second with this type. */
35 enum yaz_collection_state {
41 /** \brief node types for yaz_marc_node */
42 enum YAZ_MARC_NODE_TYPE
/* Besides YAZ_MARC_CONTROLFIELD the writers in this file also switch on
   YAZ_MARC_DATAFIELD, YAZ_MARC_COMMENT and YAZ_MARC_LEADER. */
45 YAZ_MARC_CONTROLFIELD,
50 /** \brief represents a data field */
51 struct yaz_marc_datafield {
/* head of the linked list of subfields; walked via yaz_marc_subfield.next */
54 struct yaz_marc_subfield *subfields;
57 /** \brief represents a control field (used through its tag and data members) */
58 struct yaz_marc_controlfield {
63 /** \brief a comment node */
/* NOTE(review): the code visible in this file stores comment text through
   n->u.comment of yaz_marc_node; no use of this struct is visible - confirm
   whether it is still needed. */
64 struct yaz_marc_comment {
68 /** \brief MARC node */
69 struct yaz_marc_node {
/* discriminator: selects which payload member applies to this node */
70 enum YAZ_MARC_NODE_TYPE which;
/* payload; code in this file reaches it as n->u.datafield, n->u.controlfield,
   n->u.comment and n->u.leader */
72 struct yaz_marc_datafield datafield;
73 struct yaz_marc_controlfield controlfield;
/* next node of the record; the writers walk the list until a null pointer */
77 struct yaz_marc_node *next;
80 /** \brief represents a subfield */
81 struct yaz_marc_subfield {
/* next subfield of the same data field; the list is walked until a null pointer */
83 struct yaz_marc_subfield *next;
86 /** \brief the internals of a yaz_marc_t handle */
/* non-zero: XML output is built through libxml2 (see yaz_marc_write_marcxml_ns) */
93 int write_using_libxml2;
/* tracks whether records are wrapped in a collection element and whether
   its opening tag has already been written */
94 enum yaz_collection_state enable_collection;
/* head of the node list of the current record */
99 struct yaz_marc_node *nodes;
/* link through which the next node is attached */
100 struct yaz_marc_node **nodes_pp;
/* link through which the next subfield of the current data field is attached */
101 struct yaz_marc_subfield **subfield_pp;
/** \brief creates a MARC handle
    Defaults set by the visible code: line-mode output, libxml2 writing
    disabled, no collection wrapper, subfield prefix " $" and a newline as
    line terminator. */
104 yaz_marc_t yaz_marc_create(void)
/* the handle itself comes from xmalloc */
106 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
107 mt->output_format = YAZ_MARC_LINE;
109 mt->write_using_libxml2 = 0;
110 mt->enable_collection = no_collection;
/* scratch output buffer; yaz_marc_decode_buf writes into it */
111 mt->m_wr = wrbuf_alloc();
114 strcpy(mt->subfield_str, " $");
115 strcpy(mt->endline_str, "\n");
/* memory pool used for nodes and for the strings copied into them */
117 mt->nmem = nmem_create();
/** \brief destroys a MARC handle
    Releases the memory pool, the scratch WRBUF and the leader spec string
    owned by the handle. */
122 void yaz_marc_destroy(yaz_marc_t mt)
126 nmem_destroy(mt->nmem);
127 wrbuf_destroy(mt->m_wr);
128 xfree(mt->leader_spec);
/** \brief accessor for the handle's NMEM (presumably returns mt->nmem - body not visible here, confirm) */
132 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
/** \brief calls wrbuf_iconv_reset on wr with the handle's iconv descriptor
    The writers call this after emitting each converted field or subfield
    value. */
137 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
139 wrbuf_iconv_reset(wr, mt->iconv_cd);
/* forward declaration; the definition appears further down in this file */
142 static int marc_exec_leader(const char *leader_spec, char *leader,
/** \brief allocates a new node from the handle's NMEM
    After the call mt->nodes_pp refers to the new node's next link, so the
    following node gets attached behind it. */
146 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
148 struct yaz_marc_node *n = (struct yaz_marc_node *)
149 nmem_malloc(mt->nmem, sizeof(*n));
/* the next node added will be linked after this one */
152 mt->nodes_pp = &n->next;
/** \brief adds a controlfield node whose tag and data come from XML nodes
    \param mt handle
    \param ptr_tag XML node handed to nmem_text_node_cdata to obtain the tag
    \param ptr_data XML node handed to nmem_text_node_cdata to obtain the data
    The handle's NMEM is passed to nmem_text_node_cdata for both strings. */
157 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
158 const xmlNode *ptr_data)
160 struct yaz_marc_node *n = yaz_marc_add_node(mt);
161 n->which = YAZ_MARC_CONTROLFIELD;
162 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
163 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief adds a controlfield node with a caller-supplied tag string
    \param mt handle
    \param tag tag string; the pointer is stored as is, not copied
    \param ptr_data XML node handed to nmem_text_node_cdata to obtain the data */
166 void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, char *tag,
167 const xmlNode *ptr_data)
169 struct yaz_marc_node *n = yaz_marc_add_node(mt);
170 n->which = YAZ_MARC_CONTROLFIELD;
/* no copy is made: tag must stay valid for as long as the node is used */
171 n->u.controlfield.tag = tag;
172 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief adds a comment node
    \param mt handle
    \param comment comment text; duplicated into the handle's NMEM */
178 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
180 struct yaz_marc_node *n = yaz_marc_add_node(mt);
181 n->which = YAZ_MARC_COMMENT;
182 n->u.comment = nmem_strdup(mt->nmem, comment);
/** \brief adds a comment node built from a printf-style format
    \param mt handle
    \param fmt format string, followed by its arguments
    The text is formatted into a local buffer (declaration not visible
    here) and passed on to yaz_marc_add_comment. */
185 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
/* formatting is limited to sizeof(buf)-1 */
191 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
192 yaz_marc_add_comment(mt, buf);
/** \brief accessor for the handle's debug level (body not visible here - confirm) */
196 int yaz_marc_get_debug(yaz_marc_t mt)
/** \brief adds a leader node
    \param mt handle
    \param leader leader bytes
    \param leader_len number of bytes taken from leader
    The bytes are duplicated into the handle's NMEM and the handle's leader
    spec is then applied to the copy. */
201 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
203 struct yaz_marc_node *n = yaz_marc_add_node(mt);
204 n->which = YAZ_MARC_LEADER;
205 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
/* the result of marc_exec_leader is not checked here */
206 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
/** \brief adds a controlfield node from a tag string and a data buffer
    \param mt handle
    \param tag tag string; duplicated into the handle's NMEM
    \param data field data
    \param data_len number of bytes taken from data */
209 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
210 const char *data, size_t data_len)
212 struct yaz_marc_node *n = yaz_marc_add_node(mt);
213 n->which = YAZ_MARC_CONTROLFIELD;
214 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
215 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
/* Trace: a comment node with a hex dump of at most the first 16 data
   bytes.  NOTE(review): the condition guarding this section and the
   declaration (size) of msg are not visible here; the sprintf calls are
   unbounded, so msg must be large enough for the longest dump. */
221 sprintf(msg, "controlfield:");
222 for (i = 0; i < 16 && i < data_len; i++)
223 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
/* presumably appended only when the data is longer than the dump, as in
   yaz_marc_add_subfield - confirm */
225 sprintf(msg + strlen(msg), " ..");
226 yaz_marc_add_comment(mt, msg);
/** \brief adds a datafield node with an empty subfield list
    \param mt handle
    \param tag tag string; duplicated into the handle's NMEM
    \param indicator indicator characters
    \param indicator_len number of bytes taken from indicator
    Subfields added afterwards are attached to this field. */
230 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
231 const char *indicator, size_t indicator_len)
233 struct yaz_marc_node *n = yaz_marc_add_node(mt);
234 n->which = YAZ_MARC_DATAFIELD;
235 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
236 n->u.datafield.indicator =
237 nmem_strdupn(mt->nmem, indicator, indicator_len);
238 n->u.datafield.subfields = 0;
240 /* make subfield_pp the current (last one) */
241 mt->subfield_pp = &n->u.datafield.subfields;
244 // Magic function: appends an attribute value to the element name if the value consists of plain characters only.
245 // If it does not, and the attribute name is not null, it appends an attribute with that value instead.
246 // If the attribute name is null, it returns a non-zero value, meaning it could not handle the value.
/* "Plain" here means ASCII digits and letters, as tested by the loop below. */
248 int element_name_append_attribute_value(yaz_marc_t mt, WRBUF buffer, const char *attribute_name, char *code_data, size_t code_len) {
249 // TODO Map special codes to something possible for XML ELEMENT names
/* look for any character outside 0-9, a-z and A-Z */
253 for (index = 0; index < code_len; index++) {
254 if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
255 (code_data[index] >= 'a' && code_data[index] <= 'z') ||
256 (code_data[index] >= 'A' && code_data[index] <= 'Z')))
/* open the attribute when the value cannot go into the element name */
261 if (encode && attribute_name)
262 wrbuf_printf(buffer, " %s=\"", attribute_name);
/* the value is written unless it needs encoding and no attribute can carry it */
264 if (!encode || attribute_name)
265 wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len);
/* close the attribute opened above */
269 if (encode && attribute_name)
270 wrbuf_printf(buffer, "\""); // return error if we couldn't handle it.
/** \brief adds a datafield node whose tag comes from an XML node
    \param mt handle
    \param ptr_tag XML node handed to nmem_text_node_cdata to obtain the tag
    \param indicator indicator characters
    \param indicator_len number of bytes taken from indicator
    The subfield list starts empty; subfields added afterwards are attached
    to this field. */
275 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
276 const char *indicator, size_t indicator_len)
278 struct yaz_marc_node *n = yaz_marc_add_node(mt);
279 n->which = YAZ_MARC_DATAFIELD;
280 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
281 n->u.datafield.indicator =
282 nmem_strdupn(mt->nmem, indicator, indicator_len);
283 n->u.datafield.subfields = 0;
285 /* make subfield_pp the current (last one) */
286 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief adds a datafield node using caller-supplied tag and indicator strings
    \param mt handle
    \param tag_value tag string; the pointer is stored, not copied
    \param indicators indicator string; the pointer is stored, not copied
    Both strings must stay valid for as long as the node is used. */
289 void yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value, char *indicators)
291 struct yaz_marc_node *n = yaz_marc_add_node(mt);
292 n->which = YAZ_MARC_DATAFIELD;
293 n->u.datafield.tag = tag_value;
294 n->u.datafield.indicator = indicators;
295 n->u.datafield.subfields = 0;
297 // make subfield_pp the current (last one)
298 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief replaces the indicator string of a datafield node
    \param n node; its datafield member is written
    \param indicator new indicator string; the pointer is stored, not copied */
301 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
303 n->u.datafield.indicator = indicator;
/** \brief appends a subfield to the data field added last
    \param mt handle
    \param code_data subfield code immediately followed by the subfield data
    \param code_data_len number of bytes taken from code_data */
308 void yaz_marc_add_subfield(yaz_marc_t mt,
309 const char *code_data, size_t code_data_len)
/* Trace: a comment node with a hex dump of at most the first 16 bytes;
   " .." marks a truncated dump.  NOTE(review): the condition guarding this
   section and the declaration of msg are not visible here. */
316 sprintf(msg, "subfield:");
317 for (i = 0; i < 16 && i < code_data_len; i++)
318 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
319 if (i < code_data_len)
320 sprintf(msg + strlen(msg), " ..");
321 yaz_marc_add_comment(mt, msg);
/* the subfield and its string are both allocated from the handle's NMEM */
326 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
327 nmem_malloc(mt->nmem, sizeof(*n));
328 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
330 /* mark subfield_pp to point to this one, so we append here next */
331 *mt->subfield_pp = n;
332 mt->subfield_pp = &n->next;
/** \brief parses the numeric leader positions and adds the leader node
    \param mt handle
    \param leader_c leader; 24 bytes are copied from it
    \param indicator_length receives leader offset 10 (fallback 2)
    \param identifier_length receives leader offset 11 (fallback 2)
    \param length_data_entry receives leader offset 20 (fallback 4)
    \param length_starting receives leader offset 21 (fallback 5)
    \param length_implementation receives leader offset 22 (fallback 0)
    The base address at offsets 12..16 is parsed as well.  Each value is
    read with atoi_n_check; the fallback is stored when the check fails. */
336 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
337 int *indicator_length,
338 int *identifier_length,
340 int *length_data_entry,
341 int *length_starting,
342 int *length_implementation)
/* work on a local copy of the 24-byte leader */
346 memcpy(leader, leader_c, 24);
348 if (!atoi_n_check(leader+10, 1, indicator_length))
351 "Indicator length at offset 10 should hold a digit."
354 *indicator_length = 2;
356 if (!atoi_n_check(leader+11, 1, identifier_length))
359 "Identifier length at offset 11 should hold a digit."
362 *identifier_length = 2;
364 if (!atoi_n_check(leader+12, 5, base_address))
367 "Base address at offsets 12..16 should hold a number."
371 if (!atoi_n_check(leader+20, 1, length_data_entry))
374 "Length data entry at offset 20 should hold a digit."
376 *length_data_entry = 4;
379 if (!atoi_n_check(leader+21, 1, length_starting))
382 "Length starting at offset 21 should hold a digit."
384 *length_starting = 5;
387 if (!atoi_n_check(leader+22, 1, length_implementation))
390 "Length implementation at offset 22 should hold a digit."
392 *length_implementation = 0;
/* report the values as comment nodes (presumably only in debug mode - the
   guarding condition is not visible here) */
398 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
399 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
400 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
401 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
402 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
403 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
/* the local copy of the leader becomes the leader node */
405 yaz_marc_add_leader(mt, leader, 24);
/** \brief sets the string written before each subfield in line mode
    \param mt handle
    \param s new prefix; truncated to fit mt->subfield_str */
408 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
410 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
411 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/** \brief sets the string written at the end of each field in line mode
    \param mt handle
    \param s new terminator; truncated to fit mt->endline_str */
414 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
416 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
417 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
420 /* try to guess how many bytes the identifier really is! */
/* Feeds the first 1, 2, 3 and 4 bytes of buf to yaz_iconv in turn and
   returns the first length that converts without error; returns 1 when
   that cannot be determined. */
421 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
426 for (i = 1; i<5; i++)
429 size_t outbytesleft = sizeof(outbuf);
431 const char *inp = buf;
433 size_t inbytesleft = i;
434 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
435 &outp, &outbytesleft);
436 if (r != (size_t) (-1))
437 return i; /* got a complete sequence */
439 return 1; /* giving up */
441 return 1; /* we don't know */
/** \brief clears the current record of the handle
    Resets the handle's NMEM and points nodes_pp back at the list head. */
444 void yaz_marc_reset(yaz_marc_t mt)
446 nmem_reset(mt->nmem);
448 mt->nodes_pp = &mt->nodes;
/** \brief writes the comment nodes of the record, one per line
    \param mt handle
    \param wr WRBUF output
    A leader node is looked up first and its byte 11 is validated with
    atoi_n_check.  Only the comment case of the node switch is visible
    here. */
452 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
454 struct yaz_marc_node *n;
455 int identifier_length;
456 const char *leader = 0;
/* locate the leader node */
458 for (n = mt->nodes; n; n = n->next)
459 if (n->which == YAZ_MARC_LEADER)
461 leader = n->u.leader;
467 if (!atoi_n_check(leader+11, 1, &identifier_length))
470 for (n = mt->nodes; n; n = n->next)
474 case YAZ_MARC_COMMENT:
475 wrbuf_iconv_write(wr, mt->iconv_cd,
476 n->u.comment, strlen(n->u.comment));
477 wrbuf_puts(wr, "\n");
/** \brief returns the number of bytes occupied by a subfield code
    \param mt handle (its iconv descriptor is used for probing)
    \param data subfield code followed by subfield data
    \param identifier_length identifier length taken from the leader */
486 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
487 int identifier_length)
489 /* if identifier length is 2 (most MARCs) or less (probably an error),
490 the code is a single character .. However we've
491 seen multibyte codes, so see how big it really is */
492 if (identifier_length > 2)
493 return identifier_length - 1;
495 return cdata_one_character(mt, data);
/** \brief writes the record in line format
    \param mt handle
    \param wr WRBUF output
    Each field goes on its own line, terminated by mt->endline_str; each
    subfield is preceded by mt->subfield_str.  The record is followed by an
    extra newline. */
498 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
500 struct yaz_marc_node *n;
501 int identifier_length;
502 const char *leader = 0;
/* locate the leader; its byte 11 gives the identifier length */
504 for (n = mt->nodes; n; n = n->next)
505 if (n->which == YAZ_MARC_LEADER)
507 leader = n->u.leader;
513 if (!atoi_n_check(leader+11, 1, &identifier_length))
516 for (n = mt->nodes; n; n = n->next)
518 struct yaz_marc_subfield *s;
/* data field: tag, indicators, then code and value of every subfield */
521 case YAZ_MARC_DATAFIELD:
522 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
523 n->u.datafield.indicator);
524 for (s = n->u.datafield.subfields; s; s = s->next)
526 size_t using_code_len = get_subfield_len(mt, s->code_data,
529 wrbuf_puts (wr, mt->subfield_str);
530 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
532 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
533 wrbuf_iconv_puts(wr, mt->iconv_cd,
534 s->code_data + using_code_len);
535 marc_iconv_reset(mt, wr);
537 wrbuf_puts (wr, mt->endline_str);
/* control field: tag and data */
539 case YAZ_MARC_CONTROLFIELD:
540 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
541 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
542 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
543 marc_iconv_reset(mt, wr);
544 wrbuf_puts (wr, mt->endline_str);
/* comment: the text is followed by ")" and a newline */
546 case YAZ_MARC_COMMENT:
548 wrbuf_iconv_write(wr, mt->iconv_cd,
549 n->u.comment, strlen(n->u.comment));
550 marc_iconv_reset(mt, wr);
551 wrbuf_puts(wr, ")\n");
/* leader: written without character set conversion */
553 case YAZ_MARC_LEADER:
554 wrbuf_printf(wr, "%s\n", n->u.leader);
557 wrbuf_puts(wr, "\n");
/** \brief writes the closing collection tag, if one is due
    \param mt handle
    \param wr WRBUF output
    Output happens only in state collection_second, i.e. after a writer has
    emitted the opening tag, and only for the MARCXML, TurboMARC and
    MARCXchange formats. */
561 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
563 if (mt->enable_collection == collection_second)
565 switch(mt->output_format)
567 case YAZ_MARC_MARCXML:
568 case YAZ_MARC_TMARCXML:
569 wrbuf_printf(wr, "</collection>\n");
571 case YAZ_MARC_XCHANGE:
572 wrbuf_printf(wr, "</collection>\n");
/** \brief turns on the collection wrapper for XML output
    The state becomes collection_first, which makes the next record written
    by the XML writers emit the opening collection tag. */
579 void yaz_marc_enable_collection(yaz_marc_t mt)
581 mt->enable_collection = collection_first;
/** \brief writes the record in the output format selected on the handle
    \param mt handle
    \param wr WRBUF output
    \returns the result of the format-specific writer
    NOTE(review): the case labels in front of the line and check writers
    are not visible here. */
584 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
586 switch(mt->output_format)
589 return yaz_marc_write_line(mt, wr);
/* MARCXML and TurboMARC share one entry point */
590 case YAZ_MARC_MARCXML:
591 case YAZ_MARC_TMARCXML:
592 return yaz_marc_write_marcxml(mt, wr);
593 case YAZ_MARC_XCHANGE:
594 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
595 case YAZ_MARC_ISO2709:
596 return yaz_marc_write_iso2709(mt, wr);
598 return yaz_marc_write_check(mt, wr);
/* XML element/attribute names, indexed by the `turbo` flag of the writers:
   index 0 holds the MARCXML name, index 1 the short TurboMARC name.
   NOTE(review): these arrays have external linkage and writable elements;
   consider `static const char *const` if no other file refers to them. */
603 const char *collection_name[2] = { "collection", "collection"};
604 const char *record_name[2] = { "record", "r"};
605 const char *leader_name[2] = { "leader", "l"};
606 const char *controlfield_name[2]= { "controlfield", "c"};
607 const char *datafield_name[2] = { "datafield", "d"};
/* prefix only; the writers append the 1-based indicator number */
608 const char *indicator_name[2] = { "ind", "i"};
609 const char *subfield_name[2] = { "subfield", "s"};
612 /** \brief common MARC XML/Xchange writer
614 \param wr WRBUF output
615 \param ns XMLNS for the elements
616 \param format record format (e.g. "MARC21")
617 \param type record type (e.g. "Bibliographic")
/* Writes the record as XML text straight into wr, using the element names
   selected by `turbo` (0 = MARCXML names, 1 = TurboMARC names).
   NOTE(review): several lines of this function, including the branches
   that choose between the MARCXML and the TurboMARC form of a tag, are
   not visible here; the comments below describe the visible statements
   only. */
619 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
624 struct yaz_marc_node *n;
625 int identifier_length;
626 const char *leader = 0;
628 int turbo = yaz_marc_get_write_format(mt) == YAZ_MARC_TMARCXML;
/* locate the leader; its byte 11 gives the identifier length */
630 for (n = mt->nodes; n; n = n->next)
631 if (n->which == YAZ_MARC_LEADER)
633 leader = n->u.leader;
639 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* with a collection wrapper the namespace goes on the collection element,
   which is opened once; otherwise it goes on the record element */
642 if (mt->enable_collection != no_collection)
644 if (mt->enable_collection == collection_first) {
645 wrbuf_printf(wr, "<%s xmlns=\"%s\">\n", collection_name[turbo], ns);
646 mt->enable_collection = collection_second;
648 wrbuf_printf(wr, "<%s", record_name[turbo]);
652 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
/* format and type values are cut off after 80 characters */
655 wrbuf_printf(wr, " format=\"%.80s\"", format);
657 wrbuf_printf(wr, " type=\"%.80s\"", type);
658 wrbuf_printf(wr, ">\n");
659 for (n = mt->nodes; n; n = n->next)
661 struct yaz_marc_subfield *s;
665 case YAZ_MARC_DATAFIELD:
667 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
669 wrbuf_printf(wr, " tag=\"");
670 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
671 strlen(n->u.datafield.tag));
673 wrbuf_printf(wr, "\"");
/* one attribute per indicator character, numbered from 1 */
674 if (n->u.datafield.indicator)
677 for (i = 0; n->u.datafield.indicator[i]; i++)
679 wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
680 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
681 n->u.datafield.indicator+i, 1);
682 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
685 wrbuf_printf(wr, ">\n");
686 for (s = n->u.datafield.subfields; s; s = s->next)
688 size_t using_code_len = get_subfield_len(mt, s->code_data,
690 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
692 wrbuf_printf(wr, " code=\"");
693 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
694 s->code_data, using_code_len);
695 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
697 element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
/* subfield value: everything after the code */
700 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
701 s->code_data + using_code_len,
702 strlen(s->code_data + using_code_len));
703 marc_iconv_reset(mt, wr);
704 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
706 element_name_append_attribute_value(mt, wr, 0, s->code_data, using_code_len);
707 wrbuf_puts(wr, ">\n");
709 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
712 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
713 strlen(n->u.datafield.tag));
/* NOTE(review): the format string below has no conversion, so the
   datafield_name[turbo] argument is never used */
714 wrbuf_printf(wr, ">\n", datafield_name[turbo]);
716 case YAZ_MARC_CONTROLFIELD:
717 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
719 wrbuf_printf(wr, " tag=\"");
720 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
721 strlen(n->u.controlfield.tag));
722 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
725 //TODO convert special
726 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
727 strlen(n->u.controlfield.tag));
728 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
730 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
731 n->u.controlfield.data,
732 strlen(n->u.controlfield.data));
733 marc_iconv_reset(mt, wr);
734 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
735 //TODO convert special
737 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
738 strlen(n->u.controlfield.tag));
739 wrbuf_puts(wr, ">\n");
/* NOTE(review): the comment text is written as is; a "--" inside it
   would make the XML comment ill-formed */
741 case YAZ_MARC_COMMENT:
742 wrbuf_printf(wr, "<!-- ");
743 wrbuf_puts(wr, n->u.comment);
744 wrbuf_printf(wr, " -->\n");
746 case YAZ_MARC_LEADER:
747 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
748 wrbuf_iconv_write_cdata(wr,
749 0 , /* no charset conversion for leader */
750 n->u.leader, strlen(n->u.leader));
751 wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
754 wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
/* Writes the record as XML text into wr using the fixed MARCXML element
   names (record, datafield, subfield, controlfield, leader).
   NOTE(review): no call to this function is visible in this file; braces
   and some other lines are not visible here either. */
758 static int yaz_marc_write_marcxml_ns2(yaz_marc_t mt, WRBUF wr,
763 struct yaz_marc_node *n;
764 int identifier_length;
765 const char *leader = 0;
/* locate the leader; its byte 11 gives the identifier length */
767 for (n = mt->nodes; n; n = n->next)
768 if (n->which == YAZ_MARC_LEADER)
770 leader = n->u.leader;
776 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* with a collection wrapper the namespace goes on the collection element;
   otherwise it goes on the record element */
779 if (mt->enable_collection != no_collection)
781 if (mt->enable_collection == collection_first)
782 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
783 mt->enable_collection = collection_second;
784 wrbuf_printf(wr, "<record");
788 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
/* format and type values are cut off after 80 characters */
791 wrbuf_printf(wr, " format=\"%.80s\"", format);
793 wrbuf_printf(wr, " type=\"%.80s\"", type);
794 wrbuf_printf(wr, ">\n");
795 for (n = mt->nodes; n; n = n->next)
797 struct yaz_marc_subfield *s;
801 case YAZ_MARC_DATAFIELD:
802 wrbuf_printf(wr, " <datafield tag=\"");
803 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
804 strlen(n->u.datafield.tag));
805 wrbuf_printf(wr, "\"");
/* one indN attribute per indicator character, numbered from 1 */
806 if (n->u.datafield.indicator)
809 for (i = 0; n->u.datafield.indicator[i]; i++)
811 wrbuf_printf(wr, " ind%d=\"", i+1);
812 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
813 n->u.datafield.indicator+i, 1);
814 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
817 wrbuf_printf(wr, ">\n");
818 for (s = n->u.datafield.subfields; s; s = s->next)
820 size_t using_code_len = get_subfield_len(mt, s->code_data,
822 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
823 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
824 s->code_data, using_code_len);
825 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
826 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
827 s->code_data + using_code_len,
828 strlen(s->code_data + using_code_len));
829 marc_iconv_reset(mt, wr);
830 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
831 wrbuf_puts(wr, "\n");
833 wrbuf_printf(wr, " </datafield>\n");
835 case YAZ_MARC_CONTROLFIELD:
836 wrbuf_printf(wr, " <controlfield tag=\"");
837 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
838 strlen(n->u.controlfield.tag));
839 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
840 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
841 n->u.controlfield.data,
842 strlen(n->u.controlfield.data));
844 marc_iconv_reset(mt, wr);
845 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
846 wrbuf_puts(wr, "\n");
/* the comment text is written as is, without escaping */
848 case YAZ_MARC_COMMENT:
849 wrbuf_printf(wr, "<!-- ");
850 wrbuf_puts(wr, n->u.comment);
851 wrbuf_printf(wr, " -->\n");
853 case YAZ_MARC_LEADER:
854 wrbuf_printf(wr, " <leader>");
855 wrbuf_iconv_write_cdata(wr,
856 0 /* no charset conversion for leader */,
857 n->u.leader, strlen(n->u.leader));
858 wrbuf_printf(wr, "</leader>\n");
861 wrbuf_puts(wr, "</record>\n");
/* Chooses between the two XML back ends.  With mt->write_using_libxml2
   set, a node tree is built (yaz_marc_write_xml for MARCXML, otherwise
   yaz_marc_write_turbo_xml), put into a new libxml2 document, serialized
   with xmlDocDumpMemory and copied into wr.  Otherwise
   yaz_marc_write_marcxml_ns1 prints the XML directly.
   NOTE(review): the release of doc and buf_out is not visible here. */
866 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
871 if (mt->write_using_libxml2)
877 if (yaz_marc_get_write_format(mt) == YAZ_MARC_MARCXML)
878 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
879 else // Check for Turbo XML
880 ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type);
884 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
887 xmlDocSetRootElement(doc, root_ptr);
888 xmlDocDumpMemory(doc, &buf_out, &len_out);
890 wrbuf_write(wr, (const char *) buf_out, len_out);
901 return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
/** \brief writes the record as MARCXML or, for YAZ_MARC_TMARCXML, as TurboMARC
    \param mt handle
    \param wr WRBUF output
    \returns the result of yaz_marc_write_marcxml_ns
    NOTE(review): name_space is declared after a statement, which C89
    compilers reject, and it binds a string literal to a non-const char
    pointer; `const char *` declared at the top of the block would be
    safer. */
904 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
906 /* set leader 09 to 'a' for UNICODE */
907 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
/* only done when no leader spec has been configured on the handle */
908 if (!mt->leader_spec)
909 yaz_marc_modify_leader(mt, 9, "a");
910 char *name_space = "http://www.loc.gov/MARC21/slim";
911 if (mt->output_format == YAZ_MARC_TMARCXML)
912 name_space = "http://www.indexdata.com/MARC21/turboxml";
913 return yaz_marc_write_marcxml_ns(mt, wr, name_space,
/** \brief writes the record as MARCXchange XML
    \param mt handle
    \param wr WRBUF output
    Uses the common XML writer with the namespace
    info:lc/xmlns/marcxchange-v1; the remaining parameters are not visible
    here. */
917 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
921 return yaz_marc_write_marcxml_ns(mt, wr,
922 "info:lc/xmlns/marcxchange-v1",
/* Adds one data field, with its indicators and subfields, as a child of
   record_ptr in a libxml2 tree.  Depending on `turbo` the field becomes
   either a datafield element with a tag attribute or an element whose
   name is built from the tag.
   \param mt handle
   \param n data field node to convert
   \param record_ptr parent XML element
   \param ns_record namespace given to the new elements
   \param wr_cdata scratch buffer for converted text; rewound before each use
   \param identifier_length identifier length taken from the leader
   NOTE(review): the branches selecting between the two forms are not
   visible here. */
928 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
931 struct yaz_marc_subfield *s;
932 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
934 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
935 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
938 //TODO consider if safe
/* at most 3 bytes of the tag are copied behind the first character of
   field; declaration and termination of field are not visible here */
941 strncpy(field + 1, n->u.datafield.tag, 3);
943 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
/* one attribute per indicator character, numbered from 1 */
945 if (n->u.datafield.indicator)
948 for (i = 0; n->u.datafield.indicator[i]; i++)
953 ind_val[0] = n->u.datafield.indicator[i];
955 sprintf(ind_str, "%s%d", indicator_name[turbo], i+1);
956 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
/* NOTE(review): this local shadows the file-scope subfield_name table */
959 WRBUF subfield_name = wrbuf_alloc();
960 for (s = n->u.datafield.subfields; s; s = s->next)
962 xmlNode *ptr_subfield;
963 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* convert the subfield value (everything after the code) into wr_cdata */
965 wrbuf_rewind(wr_cdata);
966 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
967 marc_iconv_reset(mt, wr_cdata);
970 ptr_subfield = xmlNewTextChild(
972 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
973 // Generate code attribute value and add
974 wrbuf_rewind(wr_cdata);
975 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
976 xmlNewProp(ptr_subfield, BAD_CAST "code",
977 BAD_CAST wrbuf_cstr(wr_cdata));
979 else { // Turbo format
/* element name is "s" plus the code when the code is alphanumeric;
   not_written records that the code could not go into the name */
980 wrbuf_rewind(subfield_name);
981 wrbuf_puts(subfield_name, "s");
982 int not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0;
983 ptr_subfield = xmlNewTextChild(ptr, ns_record,
984 BAD_CAST wrbuf_cstr(subfield_name),
985 BAD_CAST wrbuf_cstr(wr_cdata));
987 // Generate code attribute value and add
988 wrbuf_rewind(wr_cdata);
989 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
990 xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata));
994 wrbuf_destroy(subfield_name);
/** \brief builds a libxml2 tree for the record, for TurboMARC output
    \param mt handle
    \param root_ptr receives the new root element
    The root element is created with the name "r" and the namespace ns;
    format and type become attributes of it.  The remaining parameters and
    the conditions around several statements are not visible here. */
997 int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
1002 struct yaz_marc_node *n;
1003 int identifier_length;
1004 const char *leader = 0;
1005 xmlNode *record_ptr;
1008 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
/* locate the leader; its byte 11 gives the identifier length */
1009 for (n = mt->nodes; n; n = n->next)
1010 if (n->which == YAZ_MARC_LEADER)
1012 leader = n->u.leader;
1018 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for character-set-converted text */
1021 wr_cdata = wrbuf_alloc();
1023 record_ptr = xmlNewNode(0, BAD_CAST "r");
1024 *root_ptr = record_ptr;
1026 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1027 xmlSetNs(record_ptr, ns_record);
1030 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1032 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1033 for (n = mt->nodes; n; n = n->next)
1035 struct yaz_marc_subfield *s;
1040 case YAZ_MARC_DATAFIELD:
1041 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
1043 case YAZ_MARC_CONTROLFIELD:
1044 wrbuf_rewind(wr_cdata);
1045 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1046 marc_iconv_reset(mt, wr_cdata);
1049 ptr = xmlNewTextChild(record_ptr, ns_record,
1050 BAD_CAST "controlfield",
1051 BAD_CAST wrbuf_cstr(wr_cdata));
1052 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1055 // TODO required iconv?
/* at most 3 bytes of the tag are copied behind the first character of
   field; declaration and termination of field are not visible here */
1058 strncpy(field + 1, n->u.controlfield.tag, 3);
1060 ptr = xmlNewTextChild(record_ptr, ns_record,
1062 BAD_CAST wrbuf_cstr(wr_cdata));
1066 case YAZ_MARC_COMMENT:
1067 ptr = xmlNewComment(BAD_CAST n->u.comment);
1068 xmlAddChild(record_ptr, ptr);
/* the leader text is passed on without character set conversion */
1070 case YAZ_MARC_LEADER:
1072 char *field = "leader";
1075 xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
1076 BAD_CAST n->u.leader);
1081 wrbuf_destroy(wr_cdata);
/** \brief builds a libxml2 tree for the record, using MARCXML element names
    \param mt handle
    \param root_ptr receives the new root element
    The root element is created with the name "record" and the namespace
    ns; format and type become attributes of it.  The remaining parameters
    and the conditions around several statements are not visible here. */
1086 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
1091 struct yaz_marc_node *n;
1092 int identifier_length;
1093 const char *leader = 0;
1094 xmlNode *record_ptr;
/* locate the leader; its byte 11 gives the identifier length */
1098 for (n = mt->nodes; n; n = n->next)
1099 if (n->which == YAZ_MARC_LEADER)
1101 leader = n->u.leader;
1107 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for character-set-converted text */
1110 wr_cdata = wrbuf_alloc();
1112 record_ptr = xmlNewNode(0, BAD_CAST "record");
1113 *root_ptr = record_ptr;
1115 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1116 xmlSetNs(record_ptr, ns_record);
1119 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1121 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1122 for (n = mt->nodes; n; n = n->next)
1124 struct yaz_marc_subfield *s;
1129 case YAZ_MARC_DATAFIELD:
1130 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1131 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
/* one indN attribute per indicator character, numbered from 1 */
1132 if (n->u.datafield.indicator)
1135 for (i = 0; n->u.datafield.indicator[i]; i++)
1140 sprintf(ind_str, "ind%d", i+1);
1141 ind_val[0] = n->u.datafield.indicator[i];
1143 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1146 for (s = n->u.datafield.subfields; s; s = s->next)
1148 xmlNode *ptr_subfield;
1149 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* subfield value first (everything after the code) ... */
1151 wrbuf_rewind(wr_cdata);
1152 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1153 s->code_data + using_code_len);
1154 marc_iconv_reset(mt, wr_cdata);
1155 ptr_subfield = xmlNewTextChild(
1157 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
/* ... then the code itself, as the code attribute */
1159 wrbuf_rewind(wr_cdata);
1160 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1161 s->code_data, using_code_len);
1162 xmlNewProp(ptr_subfield, BAD_CAST "code",
1163 BAD_CAST wrbuf_cstr(wr_cdata));
1166 case YAZ_MARC_CONTROLFIELD:
1167 wrbuf_rewind(wr_cdata);
1168 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1169 marc_iconv_reset(mt, wr_cdata);
1171 ptr = xmlNewTextChild(record_ptr, ns_record,
1172 BAD_CAST "controlfield",
1173 BAD_CAST wrbuf_cstr(wr_cdata));
1175 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1177 case YAZ_MARC_COMMENT:
1178 ptr = xmlNewComment(BAD_CAST n->u.comment);
1179 xmlAddChild(record_ptr, ptr);
/* the leader text is passed on without character set conversion */
1181 case YAZ_MARC_LEADER:
1182 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1183 BAD_CAST n->u.leader);
1187 wrbuf_destroy(wr_cdata);
/** \brief writes the record in ISO2709 format
    \param mt handle
    \param wr WRBUF output
    Works in two passes over the nodes: the first builds the directory and
    measures every field after character set conversion, the second writes
    the field data.  In between, a fresh 24-byte leader is assembled from
    the computed record length and base address plus bytes 5..11 and
    17..23 of the record's leader. */
1196 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1198 struct yaz_marc_node *n;
1199 int indicator_length;
1200 int identifier_length;
1201 int length_data_entry;
1202 int length_starting;
1203 int length_implementation;
1204 int data_offset = 0;
1205 const char *leader = 0;
1206 WRBUF wr_dir, wr_head, wr_data_tmp;
/* the last leader node found is the one used */
1209 for (n = mt->nodes; n; n = n->next)
1210 if (n->which == YAZ_MARC_LEADER)
1211 leader = n->u.leader;
/* read the single-digit layout parameters from the leader */
1215 if (!atoi_n_check(leader+10, 1, &indicator_length))
1217 if (!atoi_n_check(leader+11, 1, &identifier_length))
1219 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1221 if (!atoi_n_check(leader+21, 1, &length_starting))
1223 if (!atoi_n_check(leader+22, 1, &length_implementation))
/* pass 1: directory entries (tag, field length, start offset) */
1226 wr_data_tmp = wrbuf_alloc();
1227 wr_dir = wrbuf_alloc();
1228 for (n = mt->nodes; n; n = n->next)
1230 int data_length = 0;
1231 struct yaz_marc_subfield *s;
1235 case YAZ_MARC_DATAFIELD:
1236 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1237 data_length += indicator_length;
/* the converted field is assembled in wr_data_tmp only to measure it;
   single spaces stand in for the separator bytes */
1238 wrbuf_rewind(wr_data_tmp);
1239 for (s = n->u.datafield.subfields; s; s = s->next)
1241 /* write dummy IDFS + content */
1242 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1243 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1244 marc_iconv_reset(mt, wr_data_tmp);
1246 /* write dummy FS (makes MARC-8 to become ASCII) */
1247 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1248 marc_iconv_reset(mt, wr_data_tmp);
1249 data_length += wrbuf_len(wr_data_tmp);
1251 case YAZ_MARC_CONTROLFIELD:
1252 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1254 wrbuf_rewind(wr_data_tmp);
1255 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1256 n->u.controlfield.data);
1257 marc_iconv_reset(mt, wr_data_tmp);
1258 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1259 marc_iconv_reset(mt, wr_data_tmp);
1260 data_length += wrbuf_len(wr_data_tmp);
1262 case YAZ_MARC_COMMENT:
1264 case YAZ_MARC_LEADER:
/* zero-padded length and offset, in the widths given by the leader */
1269 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1270 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1271 data_offset += data_length;
1274 /* mark end of directory */
1275 wrbuf_putc(wr_dir, ISO2709_FS);
1277 /* base address of data (comes after leader+directory) */
1278 base_address = 24 + wrbuf_len(wr_dir);
1280 wr_head = wrbuf_alloc();
1282 /* write record length */
1283 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1284 /* from "original" leader */
1285 wrbuf_write(wr_head, leader+5, 7);
1286 /* base address of data */
1287 wrbuf_printf(wr_head, "%05d", base_address);
1288 /* from "original" leader */
1289 wrbuf_write(wr_head, leader+17, 7);
/* emit leader and directory, then drop the temporary buffers */
1291 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1292 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1293 wrbuf_destroy(wr_head);
1294 wrbuf_destroy(wr_dir);
1295 wrbuf_destroy(wr_data_tmp);
/* pass 2: the field data, with the real separator bytes */
1297 for (n = mt->nodes; n; n = n->next)
1299 struct yaz_marc_subfield *s;
1303 case YAZ_MARC_DATAFIELD:
1304 wrbuf_printf(wr, "%.*s", indicator_length,
1305 n->u.datafield.indicator);
1306 for (s = n->u.datafield.subfields; s; s = s->next)
1308 wrbuf_putc(wr, ISO2709_IDFS);
1309 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1310 marc_iconv_reset(mt, wr);
1312 wrbuf_putc(wr, ISO2709_FS);
1314 case YAZ_MARC_CONTROLFIELD:
1315 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1316 marc_iconv_reset(mt, wr);
1317 wrbuf_putc(wr, ISO2709_FS);
1319 case YAZ_MARC_COMMENT:
1321 case YAZ_MARC_LEADER:
/* record terminator */
1325 wrbuf_printf(wr, "%c", ISO2709_RS);
/** \brief reads an ISO2709 record from a buffer and writes it to wr in the handle's output format
    \param mt handle
    \param buf input buffer
    \param bsize size of the input buffer
    \param wr WRBUF output
    \returns -1 on error, otherwise the result of yaz_marc_read_iso2709 */
1330 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1332 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1335 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1337 return -1; /* error */
1338 return r; /* OK, return length > 0 */
/** \brief like yaz_marc_decode_wrbuf, but writes into the handle's own buffer
    \param mt handle
    \param buf input buffer
    \param bsize size of the input buffer
    \param result receives a pointer to the converted text
    \param rsize receives the length of the converted text
    The text lives in mt->m_wr, which is rewound on every call, so *result
    is overwritten by the next call on the same handle. */
1341 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1342 const char **result, size_t *rsize)
1346 wrbuf_rewind(mt->m_wr);
1347 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1349 *result = wrbuf_cstr(mt->m_wr);
1351 *rsize = wrbuf_len(mt->m_wr);
/** \brief stores the input format on the handle
    \param mt handle
    \param format value assigned to mt->input_format
    NOTE(review): no input_format member is visible in the part of the
    handle declaration shown in this file - confirm it exists. */
1355 void yaz_marc_set_read_format(yaz_marc_t mt, int format)
1358 mt->input_format = format;
/** \brief returns the input format stored on the handle */
1361 int yaz_marc_get_read_format(yaz_marc_t mt)
1364 return mt->input_format;
/** \brief selects the output format used by yaz_marc_write_mode
    \param mt handle
    \param format one of the YAZ_MARC_ output format values */
1369 void yaz_marc_set_write_format(yaz_marc_t mt, int format)
1372 mt->output_format = format;
/** \brief returns the output format currently selected on the handle */
1376 int yaz_marc_get_write_format(yaz_marc_t mt)
1379 return mt->output_format;
1385 * Deprecated, use yaz_marc_set_write_format
/* older entry point: forwards xmlmode to yaz_marc_set_write_format */
1387 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1389 yaz_marc_set_write_format(mt, xmlmode);
/** \brief sets the debug level of the handle (body not visible here - presumably stores level, confirm) */
1394 void yaz_marc_debug(yaz_marc_t mt, int level)
/** \brief sets the iconv descriptor used for character set conversion (body not visible here - presumably stores cd in mt->iconv_cd, confirm) */
1400 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/** \brief returns the iconv descriptor stored on the handle */
1405 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1407 return mt->iconv_cd;
/** \brief overwrites part of the leader of the current record
    \param mt handle
    \param off byte offset into the leader
    \param str bytes to store; strlen(str) bytes are copied, without the terminator
    NOTE(review): no check that off + strlen(str) stays inside the leader
    is visible here; callers must keep the write within bounds. */
1410 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1412 struct yaz_marc_node *n;
1414 for (n = mt->nodes; n; n = n->next)
1415 if (n->which == YAZ_MARC_LEADER)
1417 leader = n->u.leader;
1418 memcpy(leader+off, str, strlen(str));
/** \brief sets or clears the leader spec applied to every leader added
    \param mt handle
    \param leader_spec spec string; a copy is stored on the handle
    The previous spec is freed first.  The new spec is tried against a
    scratch 24-byte leader with marc_exec_leader before it is stored
    (the handling of a failing check is not visible here). */
1423 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1425 xfree(mt->leader_spec);
1426 mt->leader_spec = 0;
1429 char dummy_leader[24];
1430 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1432 mt->leader_spec = xstrdup(leader_spec);
/* Applies a leader spec to a leader buffer.
   \param leader_spec spec text; each item is read as pos=value, where the
          value runs up to the next comma (at most 20 characters)
   \param leader buffer that is modified
   \param size size of the leader buffer
   Visible handling: a value in single quotes is copied to leader at pos;
   a value starting with a digit is treated separately (not visible here).
   Items with a position outside the buffer, or a quoted value reaching
   past its end, are rejected.  Loop structure and return values are not
   visible here. */
1437 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1439 const char *cp = leader_spec;
1444 int no_read = 0, no = 0;
/* %n reports how many characters the item occupied */
1446 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1447 if (no < 2 || no_read < 3)
1449 if (pos < 0 || (size_t) pos >= size)
/* look for the closing quote of a quoted value */
1454 const char *vp = strchr(val+1, '\'');
1460 if (len + pos > size)
1462 memcpy(leader + pos, val+1, len);
1464 else if (*val >= '0' && *val <= '9')
/** \brief maps a format name to a YAZ_MARC_ output format value
    \param arg one of "marc", "marcxml", "tmarcxml", "marcxchange" or "line"
    The comparisons are exact and case sensitive.  The initial value of
    mode, i.e. the result for an unknown name, is not visible here. */
1480 int yaz_marc_decode_formatstr(const char *arg)
1483 if (!strcmp(arg, "marc"))
1484 mode = YAZ_MARC_ISO2709;
1485 if (!strcmp(arg, "marcxml"))
1486 mode = YAZ_MARC_MARCXML;
1487 if (!strcmp(arg, "tmarcxml"))
1488 mode = YAZ_MARC_TMARCXML;
1489 if (!strcmp(arg, "marcxchange"))
1490 mode = YAZ_MARC_XCHANGE;
1491 if (!strcmp(arg, "line"))
1492 mode = YAZ_MARC_LINE;
/** \brief selects whether XML output is produced through libxml2
    \param mt handle
    \param enable non-zero to build a libxml2 tree, zero to print XML directly */
1496 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1498 mt->write_using_libxml2 = enable;
/** \brief returns non-zero when the handle's output format is YAZ_MARC_TMARCXML */
1501 int yaz_marc_is_turbo_format(yaz_marc_t mt)
1503 return mt->output_format == YAZ_MARC_TMARCXML;
1510 * c-file-style: "Stroustrup"
1511 * indent-tabs-mode: nil
1513 * vim: shiftwidth=4 tabstop=8 expandtab