1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
35 enum yaz_collection_state {
41 /** \brief node types for yaz_marc_node */
42 enum YAZ_MARC_NODE_TYPE
45 YAZ_MARC_CONTROLFIELD,
50 /** \brief represents a data field */
51 struct yaz_marc_datafield {
54 struct yaz_marc_subfield *subfields;
57 /** \brief represents a control field */
58 struct yaz_marc_controlfield {
63 /** \brief a comment node */
64 struct yaz_marc_comment {
68 /** \brief MARC node */
69 struct yaz_marc_node {
70 enum YAZ_MARC_NODE_TYPE which;
72 struct yaz_marc_datafield datafield;
73 struct yaz_marc_controlfield controlfield;
77 struct yaz_marc_node *next;
80 /** \brief represents a subfield */
81 struct yaz_marc_subfield {
83 struct yaz_marc_subfield *next;
86 /** \brief the internals of a yaz_marc_t handle */
93 int write_using_libxml2;
94 enum yaz_collection_state enable_collection;
99 struct yaz_marc_node *nodes;
100 struct yaz_marc_node **nodes_pp;
101 struct yaz_marc_subfield **subfield_pp;
/* Allocates a MARC handle with xmalloc and fills in defaults: line-mode
 * output, libxml2 writing disabled, collection wrapping disabled, a fresh
 * output WRBUF, a fresh NMEM pool, and the default subfield / end-of-line
 * separator strings. */
104 yaz_marc_t yaz_marc_create(void)
106 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
107 mt->output_format = YAZ_MARC_LINE;
109 mt->write_using_libxml2 = 0;
110 mt->enable_collection = no_collection;
111 mt->m_wr = wrbuf_alloc();
    /* strcpy is unbounded: assumes subfield_str holds >= 3 bytes and
     * endline_str >= 2 bytes -- TODO confirm against the struct definition. */
114 strcpy(mt->subfield_str, " $");
115 strcpy(mt->endline_str, "\n");
117 mt->nmem = nmem_create();
/* Releases resources owned by the handle. The statements visible here free
 * the NMEM pool, the output WRBUF and the leader specification string. */
122 void yaz_marc_destroy(yaz_marc_t mt)
126 nmem_destroy(mt->nmem);
127 wrbuf_destroy(mt->m_wr);
128 xfree(mt->leader_spec);
132 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
/* Resets the iconv state for output buffer wr using the handle's
 * conversion descriptor (thin wrapper around wrbuf_iconv_reset). */
137 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
139 wrbuf_iconv_reset(wr, mt->iconv_cd);
142 static int marc_exec_leader(const char *leader_spec, char *leader,
/* Allocates a new record node from the handle's NMEM pool and moves the
 * append cursor (nodes_pp) to the new node's next pointer, so the next
 * node added is linked after this one. */
146 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
148 struct yaz_marc_node *n = (struct yaz_marc_node *)
149 nmem_malloc(mt->nmem, sizeof(*n));
152 mt->nodes_pp = &n->next;
/* Appends a control field node whose tag and data are taken from the text
 * content of two libxml2 nodes; both strings are produced by
 * nmem_text_node_cdata using the handle's NMEM pool. */
157 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
158 const xmlNode *ptr_data)
160 struct yaz_marc_node *n = yaz_marc_add_node(mt);
161 n->which = YAZ_MARC_CONTROLFIELD;
162 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
163 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/* Appends a control field node for Turbo MARCXML input: the tag is given
 * as a C string, the data comes from the text content of an XML node. */
166 void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, char *tag,
167 const xmlNode *ptr_data)
169 struct yaz_marc_node *n = yaz_marc_add_node(mt);
170 n->which = YAZ_MARC_CONTROLFIELD;
    /* NOTE(review): the tag pointer is stored as passed, not duplicated into
     * the NMEM pool, so it must stay valid for as long as the node is used --
     * presumably callers pass NMEM-allocated strings; verify. */
171 n->u.controlfield.tag = tag;
172 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/* Appends a comment node holding a copy of `comment` (duplicated into the
 * handle's NMEM pool, so the caller's buffer may be reused afterwards). */
178 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
180 struct yaz_marc_node *n = yaz_marc_add_node(mt);
181 n->which = YAZ_MARC_COMMENT;
182 n->u.comment = nmem_strdup(mt->nmem, comment);
/* printf-style convenience: formats the arguments into a local buffer
 * (bounded by the buffer size via yaz_vsnprintf) and appends the result to
 * the record as a comment node. Output longer than the buffer is cut off. */
185 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
191 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
192 yaz_marc_add_comment(mt, buf);
196 int yaz_marc_get_debug(yaz_marc_t mt)
/* Appends a leader node holding a copy of the first leader_len bytes of
 * `leader`, then applies the handle's leader specification to that copy. */
201 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
203 struct yaz_marc_node *n = yaz_marc_add_node(mt);
204 n->which = YAZ_MARC_LEADER;
205 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
    /* NOTE(review): the int result of marc_exec_leader is ignored here, so a
     * failure to apply the spec is not reported to the caller. */
206 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
/* Appends a control field node with copies of `tag` and of the first
 * data_len bytes of `data`. The visible tail also builds a diagnostic
 * comment node containing a hex dump of up to the first 16 data bytes. */
209 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
210 const char *data, size_t data_len)
212 struct yaz_marc_node *n = yaz_marc_add_node(mt);
213 n->which = YAZ_MARC_CONTROLFIELD;
214 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
215 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
    /* NOTE(review): the sprintf calls below are unbounded. Worst case is
     * 13 + 16*3 + 3 + 1 = 65 bytes; the declaration of msg is not visible
     * here -- confirm it is at least that large. */
221 sprintf(msg, "controlfield:");
222 for (i = 0; i < 16 && i < data_len; i++)
223 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
225 sprintf(msg + strlen(msg), " ..");
226 yaz_marc_add_comment(mt, msg);
/* Appends a data field node with copies of `tag` and of the first
 * indicator_len bytes of `indicator`, starts it with an empty subfield
 * list, and makes it the target of subsequent subfield additions. */
230 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
231 const char *indicator, size_t indicator_len)
233 struct yaz_marc_node *n = yaz_marc_add_node(mt);
234 n->which = YAZ_MARC_DATAFIELD;
235 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
236 n->u.datafield.indicator =
237 nmem_strdupn(mt->nmem, indicator, indicator_len);
238 n->u.datafield.subfields = 0;
240 /* make subfield_pp the current (last one) */
241 mt->subfield_pp = &n->u.datafield.subfields;
/* Appends a subfield code to `buffer` in a form intended for use inside an
 * XML element name. The code is scanned for bytes outside [0-9a-zA-Z]; one
 * visible path writes the code through iconv unchanged, the other writes
 * "-" followed by the upper-case hex value of every byte and logs a
 * warning. The conditions selecting between the two paths and the return
 * value are not visible in this listing.
 * NOTE(review): the function has external linkage but no yaz_ prefix. */
244 char *element_name_encode(yaz_marc_t mt, WRBUF buffer, char *code_data, size_t code_len) {
245 // TODO Map special codes to something possible for XML ELEMENT names
249 for (index = 0; index < code_len; index++) {
250 if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
251 (code_data[index] >= 'a' && code_data[index] <= 'z') ||
252 (code_data[index] >= 'A' && code_data[index] <= 'Z')))
256 wrbuf_iconv_write(buffer, mt->iconv_cd, code_data, code_len);
    /* Variable-length array: two hex digits per byte plus the terminator. */
259 char temp[2*code_len + 1];
260 wrbuf_puts(buffer, "-");
262 for (index = 0; index < code_len; index++) {
263 sprintf(temp+2*index, "%02X", (unsigned char) code_data[index] & 0xFF);
    /* NOTE(review): temp has 2*code_len + 1 elements, so the valid indices
     * are 0 .. 2*code_len. The store below hits index 2*code_len + 1, one
     * past the end. The terminator belongs at temp[2*code_len], where the
     * last sprintf already put it whenever code_len > 0. */
265 temp[2*code_len+1] = 0;
266 wrbuf_puts(buffer, temp);
267 yaz_log(YLOG_WARN, "Using numeric value in element name: %s", temp);
/* Appends a data field node whose tag is the text content of an XML node
 * (obtained with nmem_text_node_cdata) and whose indicator is a copy of the
 * first indicator_len bytes of `indicator`. The new node becomes the target
 * of subsequent subfield additions. */
272 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
273 const char *indicator, size_t indicator_len)
275 struct yaz_marc_node *n = yaz_marc_add_node(mt);
276 n->which = YAZ_MARC_DATAFIELD;
277 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
278 n->u.datafield.indicator =
279 nmem_strdupn(mt->nmem, indicator, indicator_len);
280 n->u.datafield.subfields = 0;
282 /* make subfield_pp the current (last one) */
283 mt->subfield_pp = &n->u.datafield.subfields;
/* Appends a data field node for Turbo MARCXML input. The node starts with
 * no indicator (0) and no subfields and becomes the target of subsequent
 * subfield additions. */
286 struct yaz_marc_node* yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value)
288 struct yaz_marc_node *n = yaz_marc_add_node(mt);
289 n->which = YAZ_MARC_DATAFIELD;
    /* NOTE(review): tag_value is stored as passed (not copied); it must
     * outlive the node -- verify against callers. */
290 n->u.datafield.tag = tag_value;
    /* The indicator stays 0 until yaz_marc_datafield_set_indicators is
     * called; writers must cope with a null indicator in between. */
291 n->u.datafield.indicator = 0;
292 n->u.datafield.subfields = 0;
294 /* make subfield_pp the current (last one) */
295 mt->subfield_pp = &n->u.datafield.subfields;
/* Sets the indicator string of data field node n. The pointer is stored as
 * given (no copy is made). */
299 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
301 n->u.datafield.indicator = indicator;
/* Appends a subfield (code bytes immediately followed by data) to the data
 * field most recently added. The visible code also builds a diagnostic
 * comment with a hex dump of up to the first 16 bytes, followed by " .."
 * when the input is longer. */
306 void yaz_marc_add_subfield(yaz_marc_t mt,
307 const char *code_data, size_t code_data_len)
    /* NOTE(review): unbounded sprintf. Worst case is 9 + 16*3 + 3 + 1 = 61
     * bytes; the declaration of msg is not visible here -- confirm its
     * size. */
314 sprintf(msg, "subfield:");
315 for (i = 0; i < 16 && i < code_data_len; i++)
316 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
317 if (i < code_data_len)
318 sprintf(msg + strlen(msg), " ..");
319 yaz_marc_add_comment(mt, msg);
324 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
325 nmem_malloc(mt->nmem, sizeof(*n));
326 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
    /* Relies on subfield_pp having been set by a preceding data field
     * addition. */
328 /* mark subfield_pp to point to this one, so we append here next */
329 *mt->subfield_pp = n;
330 mt->subfield_pp = &n->next;
/* Decodes the structural fields of a 24-byte MARC leader into the caller's
 * output integers and then adds the leader to the record.
 * Leader offsets parsed: 10 indicator length, 11 identifier length,
 * 12..16 base address, 20 length of the length-of-field entry, 21 length of
 * the starting-position entry, 22 length of the implementation-defined
 * entry. Each parse failure has an accompanying diagnostic message, and the
 * values 2, 2, 4, 5 and 0 are assigned as fallbacks (the enclosing braces
 * are not visible in this listing, so exact scoping is presumed). */
334 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
335 int *indicator_length,
336 int *identifier_length,
338 int *length_data_entry,
339 int *length_starting,
340 int *length_implementation)
    /* Work on a local copy: reads exactly 24 bytes from leader_c. */
344 memcpy(leader, leader_c, 24);
346 if (!atoi_n_check(leader+10, 1, indicator_length))
349 "Indicator length at offset 10 should hold a digit."
352 *indicator_length = 2;
354 if (!atoi_n_check(leader+11, 1, identifier_length))
357 "Identifier length at offset 11 should hold a digit."
360 *identifier_length = 2;
362 if (!atoi_n_check(leader+12, 5, base_address))
365 "Base address at offsets 12..16 should hold a number."
369 if (!atoi_n_check(leader+20, 1, length_data_entry))
372 "Length data entry at offset 20 should hold a digit."
374 *length_data_entry = 4;
377 if (!atoi_n_check(leader+21, 1, length_starting))
380 "Length starting at offset 21 should hold a digit."
382 *length_starting = 5;
385 if (!atoi_n_check(leader+22, 1, length_implementation))
388 "Length implementation at offset 22 should hold a digit."
390 *length_implementation = 0;
    /* Record the decoded values as comment nodes (any guarding condition is
     * not visible in this listing). */
396 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
397 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
398 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
399 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
400 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
401 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
403 yaz_marc_add_leader(mt, leader, 24);
/* Sets the string written before each subfield in line output. The value
 * is copied into a fixed-size member; input that does not fit is silently
 * truncated and the result is always NUL-terminated. */
406 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
408 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
409 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/* Sets the end-of-line string used by line output. The value is copied
 * into a fixed-size member; input that does not fit is silently truncated
 * and the result is always NUL-terminated. */
412 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
414 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
415 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
418 /* try to guess how many bytes the identifier really is! */
/* Feeds the first 1, 2, 3 and then 4 bytes of buf to the handle's iconv
 * converter and returns the first length that converts without error.
 * Returns 1 when no length works or (presumably, the condition is not
 * visible here) when no converter is available. */
419 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
424 for (i = 1; i<5; i++)
427 size_t outbytesleft = sizeof(outbuf);
429 const char *inp = buf;
431 size_t inbytesleft = i;
432 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
433 &outp, &outbytesleft);
434 if (r != (size_t) (-1))
435 return i; /* got a complete sequence */
437 return 1; /* giving up */
439 return 1; /* we don't know */
/* Discards the current record: resets the NMEM pool that backs all nodes
 * and points the append cursor back at the head of the node list. */
442 void yaz_marc_reset(yaz_marc_t mt)
444 nmem_reset(mt->nmem);
446 mt->nodes_pp = &mt->nodes;
/* "Check" output mode: locates the leader node, parses the identifier
 * length at leader offset 11, and writes the text of every comment node to
 * wr, one per line. Handling of other node types and the return values are
 * not visible in this listing. */
450 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
452 struct yaz_marc_node *n;
453 int identifier_length;
454 const char *leader = 0;
456 for (n = mt->nodes; n; n = n->next)
457 if (n->which == YAZ_MARC_LEADER)
459 leader = n->u.leader;
    /* leader is still 0 here if the record has no leader node; presumably a
     * guard sits in the lines not shown -- verify. */
465 if (!atoi_n_check(leader+11, 1, &identifier_length))
468 for (n = mt->nodes; n; n = n->next)
472 case YAZ_MARC_COMMENT:
473 wrbuf_iconv_write(wr, mt->iconv_cd,
474 n->u.comment, strlen(n->u.comment));
475 wrbuf_puts(wr, "\n");
/* Returns the number of leading bytes of `data` that form the subfield
 * code. For identifier_length > 2 this is identifier_length - 1; otherwise
 * the length is probed with cdata_one_character. */
484 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
485 int identifier_length)
487 /* if identifier length is 2 (most MARCs) or less (probably an error),
488 the code is a single character .. However we've
489 seen multibyte codes, so see how big it really is */
490 if (identifier_length > 2)
491 return identifier_length - 1;
493 return cdata_one_character(mt, data);
/* Writes the record to wr in line format: one line per field, with the
 * configured subfield prefix before each subfield code and the configured
 * end-of-line string after each field. Comment nodes are written with a
 * trailing ")" and the leader on its own line. Field data is passed
 * through the handle's iconv converter. */
496 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
498 struct yaz_marc_node *n;
499 int identifier_length;
500 const char *leader = 0;
502 for (n = mt->nodes; n; n = n->next)
503 if (n->which == YAZ_MARC_LEADER)
505 leader = n->u.leader;
511 if (!atoi_n_check(leader+11, 1, &identifier_length))
514 for (n = mt->nodes; n; n = n->next)
516 struct yaz_marc_subfield *s;
519 case YAZ_MARC_DATAFIELD:
    /* NOTE(review): the indicator is passed straight to %s. The XML writers
     * in this file test the indicator for 0 before using it, and
     * yaz_marc_add_datafield_turbo_xml creates nodes with indicator == 0 --
     * confirm a null indicator cannot reach this printf. */
520 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
521 n->u.datafield.indicator);
522 for (s = n->u.datafield.subfields; s; s = s->next)
524 size_t using_code_len = get_subfield_len(mt, s->code_data,
527 wrbuf_puts (wr, mt->subfield_str);
528 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
530 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
    /* Everything after the code bytes is the subfield data. */
531 wrbuf_iconv_puts(wr, mt->iconv_cd,
532 s->code_data + using_code_len);
533 marc_iconv_reset(mt, wr);
535 wrbuf_puts (wr, mt->endline_str);
537 case YAZ_MARC_CONTROLFIELD:
538 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
539 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
540 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
541 marc_iconv_reset(mt, wr);
542 wrbuf_puts (wr, mt->endline_str);
544 case YAZ_MARC_COMMENT:
546 wrbuf_iconv_write(wr, mt->iconv_cd,
547 n->u.comment, strlen(n->u.comment));
548 marc_iconv_reset(mt, wr);
549 wrbuf_puts(wr, ")\n");
551 case YAZ_MARC_LEADER:
552 wrbuf_printf(wr, "%s\n", n->u.leader);
555 wrbuf_puts(wr, "\n");
/* Closes a collection wrapper: when at least one record has been written
 * in collection mode (state collection_second), emits the closing
 * collection tag for the MARCXML, Turbo MARCXML and MarcXchange formats. */
559 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
561 if (mt->enable_collection == collection_second)
563 switch(mt->output_format)
565 case YAZ_MARC_MARCXML:
566 case YAZ_MARC_TMARCXML:
567 wrbuf_printf(wr, "</collection>\n");
569 case YAZ_MARC_XCHANGE:
570 wrbuf_printf(wr, "</collection>\n");
/* Turns on collection wrapping: sets the state to collection_first, which
 * the XML writers check before emitting the opening collection element. */
577 void yaz_marc_enable_collection(yaz_marc_t mt)
579 mt->enable_collection = collection_first;
/* Writes the current record to wr using the writer that corresponds to
 * mt->output_format and returns that writer's result. MARCXML and Turbo
 * MARCXML share yaz_marc_write_marcxml; MarcXchange is written without
 * format and type attributes. */
582 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
584 switch(mt->output_format)
587 return yaz_marc_write_line(mt, wr);
588 case YAZ_MARC_MARCXML:
589 case YAZ_MARC_TMARCXML:
590 return yaz_marc_write_marcxml(mt, wr);
591 case YAZ_MARC_XCHANGE:
592 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
593 case YAZ_MARC_ISO2709:
594 return yaz_marc_write_iso2709(mt, wr);
596 return yaz_marc_write_check(mt, wr);
/* Element / attribute names used by the XML writers, indexed by the
 * `turbo` flag: [0] is the MARCXML name, [1] the Turbo MARCXML short form.
 * NOTE(review): these arrays have external linkage and generic names, and
 * the pointers themselves are writable. Also, add_marc_datafield_turbo_xml
 * declares a local WRBUF called subfield_name that shadows the array of
 * the same name. */
601 const char *collection_name[2] = { "collection", "collection"};
602 const char *record_name[2] = { "record", "r"};
603 const char *leader_name[2] = { "leader", "l"};
604 const char *controlfield_name[2]= { "controlfield", "c"};
605 const char *datafield_name[2] = { "datafield", "d"};
606 const char *indicator_name[2] = { "ind", "i"};
607 const char *subfield_name[2] = { "subfield", "s"};
610 /** \brief common MARC XML/Xchange writer
612 \param wr WRBUF output
613 \param ns XMLNS for the elements
614 \param format record format (e.g. "MARC21")
615 \param type record type (e.g. "Bibliographic")
/* WRBUF-based XML writer shared by MARCXML, Turbo MARCXML and MarcXchange.
 * Emits an optional collection wrapper, the record element (with namespace
 * and optional format / type attributes), then one element per node. Names
 * come from the *_name tables indexed by `turbo`. The branches choosing
 * between attribute form (tag= / code=) and Turbo element-name form are
 * not visible in this listing. */
617 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
622 struct yaz_marc_node *n;
623 int identifier_length;
624 const char *leader = 0;
626 int turbo = yaz_marc_get_write_format(mt) == YAZ_MARC_TMARCXML;
628 for (n = mt->nodes; n; n = n->next)
629 if (n->which == YAZ_MARC_LEADER)
631 leader = n->u.leader;
637 if (!atoi_n_check(leader+11, 1, &identifier_length))
640 if (mt->enable_collection != no_collection)
    /* The first record in collection mode opens the collection element and
     * carries the namespace; later records are bare record elements. */
642 if (mt->enable_collection == collection_first) {
643 wrbuf_printf(wr, "<%s xmlns=\"%s\">\n", collection_name[turbo], ns);
644 mt->enable_collection = collection_second;
646 wrbuf_printf(wr, "<%s", record_name[turbo]);
650 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
    /* format and type are capped at 80 characters by the %.80s conversion. */
653 wrbuf_printf(wr, " format=\"%.80s\"", format);
655 wrbuf_printf(wr, " type=\"%.80s\"", type);
656 wrbuf_printf(wr, ">\n");
657 for (n = mt->nodes; n; n = n->next)
659 struct yaz_marc_subfield *s;
663 case YAZ_MARC_DATAFIELD:
665 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
667 wrbuf_printf(wr, " tag=\"");
668 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
669 strlen(n->u.datafield.tag));
671 wrbuf_printf(wr, "\"");
    /* One attribute per indicator character, numbered from 1. */
672 if (n->u.datafield.indicator)
675 for (i = 0; n->u.datafield.indicator[i]; i++)
677 wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
678 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
679 n->u.datafield.indicator+i, 1);
680 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
683 wrbuf_printf(wr, ">\n");
684 for (s = n->u.datafield.subfields; s; s = s->next)
686 size_t using_code_len = get_subfield_len(mt, s->code_data,
688 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
690 wrbuf_printf(wr, " code=\"");
691 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
692 s->code_data, using_code_len);
693 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
695 element_name_encode(mt, wr, s->code_data, using_code_len);
698 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
699 s->code_data + using_code_len,
700 strlen(s->code_data + using_code_len));
701 marc_iconv_reset(mt, wr);
702 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
704 element_name_encode(mt, wr, s->code_data, using_code_len);
705 wrbuf_puts(wr, ">\n");
707 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
710 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
711 strlen(n->u.datafield.tag));
    /* NOTE(review): the format string below has no conversion, so the
     * datafield_name[turbo] argument is surplus and ignored. */
712 wrbuf_printf(wr, ">\n", datafield_name[turbo]);
714 case YAZ_MARC_CONTROLFIELD:
715 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
717 wrbuf_printf(wr, " tag=\"");
718 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
719 strlen(n->u.controlfield.tag));
720 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
723 //TODO convert special
724 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
725 strlen(n->u.controlfield.tag));
726 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
728 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
729 n->u.controlfield.data,
730 strlen(n->u.controlfield.data));
731 marc_iconv_reset(mt, wr);
732 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
733 //TODO convert special
735 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
736 strlen(n->u.controlfield.tag));
737 wrbuf_puts(wr, ">\n");
739 case YAZ_MARC_COMMENT:
    /* NOTE(review): the comment text is written raw, with no escaping and
     * no charset conversion; text containing "--" would make the XML
     * comment ill-formed. */
740 wrbuf_printf(wr, "<!-- ");
741 wrbuf_puts(wr, n->u.comment);
742 wrbuf_printf(wr, " -->\n");
744 case YAZ_MARC_LEADER:
745 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
746 wrbuf_iconv_write_cdata(wr,
747 0 , /* no charset conversion for leader */
748 n->u.leader, strlen(n->u.leader));
749 wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
752 wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
/* WRBUF-based XML writer with hard-coded MARCXML element names (record,
 * datafield, subfield, controlfield, leader); no Turbo support.
 * NOTE(review): no call to this function appears in the visible lines of
 * this file (yaz_marc_write_marcxml_ns calls the _ns1 variant) -- check
 * whether it is still needed. */
756 static int yaz_marc_write_marcxml_ns2(yaz_marc_t mt, WRBUF wr,
761 struct yaz_marc_node *n;
762 int identifier_length;
763 const char *leader = 0;
765 for (n = mt->nodes; n; n = n->next)
766 if (n->which == YAZ_MARC_LEADER)
768 leader = n->u.leader;
774 if (!atoi_n_check(leader+11, 1, &identifier_length))
777 if (mt->enable_collection != no_collection)
779 if (mt->enable_collection == collection_first)
780 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
781 mt->enable_collection = collection_second;
782 wrbuf_printf(wr, "<record");
786 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
    /* format and type are capped at 80 characters by the %.80s conversion. */
789 wrbuf_printf(wr, " format=\"%.80s\"", format);
791 wrbuf_printf(wr, " type=\"%.80s\"", type);
792 wrbuf_printf(wr, ">\n");
793 for (n = mt->nodes; n; n = n->next)
795 struct yaz_marc_subfield *s;
799 case YAZ_MARC_DATAFIELD:
800 wrbuf_printf(wr, " <datafield tag=\"");
801 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
802 strlen(n->u.datafield.tag));
803 wrbuf_printf(wr, "\"");
    /* One indN attribute per indicator character, numbered from 1. */
804 if (n->u.datafield.indicator)
807 for (i = 0; n->u.datafield.indicator[i]; i++)
809 wrbuf_printf(wr, " ind%d=\"", i+1);
810 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
811 n->u.datafield.indicator+i, 1);
812 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
815 wrbuf_printf(wr, ">\n");
816 for (s = n->u.datafield.subfields; s; s = s->next)
818 size_t using_code_len = get_subfield_len(mt, s->code_data,
820 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
821 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
822 s->code_data, using_code_len);
823 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
824 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
825 s->code_data + using_code_len,
826 strlen(s->code_data + using_code_len));
827 marc_iconv_reset(mt, wr);
828 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
829 wrbuf_puts(wr, "\n");
831 wrbuf_printf(wr, " </datafield>\n");
833 case YAZ_MARC_CONTROLFIELD:
834 wrbuf_printf(wr, " <controlfield tag=\"");
835 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
836 strlen(n->u.controlfield.tag));
837 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
838 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
839 n->u.controlfield.data,
840 strlen(n->u.controlfield.data));
842 marc_iconv_reset(mt, wr);
843 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
844 wrbuf_puts(wr, "\n");
846 case YAZ_MARC_COMMENT:
    /* NOTE(review): the comment text is written raw (no escaping, no
     * charset conversion); "--" inside it would break the XML comment. */
847 wrbuf_printf(wr, "<!-- ");
848 wrbuf_puts(wr, n->u.comment);
849 wrbuf_printf(wr, " -->\n");
851 case YAZ_MARC_LEADER:
852 wrbuf_printf(wr, " <leader>");
853 wrbuf_iconv_write_cdata(wr,
854 0 /* no charset conversion for leader */,
855 n->u.leader, strlen(n->u.leader));
856 wrbuf_printf(wr, "</leader>\n");
859 wrbuf_puts(wr, "</record>\n");
/* Chooses the XML back end. With write_using_libxml2 set, the record is
 * built as a libxml2 tree (MARCXML or Turbo MARCXML builder), wrapped in a
 * document, serialized with xmlDocDumpMemory and appended to wr. Otherwise
 * the WRBUF-based writer yaz_marc_write_marcxml_ns1 is used. */
864 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
869 if (mt->write_using_libxml2)
875 if (yaz_marc_get_write_format(mt) == YAZ_MARC_MARCXML)
876 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
877 else // Check for Turbo XML
878 ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type);
882 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
885 xmlDocSetRootElement(doc, root_ptr);
    /* xmlDocDumpMemory allocates buf_out; releasing it and the document is
     * not visible in this listing -- TODO confirm both are freed. */
886 xmlDocDumpMemory(doc, &buf_out, &len_out);
888 wrbuf_write(wr, (const char *) buf_out, len_out);
899 return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
/* Writes the record as MARCXML, or as Turbo MARCXML when that is the
 * selected output format, choosing the matching XML namespace. Unless a
 * leader specification is set, leader position 9 is forced to 'a'. */
902 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
904 /* set leader 09 to 'a' for UNICODE */
905 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
906 if (!mt->leader_spec)
907 yaz_marc_modify_leader(mt, 9, "a");
    /* NOTE(review): this declaration follows a statement (needs C99 or
     * later), and a string literal is stored in a non-const char pointer;
     * const char * would be the safer type. */
908 char *name_space = "http://www.loc.gov/MARC21/slim";
909 if (mt->output_format == YAZ_MARC_TMARCXML)
910 name_space = "http://www.indexdata.com/MARC21/turboxml";
911 return yaz_marc_write_marcxml_ns(mt, wr, name_space,
/* Writes the record as MarcXchange: same XML writer as MARCXML, but with
 * the namespace "info:lc/xmlns/marcxchange-v1". */
915 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
919 return yaz_marc_write_marcxml_ns(mt, wr,
920 "info:lc/xmlns/marcxchange-v1",
/* Adds one data field node n to the libxml2 tree under record_ptr, either
 * in MARCXML form (datafield element with tag / code attributes) or in
 * Turbo form (tag and subfield code folded into the element names). The
 * branches selecting between the two forms are not visible in this
 * listing. wr_cdata is a caller-supplied scratch buffer for iconv output.
 * NOTE(review): the function has external linkage but no yaz_ prefix. */
926 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
929 struct yaz_marc_subfield *s;
930 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
932 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
933 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
936 //TODO consider if safe
    /* Copies at most 3 tag bytes after a one-character prefix. The
     * declaration and termination of `field` are not visible here --
     * TODO confirm it holds at least 5 bytes and is NUL-terminated. */
939 strncpy(field + 1, n->u.datafield.tag, 3);
941 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
943 if (n->u.datafield.indicator)
946 for (i = 0; n->u.datafield.indicator[i]; i++)
951 ind_val[0] = n->u.datafield.indicator[i];
953 sprintf(ind_str, "%s%d", indicator_name[turbo], i+1);
954 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
    /* NOTE(review): this local shadows the file-scope subfield_name[]
     * table. */
957 WRBUF subfield_name = wrbuf_alloc();
958 for (s = n->u.datafield.subfields; s; s = s->next)
960 xmlNode *ptr_subfield;
961 size_t using_code_len = get_subfield_len(mt, s->code_data,
    /* Convert the subfield data (everything after the code bytes) first. */
963 wrbuf_rewind(wr_cdata);
964 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
965 marc_iconv_reset(mt, wr_cdata);
968 ptr_subfield = xmlNewTextChild(
970 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
    /* Reuse the scratch buffer for the converted subfield code. */
971 wrbuf_rewind(wr_cdata);
972 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
973 xmlNewProp(ptr_subfield, BAD_CAST "code",
974 BAD_CAST wrbuf_cstr(wr_cdata));
976 else { // Turbo format
977 wrbuf_rewind(subfield_name);
978 wrbuf_puts(subfield_name, "s");
979 element_name_encode(mt, subfield_name, s->code_data, using_code_len);
980 ptr_subfield = xmlNewTextChild(ptr, ns_record,
981 BAD_CAST wrbuf_cstr(subfield_name),
982 BAD_CAST wrbuf_cstr(wr_cdata));
985 wrbuf_destroy(subfield_name);
/* Builds a libxml2 tree for the record in Turbo MARCXML form and stores
 * the root node in *root_ptr. The root element is "r" in namespace ns,
 * with optional format / type attributes. Data fields are delegated to
 * add_marc_datafield_turbo_xml. Several branches (the turbo / non-turbo
 * selection) are not visible in this listing. */
988 int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
993 struct yaz_marc_node *n;
994 int identifier_length;
995 const char *leader = 0;
999 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
1000 for (n = mt->nodes; n; n = n->next)
1001 if (n->which == YAZ_MARC_LEADER)
1003 leader = n->u.leader;
1009 if (!atoi_n_check(leader+11, 1, &identifier_length))
    /* Scratch buffer for iconv-converted field data; destroyed at the
     * end. */
1012 wr_cdata = wrbuf_alloc();
1014 record_ptr = xmlNewNode(0, BAD_CAST "r");
1015 *root_ptr = record_ptr;
1017 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1018 xmlSetNs(record_ptr, ns_record);
1021 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1023 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1024 for (n = mt->nodes; n; n = n->next)
1026 struct yaz_marc_subfield *s;
1031 case YAZ_MARC_DATAFIELD:
1032 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
1034 case YAZ_MARC_CONTROLFIELD:
1035 wrbuf_rewind(wr_cdata);
1036 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1037 marc_iconv_reset(mt, wr_cdata);
1040 ptr = xmlNewTextChild(record_ptr, ns_record,
1041 BAD_CAST "controlfield",
1042 BAD_CAST wrbuf_cstr(wr_cdata));
1043 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1046 // TODO required iconv?
    /* Copies at most 3 tag bytes after a one-character prefix. The
     * declaration and termination of `field` are not visible here --
     * TODO confirm. */
1049 strncpy(field + 1, n->u.controlfield.tag, 3);
1051 ptr = xmlNewTextChild(record_ptr, ns_record,
1053 BAD_CAST wrbuf_cstr(wr_cdata));
1057 case YAZ_MARC_COMMENT:
1058 ptr = xmlNewComment(BAD_CAST n->u.comment);
1059 xmlAddChild(record_ptr, ptr);
1061 case YAZ_MARC_LEADER:
1063 char *field = "leader";
1066 xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
1067 BAD_CAST n->u.leader);
1072 wrbuf_destroy(wr_cdata);
/* Builds a libxml2 tree for the record in MARCXML / MarcXchange form and
 * stores the root node in *root_ptr. The root element is "record" in
 * namespace ns, with optional format / type attributes. Field data is
 * converted through the handle's iconv descriptor before it is given to
 * libxml2.
 * NOTE(review): tags, indicator characters, comments and the leader are
 * handed to libxml2 without conversion. libxml2 expects UTF-8, so this
 * assumes those values are plain ASCII -- confirm. */
1077 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
1082 struct yaz_marc_node *n;
1083 int identifier_length;
1084 const char *leader = 0;
1085 xmlNode *record_ptr;
1089 for (n = mt->nodes; n; n = n->next)
1090 if (n->which == YAZ_MARC_LEADER)
1092 leader = n->u.leader;
1098 if (!atoi_n_check(leader+11, 1, &identifier_length))
    /* Scratch buffer for iconv-converted text; destroyed at the end. */
1101 wr_cdata = wrbuf_alloc();
1103 record_ptr = xmlNewNode(0, BAD_CAST "record");
1104 *root_ptr = record_ptr;
1106 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1107 xmlSetNs(record_ptr, ns_record);
1110 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1112 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1113 for (n = mt->nodes; n; n = n->next)
1115 struct yaz_marc_subfield *s;
1120 case YAZ_MARC_DATAFIELD:
1121 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1122 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
    /* One indN attribute per indicator character, numbered from 1. */
1123 if (n->u.datafield.indicator)
1126 for (i = 0; n->u.datafield.indicator[i]; i++)
1131 sprintf(ind_str, "ind%d", i+1);
1132 ind_val[0] = n->u.datafield.indicator[i];
1134 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1137 for (s = n->u.datafield.subfields; s; s = s->next)
1139 xmlNode *ptr_subfield;
1140 size_t using_code_len = get_subfield_len(mt, s->code_data,
    /* Subfield data (after the code bytes) becomes the element text. */
1142 wrbuf_rewind(wr_cdata);
1143 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1144 s->code_data + using_code_len);
1145 marc_iconv_reset(mt, wr_cdata);
1146 ptr_subfield = xmlNewTextChild(
1148 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
    /* The scratch buffer is reused for the converted subfield code. */
1150 wrbuf_rewind(wr_cdata);
1151 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1152 s->code_data, using_code_len);
1153 xmlNewProp(ptr_subfield, BAD_CAST "code",
1154 BAD_CAST wrbuf_cstr(wr_cdata));
1157 case YAZ_MARC_CONTROLFIELD:
1158 wrbuf_rewind(wr_cdata);
1159 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1160 marc_iconv_reset(mt, wr_cdata);
1162 ptr = xmlNewTextChild(record_ptr, ns_record,
1163 BAD_CAST "controlfield",
1164 BAD_CAST wrbuf_cstr(wr_cdata));
1166 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1168 case YAZ_MARC_COMMENT:
1169 ptr = xmlNewComment(BAD_CAST n->u.comment);
1170 xmlAddChild(record_ptr, ptr);
1172 case YAZ_MARC_LEADER:
1173 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1174 BAD_CAST n->u.leader);
1178 wrbuf_destroy(wr_cdata);
/* Serializes the record as ISO 2709 into wr, in two passes over the nodes.
 * Pass 1 converts each field into a scratch buffer only to measure its
 * length, and builds the directory (3-character tag, field length,
 * starting offset). Pass 2 writes the field data itself.
 * The 24-byte header is rebuilt from the computed record length and base
 * address plus bytes 5..11 and 17..23 of the original leader. */
1187 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1189 struct yaz_marc_node *n;
1190 int indicator_length;
1191 int identifier_length;
1192 int length_data_entry;
1193 int length_starting;
1194 int length_implementation;
1195 int data_offset = 0;
1196 const char *leader = 0;
1197 WRBUF wr_dir, wr_head, wr_data_tmp;
1200 for (n = mt->nodes; n; n = n->next)
1201 if (n->which == YAZ_MARC_LEADER)
1202 leader = n->u.leader;
    /* Directory layout parameters come from the leader. */
1206 if (!atoi_n_check(leader+10, 1, &indicator_length))
1208 if (!atoi_n_check(leader+11, 1, &identifier_length))
1210 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1212 if (!atoi_n_check(leader+21, 1, &length_starting))
1214 if (!atoi_n_check(leader+22, 1, &length_implementation))
1217 wr_data_tmp = wrbuf_alloc();
1218 wr_dir = wrbuf_alloc();
    /* Pass 1: measure each field and emit its directory entry. */
1219 for (n = mt->nodes; n; n = n->next)
1221 int data_length = 0;
1222 struct yaz_marc_subfield *s;
1226 case YAZ_MARC_DATAFIELD:
1227 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1228 data_length += indicator_length;
1229 wrbuf_rewind(wr_data_tmp);
1230 for (s = n->u.datafield.subfields; s; s = s->next)
1232 /* write dummy IDFS + content */
1233 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1234 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1235 marc_iconv_reset(mt, wr_data_tmp);
1237 /* write a dummy FS so the length includes the field separator */
1238 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1239 marc_iconv_reset(mt, wr_data_tmp);
1240 data_length += wrbuf_len(wr_data_tmp);
1242 case YAZ_MARC_CONTROLFIELD:
1243 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1245 wrbuf_rewind(wr_data_tmp);
1246 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1247 n->u.controlfield.data);
1248 marc_iconv_reset(mt, wr_data_tmp);
1249 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1250 marc_iconv_reset(mt, wr_data_tmp);
1251 data_length += wrbuf_len(wr_data_tmp);
1253 case YAZ_MARC_COMMENT:
1255 case YAZ_MARC_LEADER:
    /* NOTE(review): %0*d sets a minimum width only. A field length or
     * offset with more digits than the leader allows produces an over-wide
     * directory entry; no check is visible in this listing. */
1260 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1261 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1262 data_offset += data_length;
1265 /* mark end of directory */
1266 wrbuf_putc(wr_dir, ISO2709_FS);
1268 /* base address of data (comes after leader+directory) */
1269 base_address = 24 + wrbuf_len(wr_dir);
1271 wr_head = wrbuf_alloc();
    /* NOTE(review): both %05d fields assume values <= 99999. A larger
     * record makes wr_head longer than 24 bytes, and the fixed 24-byte
     * write below then drops the tail of the header. */
1273 /* write record length */
1274 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1275 /* from "original" leader */
1276 wrbuf_write(wr_head, leader+5, 7);
1277 /* base address of data */
1278 wrbuf_printf(wr_head, "%05d", base_address);
1279 /* from "original" leader */
1280 wrbuf_write(wr_head, leader+17, 7);
1282 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1283 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1284 wrbuf_destroy(wr_head);
1285 wrbuf_destroy(wr_dir);
1286 wrbuf_destroy(wr_data_tmp);
    /* Pass 2: write the actual field data after the directory. */
1288 for (n = mt->nodes; n; n = n->next)
1290 struct yaz_marc_subfield *s;
1294 case YAZ_MARC_DATAFIELD:
    /* NOTE(review): the indicator is used without a null test here, unlike
     * in the XML writers -- confirm it cannot be 0 on this path. */
1295 wrbuf_printf(wr, "%.*s", indicator_length,
1296 n->u.datafield.indicator);
1297 for (s = n->u.datafield.subfields; s; s = s->next)
1299 wrbuf_putc(wr, ISO2709_IDFS);
1300 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1301 marc_iconv_reset(mt, wr);
1303 wrbuf_putc(wr, ISO2709_FS);
1305 case YAZ_MARC_CONTROLFIELD:
1306 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1307 marc_iconv_reset(mt, wr);
1308 wrbuf_putc(wr, ISO2709_FS);
1310 case YAZ_MARC_COMMENT:
1312 case YAZ_MARC_LEADER:
    /* Record terminator. */
1316 wrbuf_printf(wr, "%c", ISO2709_RS);
/* Decodes one ISO 2709 record from buf (bsize bytes) and writes it to wr
 * in the handle's current output format. Returns the value reported by
 * yaz_marc_read_iso2709 (the record length) on success, or -1 on error. */
1321 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1323 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1326 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1328 return -1; /* error */
1329 return r; /* OK, return length > 0 */
/* Like yaz_marc_decode_wrbuf, but writes into the handle's internal output
 * buffer and exposes it through *result / *rsize. The returned pointer
 * refers to that internal buffer, which is rewound at the start of every
 * call, so the text is only valid until the next call on the same
 * handle. */
1332 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1333 const char **result, size_t *rsize)
1337 wrbuf_rewind(mt->m_wr);
1338 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1340 *result = wrbuf_cstr(mt->m_wr);
1342 *rsize = wrbuf_len(mt->m_wr);
/* Stores the input (read) format in the handle. */
1346 void yaz_marc_set_read_format(yaz_marc_t mt, int format)
1349 mt->input_format = format;
/* Returns the input (read) format stored in the handle. */
1352 int yaz_marc_get_read_format(yaz_marc_t mt)
1355 return mt->input_format;
/* Stores the output (write) format in the handle. Selecting Turbo MARCXML
 * also switches the handle to the libxml2-based writer.
 * NOTE(review): in the visible lines the libxml2 flag is only ever set,
 * never cleared, so it stays on after switching to another format --
 * confirm that is intended. */
1360 void yaz_marc_set_write_format(yaz_marc_t mt, int format)
1363 mt->output_format = format;
1365 // Force using libxml2
1366 if (mt->output_format == YAZ_MARC_TMARCXML)
1367 mt->write_using_libxml2 = 1;
/* Returns the output (write) format stored in the handle. */
1372 int yaz_marc_get_write_format(yaz_marc_t mt)
1375 return mt->output_format;
1381 * Deprecated, use yaz_marc_set_write_format
/* Older entry point kept for compatibility: forwards xmlmode unchanged to
 * yaz_marc_set_write_format. */
1383 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1385 yaz_marc_set_write_format(mt, xmlmode);
1390 void yaz_marc_debug(yaz_marc_t mt, int level)
1396 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/* Returns the character set conversion descriptor held by the handle. */
1401 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1403 return mt->iconv_cd;
/* Overwrites part of the record's leader: copies the characters of str
 * (without its terminator) into the leader, starting at offset off. */
1406 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1408 struct yaz_marc_node *n;
1410 for (n = mt->nodes; n; n = n->next)
1411 if (n->which == YAZ_MARC_LEADER)
1413 leader = n->u.leader;
    /* NOTE(review): no bound check is visible. off + strlen(str) must not
     * exceed the leader's length (24 bytes when added through
     * yaz_marc_set_leader), otherwise this writes past the allocation. */
1414 memcpy(leader+off, str, strlen(str));
/* Replaces the handle's leader specification. The previous spec is freed
 * first. The new one is validated by applying it to a scratch 24-byte
 * leader with marc_exec_leader, and the handle keeps its own copy
 * (xstrdup). Return values are not visible in this listing. */
1419 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1421 xfree(mt->leader_spec);
1422 mt->leader_spec = 0;
1425 char dummy_leader[24];
1426 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1428 mt->leader_spec = xstrdup(leader_spec);
/* Applies a leader specification to `leader` (a buffer of `size` bytes).
 * The spec is parsed as POS=VALUE items, with VALUE running up to the next
 * comma. A VALUE starting with a quote is copied literally to offset POS
 * after a bounds check; a VALUE starting with a digit is handled by a
 * separate branch that is not visible in this listing. */
1433 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1435 const char *cp = leader_spec;
1440 int no_read = 0, no = 0;
    /* %20[^,] stores up to 20 characters plus a terminator, so val must
     * hold at least 21 bytes -- its declaration is not visible here;
     * TODO confirm. */
1442 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1443 if (no < 2 || no_read < 3)
1445 if (pos < 0 || (size_t) pos >= size)
    /* Quoted value: find the closing quote after the opening one. */
1450 const char *vp = strchr(val+1, '\'');
1456 if (len + pos > size)
1458 memcpy(leader + pos, val+1, len);
1460 else if (*val >= '0' && *val <= '9')
/* Maps a format name to its YAZ_MARC_* constant. Recognized names are
 * "marc", "marcxml", "tmarcxml", "marcxchange" and "line". The value used
 * for unrecognized names and the return statement are not visible in this
 * listing. */
1476 int yaz_marc_decode_formatstr(const char *arg)
1479 if (!strcmp(arg, "marc"))
1480 mode = YAZ_MARC_ISO2709;
1481 if (!strcmp(arg, "marcxml"))
1482 mode = YAZ_MARC_MARCXML;
1483 if (!strcmp(arg, "tmarcxml"))
1484 mode = YAZ_MARC_TMARCXML;
1485 if (!strcmp(arg, "marcxchange"))
1486 mode = YAZ_MARC_XCHANGE;
1487 if (!strcmp(arg, "line"))
1488 mode = YAZ_MARC_LINE;
/* Enables (non-zero) or disables (0) the libxml2-based XML writer. */
1492 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1494 mt->write_using_libxml2 = enable;
/* Returns non-zero when the selected output format is Turbo MARCXML. */
1497 int yaz_marc_is_turbo_format(yaz_marc_t mt)
1499 return mt->output_format == YAZ_MARC_TMARCXML;
1506 * c-file-style: "Stroustrup"
1507 * indent-tabs-mode: nil
1509 * vim: shiftwidth=4 tabstop=8 expandtab