1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
35 enum yaz_collection_state {
41 /** \brief node types for yaz_marc_node */
42 enum YAZ_MARC_NODE_TYPE
45 YAZ_MARC_CONTROLFIELD,
50 /** \brief represents a data field */
51 struct yaz_marc_datafield {
54 struct yaz_marc_subfield *subfields;
57 /** \brief represents a control field */
58 struct yaz_marc_controlfield {
63 /** \brief a comment node */
64 struct yaz_marc_comment {
68 /** \brief MARC node */
69 struct yaz_marc_node {
70 enum YAZ_MARC_NODE_TYPE which;
72 struct yaz_marc_datafield datafield;
73 struct yaz_marc_controlfield controlfield;
77 struct yaz_marc_node *next;
80 /** \brief represents a subfield */
81 struct yaz_marc_subfield {
83 struct yaz_marc_subfield *next;
86 /** \brief the internals of a yaz_marc_t handle */
93 int write_using_libxml2;
94 enum yaz_collection_state enable_collection;
99 struct yaz_marc_node *nodes;
100 struct yaz_marc_node **nodes_pp;
101 struct yaz_marc_subfield **subfield_pp;
104 yaz_marc_t yaz_marc_create(void)
106 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
107 mt->output_format = YAZ_MARC_LINE;
109 mt->write_using_libxml2 = 0;
110 mt->enable_collection = no_collection;
111 mt->m_wr = wrbuf_alloc();
114 strcpy(mt->subfield_str, " $");
115 strcpy(mt->endline_str, "\n");
117 mt->nmem = nmem_create();
122 void yaz_marc_destroy(yaz_marc_t mt)
126 nmem_destroy(mt->nmem);
127 wrbuf_destroy(mt->m_wr);
128 xfree(mt->leader_spec);
132 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
137 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
139 wrbuf_iconv_reset(wr, mt->iconv_cd);
142 static int marc_exec_leader(const char *leader_spec, char *leader,
146 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
148 struct yaz_marc_node *n = (struct yaz_marc_node *)
149 nmem_malloc(mt->nmem, sizeof(*n));
152 mt->nodes_pp = &n->next;
157 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
158 const xmlNode *ptr_data)
160 struct yaz_marc_node *n = yaz_marc_add_node(mt);
161 n->which = YAZ_MARC_CONTROLFIELD;
162 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
163 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
166 void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, char *tag,
167 const xmlNode *ptr_data)
169 struct yaz_marc_node *n = yaz_marc_add_node(mt);
170 n->which = YAZ_MARC_CONTROLFIELD;
171 n->u.controlfield.tag = tag;
172 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
178 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
180 struct yaz_marc_node *n = yaz_marc_add_node(mt);
181 n->which = YAZ_MARC_COMMENT;
182 n->u.comment = nmem_strdup(mt->nmem, comment);
185 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
191 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
192 yaz_marc_add_comment(mt, buf);
196 int yaz_marc_get_debug(yaz_marc_t mt)
201 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
203 struct yaz_marc_node *n = yaz_marc_add_node(mt);
204 n->which = YAZ_MARC_LEADER;
205 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
206 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
209 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
210 const char *data, size_t data_len)
212 struct yaz_marc_node *n = yaz_marc_add_node(mt);
213 n->which = YAZ_MARC_CONTROLFIELD;
214 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
215 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
221 sprintf(msg, "controlfield:");
222 for (i = 0; i < 16 && i < data_len; i++)
223 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
225 sprintf(msg + strlen(msg), " ..");
226 yaz_marc_add_comment(mt, msg);
230 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
231 const char *indicator, size_t indicator_len)
233 struct yaz_marc_node *n = yaz_marc_add_node(mt);
234 n->which = YAZ_MARC_DATAFIELD;
235 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
236 n->u.datafield.indicator =
237 nmem_strdupn(mt->nmem, indicator, indicator_len);
238 n->u.datafield.subfields = 0;
240 /* make subfield_pp the current (last one) */
241 mt->subfield_pp = &n->u.datafield.subfields;
244 char *element_name_encode(yaz_marc_t mt, WRBUF buffer, char *code_data, size_t code_len) {
245 // TODO: map special codes to something that is valid in XML element names
249 for (index = 0; index < code_len; index++) {
250 if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
251 (code_data[index] >= 'a' && code_data[index] <= 'z') ||
252 (code_data[index] >= 'A' && code_data[index] <= 'Z')))
256 wrbuf_iconv_write(buffer, mt->iconv_cd, code_data, code_len);
259 char temp[2*code_len + 1];
260 wrbuf_puts(buffer, "-");
262 for (index = 0; index < code_len; index++) {
263 sprintf(temp, "%02X", (unsigned char) code_data[index] & 0xFF);
265 wrbuf_puts(buffer, temp);
267 yaz_log(YLOG_WARN, "Using numeric value in element name: %s", wrbuf_cstr(buffer));
272 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
273 const char *indicator, size_t indicator_len)
275 struct yaz_marc_node *n = yaz_marc_add_node(mt);
276 n->which = YAZ_MARC_DATAFIELD;
277 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
278 n->u.datafield.indicator =
279 nmem_strdupn(mt->nmem, indicator, indicator_len);
280 n->u.datafield.subfields = 0;
282 /* make subfield_pp the current (last one) */
283 mt->subfield_pp = &n->u.datafield.subfields;
286 struct yaz_marc_node* yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value)
288 struct yaz_marc_node *n = yaz_marc_add_node(mt);
289 n->which = YAZ_MARC_DATAFIELD;
290 n->u.datafield.tag = tag_value;
291 n->u.datafield.indicator = 0;
292 n->u.datafield.subfields = 0;
294 /* make subfield_pp the current (last one) */
295 mt->subfield_pp = &n->u.datafield.subfields;
299 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
301 n->u.datafield.indicator = indicator;
306 void yaz_marc_add_subfield(yaz_marc_t mt,
307 const char *code_data, size_t code_data_len)
314 sprintf(msg, "subfield:");
315 for (i = 0; i < 16 && i < code_data_len; i++)
316 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
317 if (i < code_data_len)
318 sprintf(msg + strlen(msg), " ..");
319 yaz_marc_add_comment(mt, msg);
324 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
325 nmem_malloc(mt->nmem, sizeof(*n));
326 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
328 /* mark subfield_pp to point to this one, so we append here next */
329 *mt->subfield_pp = n;
330 mt->subfield_pp = &n->next;
334 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
335 int *indicator_length,
336 int *identifier_length,
338 int *length_data_entry,
339 int *length_starting,
340 int *length_implementation)
344 memcpy(leader, leader_c, 24);
346 if (!atoi_n_check(leader+10, 1, indicator_length))
349 "Indicator length at offset 10 should hold a digit."
352 *indicator_length = 2;
354 if (!atoi_n_check(leader+11, 1, identifier_length))
357 "Identifier length at offset 11 should hold a digit."
360 *identifier_length = 2;
362 if (!atoi_n_check(leader+12, 5, base_address))
365 "Base address at offsets 12..16 should hold a number."
369 if (!atoi_n_check(leader+20, 1, length_data_entry))
372 "Length data entry at offset 20 should hold a digit."
374 *length_data_entry = 4;
377 if (!atoi_n_check(leader+21, 1, length_starting))
380 "Length starting at offset 21 should hold a digit."
382 *length_starting = 5;
385 if (!atoi_n_check(leader+22, 1, length_implementation))
388 "Length implementation at offset 22 should hold a digit."
390 *length_implementation = 0;
396 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
397 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
398 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
399 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
400 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
401 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
403 yaz_marc_add_leader(mt, leader, 24);
406 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
408 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
409 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
412 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
414 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
415 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
418 /* try to guess how many bytes the identifier really is! */
419 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
424 for (i = 1; i<5; i++)
427 size_t outbytesleft = sizeof(outbuf);
429 const char *inp = buf;
431 size_t inbytesleft = i;
432 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
433 &outp, &outbytesleft);
434 if (r != (size_t) (-1))
435 return i; /* got a complete sequence */
437 return 1; /* giving up */
439 return 1; /* we don't know */
442 void yaz_marc_reset(yaz_marc_t mt)
444 nmem_reset(mt->nmem);
446 mt->nodes_pp = &mt->nodes;
450 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
452 struct yaz_marc_node *n;
453 int identifier_length;
454 const char *leader = 0;
456 for (n = mt->nodes; n; n = n->next)
457 if (n->which == YAZ_MARC_LEADER)
459 leader = n->u.leader;
465 if (!atoi_n_check(leader+11, 1, &identifier_length))
468 for (n = mt->nodes; n; n = n->next)
472 case YAZ_MARC_COMMENT:
473 wrbuf_iconv_write(wr, mt->iconv_cd,
474 n->u.comment, strlen(n->u.comment));
475 wrbuf_puts(wr, "\n");
484 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
485 int identifier_length)
487 /* if identifier length is 2 (most MARCs) or less (probably an error),
488 the code is a single character. However, we have
489 seen multibyte codes, so check how big it really is */
490 if (identifier_length > 2)
491 return identifier_length - 1;
493 return cdata_one_character(mt, data);
496 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
498 struct yaz_marc_node *n;
499 int identifier_length;
500 const char *leader = 0;
502 for (n = mt->nodes; n; n = n->next)
503 if (n->which == YAZ_MARC_LEADER)
505 leader = n->u.leader;
511 if (!atoi_n_check(leader+11, 1, &identifier_length))
514 for (n = mt->nodes; n; n = n->next)
516 struct yaz_marc_subfield *s;
519 case YAZ_MARC_DATAFIELD:
520 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
521 n->u.datafield.indicator);
522 for (s = n->u.datafield.subfields; s; s = s->next)
524 size_t using_code_len = get_subfield_len(mt, s->code_data,
527 wrbuf_puts (wr, mt->subfield_str);
528 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
530 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
531 wrbuf_iconv_puts(wr, mt->iconv_cd,
532 s->code_data + using_code_len);
533 marc_iconv_reset(mt, wr);
535 wrbuf_puts (wr, mt->endline_str);
537 case YAZ_MARC_CONTROLFIELD:
538 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
539 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
540 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
541 marc_iconv_reset(mt, wr);
542 wrbuf_puts (wr, mt->endline_str);
544 case YAZ_MARC_COMMENT:
546 wrbuf_iconv_write(wr, mt->iconv_cd,
547 n->u.comment, strlen(n->u.comment));
548 marc_iconv_reset(mt, wr);
549 wrbuf_puts(wr, ")\n");
551 case YAZ_MARC_LEADER:
552 wrbuf_printf(wr, "%s\n", n->u.leader);
555 wrbuf_puts(wr, "\n");
559 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
561 if (mt->enable_collection == collection_second)
563 switch(mt->output_format)
565 case YAZ_MARC_MARCXML:
566 case YAZ_MARC_TMARCXML:
567 wrbuf_printf(wr, "</collection>\n");
569 case YAZ_MARC_XCHANGE:
570 wrbuf_printf(wr, "</collection>\n");
577 void yaz_marc_enable_collection(yaz_marc_t mt)
579 mt->enable_collection = collection_first;
582 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
584 switch(mt->output_format)
587 return yaz_marc_write_line(mt, wr);
588 case YAZ_MARC_MARCXML:
589 case YAZ_MARC_TMARCXML:
590 return yaz_marc_write_marcxml(mt, wr);
591 case YAZ_MARC_XCHANGE:
592 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
593 case YAZ_MARC_ISO2709:
594 return yaz_marc_write_iso2709(mt, wr);
596 return yaz_marc_write_check(mt, wr);
601 const char *collection_name[2] = { "collection", "collection"};
602 const char *record_name[2] = { "record", "r"};
603 const char *leader_name[2] = { "leader", "l"};
604 const char *controlfield_name[2]= { "controlfield", "c"};
605 const char *datafield_name[2] = { "datafield", "d"};
606 const char *indicator_name[2] = { "ind", "i"};
607 const char *subfield_name[2] = { "subfield", "s"};
610 /** \brief common MARC XML/Xchange writer
612 \param wr WRBUF output
613 \param ns XMLNS for the elements
614 \param format record format (e.g. "MARC21")
615 \param type record type (e.g. "Bibliographic")
617 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
622 struct yaz_marc_node *n;
623 int identifier_length;
624 const char *leader = 0;
626 int turbo = yaz_marc_get_write_format(mt) == YAZ_MARC_TMARCXML;
628 for (n = mt->nodes; n; n = n->next)
629 if (n->which == YAZ_MARC_LEADER)
631 leader = n->u.leader;
637 if (!atoi_n_check(leader+11, 1, &identifier_length))
640 if (mt->enable_collection != no_collection)
642 if (mt->enable_collection == collection_first) {
643 wrbuf_printf(wr, "<%s xmlns=\"%s\">\n", collection_name[turbo], ns);
644 mt->enable_collection = collection_second;
646 wrbuf_printf(wr, "<%s", record_name[turbo]);
650 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
653 wrbuf_printf(wr, " format=\"%.80s\"", format);
655 wrbuf_printf(wr, " type=\"%.80s\"", type);
656 wrbuf_printf(wr, ">\n");
657 for (n = mt->nodes; n; n = n->next)
659 struct yaz_marc_subfield *s;
663 case YAZ_MARC_DATAFIELD:
665 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
667 wrbuf_printf(wr, " tag=\"");
668 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
669 strlen(n->u.datafield.tag));
671 wrbuf_printf(wr, "\"");
672 if (n->u.datafield.indicator)
675 for (i = 0; n->u.datafield.indicator[i]; i++)
677 wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
678 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
679 n->u.datafield.indicator+i, 1);
680 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
683 wrbuf_printf(wr, ">\n");
684 for (s = n->u.datafield.subfields; s; s = s->next)
686 size_t using_code_len = get_subfield_len(mt, s->code_data,
688 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
690 wrbuf_printf(wr, " code=\"");
691 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
692 s->code_data, using_code_len);
693 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
695 element_name_encode(mt, wr, s->code_data, using_code_len);
698 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
699 s->code_data + using_code_len,
700 strlen(s->code_data + using_code_len));
701 marc_iconv_reset(mt, wr);
702 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
704 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
705 s->code_data, using_code_len);
706 wrbuf_puts(wr, ">\n");
708 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
711 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
712 strlen(n->u.datafield.tag));
713 wrbuf_printf(wr, ">\n", datafield_name[turbo]);
715 case YAZ_MARC_CONTROLFIELD:
716 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
718 wrbuf_printf(wr, " tag=\"");
719 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
720 strlen(n->u.controlfield.tag));
721 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
724 //TODO convert special
725 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
726 strlen(n->u.controlfield.tag));
727 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
729 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
730 n->u.controlfield.data,
731 strlen(n->u.controlfield.data));
732 marc_iconv_reset(mt, wr);
733 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
734 //TODO convert special
736 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
737 strlen(n->u.controlfield.tag));
738 wrbuf_puts(wr, ">\n");
740 case YAZ_MARC_COMMENT:
741 wrbuf_printf(wr, "<!-- ");
742 wrbuf_puts(wr, n->u.comment);
743 wrbuf_printf(wr, " -->\n");
745 case YAZ_MARC_LEADER:
746 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
747 wrbuf_iconv_write_cdata(wr,
748 0 /* no charset conversion for leader */,
749 n->u.leader, strlen(n->u.leader));
750 wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
753 wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
757 static int yaz_marc_write_marcxml_ns2(yaz_marc_t mt, WRBUF wr,
762 struct yaz_marc_node *n;
763 int identifier_length;
764 const char *leader = 0;
766 for (n = mt->nodes; n; n = n->next)
767 if (n->which == YAZ_MARC_LEADER)
769 leader = n->u.leader;
775 if (!atoi_n_check(leader+11, 1, &identifier_length))
778 if (mt->enable_collection != no_collection)
780 if (mt->enable_collection == collection_first)
781 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
782 mt->enable_collection = collection_second;
783 wrbuf_printf(wr, "<record");
787 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
790 wrbuf_printf(wr, " format=\"%.80s\"", format);
792 wrbuf_printf(wr, " type=\"%.80s\"", type);
793 wrbuf_printf(wr, ">\n");
794 for (n = mt->nodes; n; n = n->next)
796 struct yaz_marc_subfield *s;
800 case YAZ_MARC_DATAFIELD:
801 wrbuf_printf(wr, " <datafield tag=\"");
802 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
803 strlen(n->u.datafield.tag));
804 wrbuf_printf(wr, "\"");
805 if (n->u.datafield.indicator)
808 for (i = 0; n->u.datafield.indicator[i]; i++)
810 wrbuf_printf(wr, " ind%d=\"", i+1);
811 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
812 n->u.datafield.indicator+i, 1);
813 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
816 wrbuf_printf(wr, ">\n");
817 for (s = n->u.datafield.subfields; s; s = s->next)
819 size_t using_code_len = get_subfield_len(mt, s->code_data,
821 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
822 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
823 s->code_data, using_code_len);
824 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
825 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
826 s->code_data + using_code_len,
827 strlen(s->code_data + using_code_len));
828 marc_iconv_reset(mt, wr);
829 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
830 wrbuf_puts(wr, "\n");
832 wrbuf_printf(wr, " </datafield>\n");
834 case YAZ_MARC_CONTROLFIELD:
835 wrbuf_printf(wr, " <controlfield tag=\"");
836 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
837 strlen(n->u.controlfield.tag));
838 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
839 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
840 n->u.controlfield.data,
841 strlen(n->u.controlfield.data));
843 marc_iconv_reset(mt, wr);
844 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
845 wrbuf_puts(wr, "\n");
847 case YAZ_MARC_COMMENT:
848 wrbuf_printf(wr, "<!-- ");
849 wrbuf_puts(wr, n->u.comment);
850 wrbuf_printf(wr, " -->\n");
852 case YAZ_MARC_LEADER:
853 wrbuf_printf(wr, " <leader>");
854 wrbuf_iconv_write_cdata(wr,
855 0 /* no charset conversion for leader */,
856 n->u.leader, strlen(n->u.leader));
857 wrbuf_printf(wr, "</leader>\n");
860 wrbuf_puts(wr, "</record>\n");
865 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
870 if (mt->write_using_libxml2)
876 if (yaz_marc_get_write_format(mt) == YAZ_MARC_MARCXML)
877 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
878 else // Check for Turbo XML
879 ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type);
883 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
886 xmlDocSetRootElement(doc, root_ptr);
887 xmlDocDumpMemory(doc, &buf_out, &len_out);
889 wrbuf_write(wr, (const char *) buf_out, len_out);
900 return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
903 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
905 /* set leader 09 to 'a' for UNICODE */
906 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
907 if (!mt->leader_spec)
908 yaz_marc_modify_leader(mt, 9, "a");
909 char *name_space = "http://www.loc.gov/MARC21/slim";
910 if (mt->output_format == YAZ_MARC_TMARCXML)
911 name_space = "http://www.indexdata.com/MARC21/turboxml";
912 return yaz_marc_write_marcxml_ns(mt, wr, name_space,
916 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
920 return yaz_marc_write_marcxml_ns(mt, wr,
921 "info:lc/xmlns/marcxchange-v1",
927 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
930 struct yaz_marc_subfield *s;
931 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
933 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
934 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
937 //TODO consider if safe
940 strncpy(field + 1, n->u.datafield.tag, 3);
942 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
944 if (n->u.datafield.indicator)
947 for (i = 0; n->u.datafield.indicator[i]; i++)
952 ind_val[0] = n->u.datafield.indicator[i];
954 sprintf(ind_str, "%s%d", indicator_name[turbo], i+1);
955 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
958 WRBUF subfield_name = wrbuf_alloc();
959 for (s = n->u.datafield.subfields; s; s = s->next)
961 xmlNode *ptr_subfield;
962 size_t using_code_len = get_subfield_len(mt, s->code_data,
964 wrbuf_rewind(wr_cdata);
965 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
966 marc_iconv_reset(mt, wr_cdata);
969 ptr_subfield = xmlNewTextChild(
971 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
972 wrbuf_rewind(wr_cdata);
973 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
974 xmlNewProp(ptr_subfield, BAD_CAST "code",
975 BAD_CAST wrbuf_cstr(wr_cdata));
977 else { // Turbo format
978 wrbuf_rewind(subfield_name);
979 wrbuf_puts(subfield_name, "s");
980 element_name_encode(mt, subfield_name, s->code_data, using_code_len);
981 ptr_subfield = xmlNewTextChild(ptr, ns_record,
982 BAD_CAST wrbuf_cstr(subfield_name),
983 BAD_CAST wrbuf_cstr(wr_cdata));
986 wrbuf_destroy(subfield_name);
989 int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
994 struct yaz_marc_node *n;
995 int identifier_length;
996 const char *leader = 0;
1000 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
1001 for (n = mt->nodes; n; n = n->next)
1002 if (n->which == YAZ_MARC_LEADER)
1004 leader = n->u.leader;
1010 if (!atoi_n_check(leader+11, 1, &identifier_length))
1013 wr_cdata = wrbuf_alloc();
1015 record_ptr = xmlNewNode(0, BAD_CAST "r");
1016 *root_ptr = record_ptr;
1018 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1019 xmlSetNs(record_ptr, ns_record);
1022 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1024 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1025 for (n = mt->nodes; n; n = n->next)
1027 struct yaz_marc_subfield *s;
1032 case YAZ_MARC_DATAFIELD:
1033 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
1035 case YAZ_MARC_CONTROLFIELD:
1036 wrbuf_rewind(wr_cdata);
1037 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1038 marc_iconv_reset(mt, wr_cdata);
1041 ptr = xmlNewTextChild(record_ptr, ns_record,
1042 BAD_CAST "controlfield",
1043 BAD_CAST wrbuf_cstr(wr_cdata));
1044 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1047 // TODO required iconv?
1050 strncpy(field + 1, n->u.controlfield.tag, 3);
1052 ptr = xmlNewTextChild(record_ptr, ns_record,
1054 BAD_CAST wrbuf_cstr(wr_cdata));
1058 case YAZ_MARC_COMMENT:
1059 ptr = xmlNewComment(BAD_CAST n->u.comment);
1060 xmlAddChild(record_ptr, ptr);
1062 case YAZ_MARC_LEADER:
1064 char *field = "leader";
1067 xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
1068 BAD_CAST n->u.leader);
1073 wrbuf_destroy(wr_cdata);
1078 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
1083 struct yaz_marc_node *n;
1084 int identifier_length;
1085 const char *leader = 0;
1086 xmlNode *record_ptr;
1090 for (n = mt->nodes; n; n = n->next)
1091 if (n->which == YAZ_MARC_LEADER)
1093 leader = n->u.leader;
1099 if (!atoi_n_check(leader+11, 1, &identifier_length))
1102 wr_cdata = wrbuf_alloc();
1104 record_ptr = xmlNewNode(0, BAD_CAST "record");
1105 *root_ptr = record_ptr;
1107 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1108 xmlSetNs(record_ptr, ns_record);
1111 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1113 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1114 for (n = mt->nodes; n; n = n->next)
1116 struct yaz_marc_subfield *s;
1121 case YAZ_MARC_DATAFIELD:
1122 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1123 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1124 if (n->u.datafield.indicator)
1127 for (i = 0; n->u.datafield.indicator[i]; i++)
1132 sprintf(ind_str, "ind%d", i+1);
1133 ind_val[0] = n->u.datafield.indicator[i];
1135 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1138 for (s = n->u.datafield.subfields; s; s = s->next)
1140 xmlNode *ptr_subfield;
1141 size_t using_code_len = get_subfield_len(mt, s->code_data,
1143 wrbuf_rewind(wr_cdata);
1144 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1145 s->code_data + using_code_len);
1146 marc_iconv_reset(mt, wr_cdata);
1147 ptr_subfield = xmlNewTextChild(
1149 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
1151 wrbuf_rewind(wr_cdata);
1152 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1153 s->code_data, using_code_len);
1154 xmlNewProp(ptr_subfield, BAD_CAST "code",
1155 BAD_CAST wrbuf_cstr(wr_cdata));
1158 case YAZ_MARC_CONTROLFIELD:
1159 wrbuf_rewind(wr_cdata);
1160 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1161 marc_iconv_reset(mt, wr_cdata);
1163 ptr = xmlNewTextChild(record_ptr, ns_record,
1164 BAD_CAST "controlfield",
1165 BAD_CAST wrbuf_cstr(wr_cdata));
1167 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1169 case YAZ_MARC_COMMENT:
1170 ptr = xmlNewComment(BAD_CAST n->u.comment);
1171 xmlAddChild(record_ptr, ptr);
1173 case YAZ_MARC_LEADER:
1174 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1175 BAD_CAST n->u.leader);
1179 wrbuf_destroy(wr_cdata);
1188 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1190 struct yaz_marc_node *n;
1191 int indicator_length;
1192 int identifier_length;
1193 int length_data_entry;
1194 int length_starting;
1195 int length_implementation;
1196 int data_offset = 0;
1197 const char *leader = 0;
1198 WRBUF wr_dir, wr_head, wr_data_tmp;
1201 for (n = mt->nodes; n; n = n->next)
1202 if (n->which == YAZ_MARC_LEADER)
1203 leader = n->u.leader;
1207 if (!atoi_n_check(leader+10, 1, &indicator_length))
1209 if (!atoi_n_check(leader+11, 1, &identifier_length))
1211 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1213 if (!atoi_n_check(leader+21, 1, &length_starting))
1215 if (!atoi_n_check(leader+22, 1, &length_implementation))
1218 wr_data_tmp = wrbuf_alloc();
1219 wr_dir = wrbuf_alloc();
1220 for (n = mt->nodes; n; n = n->next)
1222 int data_length = 0;
1223 struct yaz_marc_subfield *s;
1227 case YAZ_MARC_DATAFIELD:
1228 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1229 data_length += indicator_length;
1230 wrbuf_rewind(wr_data_tmp);
1231 for (s = n->u.datafield.subfields; s; s = s->next)
1233 /* write dummy IDFS + content */
1234 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1235 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1236 marc_iconv_reset(mt, wr_data_tmp);
1238 /* write dummy FS (makes MARC-8 become ASCII) */
1239 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1240 marc_iconv_reset(mt, wr_data_tmp);
1241 data_length += wrbuf_len(wr_data_tmp);
1243 case YAZ_MARC_CONTROLFIELD:
1244 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1246 wrbuf_rewind(wr_data_tmp);
1247 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1248 n->u.controlfield.data);
1249 marc_iconv_reset(mt, wr_data_tmp);
1250 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1251 marc_iconv_reset(mt, wr_data_tmp);
1252 data_length += wrbuf_len(wr_data_tmp);
1254 case YAZ_MARC_COMMENT:
1256 case YAZ_MARC_LEADER:
1261 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1262 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1263 data_offset += data_length;
1266 /* mark end of directory */
1267 wrbuf_putc(wr_dir, ISO2709_FS);
1269 /* base address of data (comes after leader+directory) */
1270 base_address = 24 + wrbuf_len(wr_dir);
1272 wr_head = wrbuf_alloc();
1274 /* write record length */
1275 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1276 /* from "original" leader */
1277 wrbuf_write(wr_head, leader+5, 7);
1278 /* base address of data */
1279 wrbuf_printf(wr_head, "%05d", base_address);
1280 /* from "original" leader */
1281 wrbuf_write(wr_head, leader+17, 7);
1283 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1284 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1285 wrbuf_destroy(wr_head);
1286 wrbuf_destroy(wr_dir);
1287 wrbuf_destroy(wr_data_tmp);
1289 for (n = mt->nodes; n; n = n->next)
1291 struct yaz_marc_subfield *s;
1295 case YAZ_MARC_DATAFIELD:
1296 wrbuf_printf(wr, "%.*s", indicator_length,
1297 n->u.datafield.indicator);
1298 for (s = n->u.datafield.subfields; s; s = s->next)
1300 wrbuf_putc(wr, ISO2709_IDFS);
1301 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1302 marc_iconv_reset(mt, wr);
1304 wrbuf_putc(wr, ISO2709_FS);
1306 case YAZ_MARC_CONTROLFIELD:
1307 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1308 marc_iconv_reset(mt, wr);
1309 wrbuf_putc(wr, ISO2709_FS);
1311 case YAZ_MARC_COMMENT:
1313 case YAZ_MARC_LEADER:
1317 wrbuf_printf(wr, "%c", ISO2709_RS);
1322 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1324 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1327 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1329 return -1; /* error */
1330 return r; /* OK, return length > 0 */
1333 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1334 const char **result, size_t *rsize)
1338 wrbuf_rewind(mt->m_wr);
1339 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1341 *result = wrbuf_cstr(mt->m_wr);
1343 *rsize = wrbuf_len(mt->m_wr);
1347 void yaz_marc_set_read_format(yaz_marc_t mt, int format)
1350 mt->input_format = format;
1353 int yaz_marc_get_read_format(yaz_marc_t mt)
1356 return mt->input_format;
1361 void yaz_marc_set_write_format(yaz_marc_t mt, int format)
1364 mt->output_format = format;
1366 // Force using libxml2
1367 if (mt->output_format == YAZ_MARC_TMARCXML)
1368 mt->write_using_libxml2 = 1;
1373 int yaz_marc_get_write_format(yaz_marc_t mt)
1376 return mt->output_format;
1382 * Deprecated, use yaz_marc_set_write_format
1384 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1386 yaz_marc_set_write_format(mt, xmlmode);
1391 void yaz_marc_debug(yaz_marc_t mt, int level)
1397 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1402 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1404 return mt->iconv_cd;
1407 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1409 struct yaz_marc_node *n;
1411 for (n = mt->nodes; n; n = n->next)
1412 if (n->which == YAZ_MARC_LEADER)
1414 leader = n->u.leader;
1415 memcpy(leader+off, str, strlen(str));
1420 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1422 xfree(mt->leader_spec);
1423 mt->leader_spec = 0;
1426 char dummy_leader[24];
1427 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1429 mt->leader_spec = xstrdup(leader_spec);
1434 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1436 const char *cp = leader_spec;
1441 int no_read = 0, no = 0;
1443 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1444 if (no < 2 || no_read < 3)
1446 if (pos < 0 || (size_t) pos >= size)
1451 const char *vp = strchr(val+1, '\'');
1457 if (len + pos > size)
1459 memcpy(leader + pos, val+1, len);
1461 else if (*val >= '0' && *val <= '9')
1477 int yaz_marc_decode_formatstr(const char *arg)
1480 if (!strcmp(arg, "marc"))
1481 mode = YAZ_MARC_ISO2709;
1482 if (!strcmp(arg, "marcxml"))
1483 mode = YAZ_MARC_MARCXML;
1484 if (!strcmp(arg, "tmarcxml"))
1485 mode = YAZ_MARC_TMARCXML;
1486 if (!strcmp(arg, "marcxchange"))
1487 mode = YAZ_MARC_XCHANGE;
1488 if (!strcmp(arg, "line"))
1489 mode = YAZ_MARC_LINE;
1493 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1495 mt->write_using_libxml2 = enable;
1498 int yaz_marc_is_turbo_format(yaz_marc_t mt)
1500 return mt->output_format == YAZ_MARC_TMARCXML;
1507 * c-file-style: "Stroustrup"
1508 * indent-tabs-mode: nil
1510 * vim: shiftwidth=4 tabstop=8 expandtab