1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
23 #include <yaz/marcdisp.h>
24 #include <yaz/wrbuf.h>
25 #include <yaz/yaz-util.h>
26 #include <yaz/nmem_xml.h>
27 #include <yaz/snprintf.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
34 enum yaz_collection_state {
40 /** \brief node types for yaz_marc_node */
41 enum YAZ_MARC_NODE_TYPE
44 YAZ_MARC_CONTROLFIELD,
49 /** \brief represents a data field */
50 struct yaz_marc_datafield {
53 struct yaz_marc_subfield *subfields;
56 /** \brief represents a control field */
57 struct yaz_marc_controlfield {
62 /** \brief a comment node */
63 struct yaz_marc_comment {
67 /** \brief MARC node */
68 struct yaz_marc_node {
69 enum YAZ_MARC_NODE_TYPE which;
71 struct yaz_marc_datafield datafield;
72 struct yaz_marc_controlfield controlfield;
76 struct yaz_marc_node *next;
79 /** \brief represents a subfield */
80 struct yaz_marc_subfield {
82 struct yaz_marc_subfield *next;
85 /** \brief the internals of a yaz_marc_t handle */
91 int write_using_libxml2;
92 enum yaz_collection_state enable_collection;
97 struct yaz_marc_node *nodes;
98 struct yaz_marc_node **nodes_pp;
99 struct yaz_marc_subfield **subfield_pp;
102 yaz_marc_t yaz_marc_create(void)
104 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
105 mt->output_format = YAZ_MARC_LINE;
107 mt->write_using_libxml2 = 0;
108 mt->enable_collection = no_collection;
109 mt->m_wr = wrbuf_alloc();
112 strcpy(mt->subfield_str, " $");
113 strcpy(mt->endline_str, "\n");
115 mt->nmem = nmem_create();
120 void yaz_marc_destroy(yaz_marc_t mt)
124 nmem_destroy(mt->nmem);
125 wrbuf_destroy(mt->m_wr);
126 xfree(mt->leader_spec);
130 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
135 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
137 wrbuf_iconv_reset(wr, mt->iconv_cd);
140 static int marc_exec_leader(const char *leader_spec, char *leader,
143 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
149 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
151 struct yaz_marc_node *n = (struct yaz_marc_node *)
152 nmem_malloc(mt->nmem, sizeof(*n));
155 mt->nodes_pp = &n->next;
160 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
161 const xmlNode *ptr_data)
163 struct yaz_marc_node *n = yaz_marc_add_node(mt);
164 n->which = YAZ_MARC_CONTROLFIELD;
165 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
166 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
169 void yaz_marc_add_controlfield_xml2(yaz_marc_t mt, char *tag,
170 const xmlNode *ptr_data)
172 struct yaz_marc_node *n = yaz_marc_add_node(mt);
173 n->which = YAZ_MARC_CONTROLFIELD;
174 n->u.controlfield.tag = tag;
175 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
181 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
183 struct yaz_marc_node *n = yaz_marc_add_node(mt);
184 n->which = YAZ_MARC_COMMENT;
185 n->u.comment = nmem_strdup(mt->nmem, comment);
188 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
194 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
195 yaz_marc_add_comment(mt, buf);
199 int yaz_marc_get_debug(yaz_marc_t mt)
204 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
206 struct yaz_marc_node *n = yaz_marc_add_node(mt);
207 n->which = YAZ_MARC_LEADER;
208 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
209 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
212 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
213 const char *data, size_t data_len)
215 struct yaz_marc_node *n = yaz_marc_add_node(mt);
216 n->which = YAZ_MARC_CONTROLFIELD;
217 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
218 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
224 sprintf(msg, "controlfield:");
225 for (i = 0; i < 16 && i < data_len; i++)
226 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
228 sprintf(msg + strlen(msg), " ..");
229 yaz_marc_add_comment(mt, msg);
233 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
234 const char *indicator, size_t indicator_len)
236 struct yaz_marc_node *n = yaz_marc_add_node(mt);
237 n->which = YAZ_MARC_DATAFIELD;
238 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
239 n->u.datafield.indicator =
240 nmem_strdupn(mt->nmem, indicator, indicator_len);
241 n->u.datafield.subfields = 0;
243 /* make subfield_pp the current (last one) */
244 mt->subfield_pp = &n->u.datafield.subfields;
247 /** \brief adds an attribute value to the element name if it is plain chars
249 If not, and if the attribute name is not null, it will append an
250 attribute element with the value; if the attribute name is null, it will
251 return a non-zero value, meaning it couldn't handle the value.
253 static int element_name_append_attribute_value(
254 yaz_marc_t mt, WRBUF buffer,
255 const char *attribute_name, char *code_data, size_t code_len)
257 /* TODO Map special codes to something possible for XML ELEMENT names */
262 for (index = 0; index < code_len; index++)
264 if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
265 (code_data[index] >= 'a' && code_data[index] <= 'z') ||
266 (code_data[index] >= 'A' && code_data[index] <= 'Z')))
269 /* Add as attribute */
270 if (encode && attribute_name)
271 wrbuf_printf(buffer, " %s=\"", attribute_name);
273 if (!encode || attribute_name)
274 wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len);
278 if (encode && attribute_name)
279 wrbuf_printf(buffer, "\""); /* return error if we couldn't handle it.*/
284 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
285 const char *indicator, size_t indicator_len)
287 struct yaz_marc_node *n = yaz_marc_add_node(mt);
288 n->which = YAZ_MARC_DATAFIELD;
289 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
290 n->u.datafield.indicator =
291 nmem_strdupn(mt->nmem, indicator, indicator_len);
292 n->u.datafield.subfields = 0;
294 /* make subfield_pp the current (last one) */
295 mt->subfield_pp = &n->u.datafield.subfields;
298 void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators)
300 struct yaz_marc_node *n = yaz_marc_add_node(mt);
301 n->which = YAZ_MARC_DATAFIELD;
302 n->u.datafield.tag = tag_value;
303 n->u.datafield.indicator = indicators;
304 n->u.datafield.subfields = 0;
306 /* make subfield_pp the current (last one) */
307 mt->subfield_pp = &n->u.datafield.subfields;
310 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
312 n->u.datafield.indicator = indicator;
317 void yaz_marc_add_subfield(yaz_marc_t mt,
318 const char *code_data, size_t code_data_len)
325 sprintf(msg, "subfield:");
326 for (i = 0; i < 16 && i < code_data_len; i++)
327 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
328 if (i < code_data_len)
329 sprintf(msg + strlen(msg), " ..");
330 yaz_marc_add_comment(mt, msg);
335 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
336 nmem_malloc(mt->nmem, sizeof(*n));
337 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
339 /* mark subfield_pp to point to this one, so we append here next */
340 *mt->subfield_pp = n;
341 mt->subfield_pp = &n->next;
345 static void check_ascii(yaz_marc_t mt, char *leader, int offset,
348 if (leader[offset] < ' ' || leader[offset] > 127)
351 "Leader character at offset %d is non-ASCII. "
352 "Setting value to '%c'", offset, ch_default);
353 leader[offset] = ch_default;
357 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
358 int *indicator_length,
359 int *identifier_length,
361 int *length_data_entry,
362 int *length_starting,
363 int *length_implementation)
367 memcpy(leader, leader_c, 24);
369 check_ascii(mt, leader, 5, 'a');
370 check_ascii(mt, leader, 6, 'a');
371 check_ascii(mt, leader, 7, 'a');
372 check_ascii(mt, leader, 8, '#');
373 check_ascii(mt, leader, 9, '#');
374 if (!atoi_n_check(leader+10, 1, indicator_length) || *indicator_length == 0)
376 yaz_marc_cprintf(mt, "Indicator length at offset 10 should"
377 " hold a number 1-9. Assuming 2");
379 *indicator_length = 2;
381 if (!atoi_n_check(leader+11, 1, identifier_length) || *identifier_length == 0)
383 yaz_marc_cprintf(mt, "Identifier length at offset 11 should "
384 " hold a number 1-9. Assuming 2");
386 *identifier_length = 2;
388 if (!atoi_n_check(leader+12, 5, base_address))
390 yaz_marc_cprintf(mt, "Base address at offsets 12..16 should"
391 " hold a number. Assuming 0");
394 check_ascii(mt, leader, 17, '#');
395 check_ascii(mt, leader, 18, '#');
396 check_ascii(mt, leader, 19, '#');
397 if (!atoi_n_check(leader+20, 1, length_data_entry) ||
398 *length_data_entry < 3)
400 yaz_marc_cprintf(mt, "Length data entry at offset 20 should"
401 " hold a number 3-9. Assuming 4");
402 *length_data_entry = 4;
405 if (!atoi_n_check(leader+21, 1, length_starting) || *length_starting < 4)
407 yaz_marc_cprintf(mt, "Length starting at offset 21 should"
408 " hold a number 4-9. Assuming 5");
409 *length_starting = 5;
412 if (!atoi_n_check(leader+22, 1, length_implementation))
414 yaz_marc_cprintf(mt, "Length implementation at offset 22 should"
415 " hold a number. Assuming 0");
416 *length_implementation = 0;
419 check_ascii(mt, leader, 23, '0');
423 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
424 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
425 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
426 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
427 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
428 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
430 yaz_marc_add_leader(mt, leader, 24);
433 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
435 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
436 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
439 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
441 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
442 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
445 /* try to guess how many bytes the identifier really is! */
446 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
451 for (i = 1; i<5; i++)
454 size_t outbytesleft = sizeof(outbuf);
456 const char *inp = buf;
458 size_t inbytesleft = i;
459 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
460 &outp, &outbytesleft);
461 yaz_iconv(mt->iconv_cd, 0, 0, &outp, &outbytesleft);
462 if (r != (size_t) (-1))
463 return i; /* got a complete sequence */
465 return 1; /* giving up */
471 (void) yaz_read_UTF8_char((const unsigned char *) buf, strlen(buf),
473 if (error == 0 && no_read > 0)
476 return 1; /* we don't know */
479 void yaz_marc_reset(yaz_marc_t mt)
481 nmem_reset(mt->nmem);
483 mt->nodes_pp = &mt->nodes;
487 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
489 struct yaz_marc_node *n;
490 int identifier_length;
491 const char *leader = 0;
493 for (n = mt->nodes; n; n = n->next)
494 if (n->which == YAZ_MARC_LEADER)
496 leader = n->u.leader;
502 if (!atoi_n_check(leader+11, 1, &identifier_length))
505 for (n = mt->nodes; n; n = n->next)
509 case YAZ_MARC_COMMENT:
510 wrbuf_iconv_write(wr, mt->iconv_cd,
511 n->u.comment, strlen(n->u.comment));
512 wrbuf_puts(wr, "\n");
521 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
522 int identifier_length)
524 /* if identifier length is 2 (most MARCs) or less (probably an error),
525 the code is a single character .. However we've
526 seen multibyte codes, so see how big it really is */
527 if (identifier_length > 2)
528 return identifier_length - 1;
530 return cdata_one_character(mt, data);
533 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
535 struct yaz_marc_node *n;
536 int identifier_length;
537 const char *leader = 0;
539 for (n = mt->nodes; n; n = n->next)
540 if (n->which == YAZ_MARC_LEADER)
542 leader = n->u.leader;
548 if (!atoi_n_check(leader+11, 1, &identifier_length))
551 for (n = mt->nodes; n; n = n->next)
553 struct yaz_marc_subfield *s;
556 case YAZ_MARC_DATAFIELD:
557 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
558 n->u.datafield.indicator);
559 for (s = n->u.datafield.subfields; s; s = s->next)
561 size_t using_code_len = get_subfield_len(mt, s->code_data,
564 wrbuf_puts (wr, mt->subfield_str);
565 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
567 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
568 wrbuf_iconv_puts(wr, mt->iconv_cd,
569 s->code_data + using_code_len);
570 marc_iconv_reset(mt, wr);
572 wrbuf_puts (wr, mt->endline_str);
574 case YAZ_MARC_CONTROLFIELD:
575 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
576 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
577 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
578 marc_iconv_reset(mt, wr);
579 wrbuf_puts (wr, mt->endline_str);
581 case YAZ_MARC_COMMENT:
583 wrbuf_iconv_write(wr, mt->iconv_cd,
584 n->u.comment, strlen(n->u.comment));
585 marc_iconv_reset(mt, wr);
586 wrbuf_puts(wr, ")\n");
588 case YAZ_MARC_LEADER:
589 wrbuf_printf(wr, "%s\n", n->u.leader);
592 wrbuf_puts(wr, "\n");
596 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
598 if (mt->enable_collection == collection_second)
600 switch(mt->output_format)
602 case YAZ_MARC_MARCXML:
603 case YAZ_MARC_TURBOMARC:
604 wrbuf_printf(wr, "</collection>\n");
606 case YAZ_MARC_XCHANGE:
607 wrbuf_printf(wr, "</collection>\n");
614 void yaz_marc_enable_collection(yaz_marc_t mt)
616 mt->enable_collection = collection_first;
619 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
621 switch(mt->output_format)
624 return yaz_marc_write_line(mt, wr);
625 case YAZ_MARC_MARCXML:
626 return yaz_marc_write_marcxml(mt, wr);
627 case YAZ_MARC_TURBOMARC:
628 return yaz_marc_write_turbomarc(mt, wr);
629 case YAZ_MARC_XCHANGE:
630 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
631 case YAZ_MARC_ISO2709:
632 return yaz_marc_write_iso2709(mt, wr);
634 return yaz_marc_write_check(mt, wr);
636 return yaz_marc_write_json(mt, wr);
641 static const char *record_name[2] = { "record", "r"};
642 static const char *leader_name[2] = { "leader", "l"};
643 static const char *controlfield_name[2] = { "controlfield", "c"};
644 static const char *datafield_name[2] = { "datafield", "d"};
645 static const char *indicator_name[2] = { "ind", "i"};
646 static const char *subfield_name[2] = { "subfield", "s"};
648 /** \brief common MARC XML/Xchange/turbomarc writer
650 \param wr WRBUF output
651 \param ns XMLNS for the elements
652 \param format record format (e.g. "MARC21")
653 \param type record type (e.g. "Bibliographic")
654 \param turbo =1 for turbomarc
658 static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
664 struct yaz_marc_node *n;
665 int identifier_length;
666 const char *leader = 0;
668 for (n = mt->nodes; n; n = n->next)
669 if (n->which == YAZ_MARC_LEADER)
671 leader = n->u.leader;
677 if (!atoi_n_check(leader+11, 1, &identifier_length))
680 if (mt->enable_collection != no_collection)
682 if (mt->enable_collection == collection_first)
684 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
685 mt->enable_collection = collection_second;
687 wrbuf_printf(wr, "<%s", record_name[turbo]);
691 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
694 wrbuf_printf(wr, " format=\"%.80s\"", format);
696 wrbuf_printf(wr, " type=\"%.80s\"", type);
697 wrbuf_printf(wr, ">\n");
698 for (n = mt->nodes; n; n = n->next)
700 struct yaz_marc_subfield *s;
704 case YAZ_MARC_DATAFIELD:
706 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
708 wrbuf_printf(wr, " tag=\"");
709 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
710 strlen(n->u.datafield.tag));
712 wrbuf_printf(wr, "\"");
713 if (n->u.datafield.indicator)
716 for (i = 0; n->u.datafield.indicator[i]; i++)
718 wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
719 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
720 n->u.datafield.indicator+i, 1);
721 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
724 wrbuf_printf(wr, ">\n");
725 for (s = n->u.datafield.subfields; s; s = s->next)
727 size_t using_code_len = get_subfield_len(mt, s->code_data,
729 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
732 wrbuf_printf(wr, " code=\"");
733 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
734 s->code_data, using_code_len);
735 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
739 element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
742 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
743 s->code_data + using_code_len,
744 strlen(s->code_data + using_code_len));
745 marc_iconv_reset(mt, wr);
746 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
748 element_name_append_attribute_value(mt, wr, 0, s->code_data, using_code_len);
749 wrbuf_puts(wr, ">\n");
751 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
754 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
755 strlen(n->u.datafield.tag));
756 wrbuf_printf(wr, ">\n");
758 case YAZ_MARC_CONTROLFIELD:
759 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
762 wrbuf_printf(wr, " tag=\"");
763 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
764 strlen(n->u.controlfield.tag));
765 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
769 /* TODO convert special */
770 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
771 strlen(n->u.controlfield.tag));
772 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
774 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
775 n->u.controlfield.data,
776 strlen(n->u.controlfield.data));
777 marc_iconv_reset(mt, wr);
778 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
779 /* TODO convert special */
781 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
782 strlen(n->u.controlfield.tag));
783 wrbuf_puts(wr, ">\n");
785 case YAZ_MARC_COMMENT:
786 wrbuf_printf(wr, "<!-- ");
787 wrbuf_puts(wr, n->u.comment);
788 wrbuf_printf(wr, " -->\n");
790 case YAZ_MARC_LEADER:
791 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
792 wrbuf_iconv_write_cdata(wr,
793 0 , /* no charset conversion for leader */
794 n->u.leader, strlen(n->u.leader));
795 wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
798 wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
802 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
808 if (mt->write_using_libxml2)
815 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
817 ret = yaz_marc_write_xml_turbo_xml(mt, &root_ptr, ns, format, type);
821 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
824 xmlDocSetRootElement(doc, root_ptr);
825 xmlDocDumpMemory(doc, &buf_out, &len_out);
827 wrbuf_write(wr, (const char *) buf_out, len_out);
838 return yaz_marc_write_marcxml_wrbuf(mt, wr, ns, format, type, turbo);
841 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
843 /* set leader 09 to 'a' for UNICODE */
844 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
845 if (!mt->leader_spec)
846 yaz_marc_modify_leader(mt, 9, "a");
847 return yaz_marc_write_marcxml_ns(mt, wr,
848 "http://www.loc.gov/MARC21/slim",
852 int yaz_marc_write_turbomarc(yaz_marc_t mt, WRBUF wr)
854 /* set leader 09 to 'a' for UNICODE */
855 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
856 if (!mt->leader_spec)
857 yaz_marc_modify_leader(mt, 9, "a");
858 return yaz_marc_write_marcxml_ns(mt, wr,
859 "http://www.indexdata.com/turbomarc", 0, 0, 1);
862 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
866 return yaz_marc_write_marcxml_ns(mt, wr,
867 "info:lc/xmlns/marcxchange-v1",
873 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n,
875 xmlNsPtr ns_record, WRBUF wr_cdata,
876 int identifier_length)
879 struct yaz_marc_subfield *s;
880 WRBUF subfield_name = wrbuf_alloc();
882 /* TODO consider if safe */
885 strncpy(field + 1, n->u.datafield.tag, 3);
887 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
889 if (n->u.datafield.indicator)
892 for (i = 0; n->u.datafield.indicator[i]; i++)
897 ind_val[0] = n->u.datafield.indicator[i];
899 sprintf(ind_str, "%s%d", indicator_name[1], i+1);
900 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
903 for (s = n->u.datafield.subfields; s; s = s->next)
906 xmlNode *ptr_subfield;
907 size_t using_code_len = get_subfield_len(mt, s->code_data,
909 wrbuf_rewind(wr_cdata);
910 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
911 marc_iconv_reset(mt, wr_cdata);
913 wrbuf_rewind(subfield_name);
914 wrbuf_puts(subfield_name, "s");
915 not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0;
916 ptr_subfield = xmlNewTextChild(ptr, ns_record,
917 BAD_CAST wrbuf_cstr(subfield_name),
918 BAD_CAST wrbuf_cstr(wr_cdata));
921 /* Generate code attribute value and add */
922 wrbuf_rewind(wr_cdata);
923 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
924 xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata));
927 wrbuf_destroy(subfield_name);
930 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
935 struct yaz_marc_node *n;
936 int identifier_length;
937 const char *leader = 0;
942 for (n = mt->nodes; n; n = n->next)
943 if (n->which == YAZ_MARC_LEADER)
945 leader = n->u.leader;
951 if (!atoi_n_check(leader+11, 1, &identifier_length))
954 wr_cdata = wrbuf_alloc();
956 record_ptr = xmlNewNode(0, BAD_CAST "r");
957 *root_ptr = record_ptr;
959 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
960 xmlSetNs(record_ptr, ns_record);
963 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
965 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
966 for (n = mt->nodes; n; n = n->next)
976 case YAZ_MARC_DATAFIELD:
977 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
979 case YAZ_MARC_CONTROLFIELD:
980 wrbuf_rewind(wr_cdata);
981 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
982 marc_iconv_reset(mt, wr_cdata);
984 strncpy(field + 1, n->u.controlfield.tag, 3);
985 ptr = xmlNewTextChild(record_ptr, ns_record,
987 BAD_CAST wrbuf_cstr(wr_cdata));
989 case YAZ_MARC_COMMENT:
990 ptr = xmlNewComment(BAD_CAST n->u.comment);
991 xmlAddChild(record_ptr, ptr);
993 case YAZ_MARC_LEADER:
994 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "l",
995 BAD_CAST n->u.leader);
999 wrbuf_destroy(wr_cdata);
1004 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
1009 struct yaz_marc_node *n;
1010 int identifier_length;
1011 const char *leader = 0;
1012 xmlNode *record_ptr;
1016 for (n = mt->nodes; n; n = n->next)
1017 if (n->which == YAZ_MARC_LEADER)
1019 leader = n->u.leader;
1025 if (!atoi_n_check(leader+11, 1, &identifier_length))
1028 wr_cdata = wrbuf_alloc();
1030 record_ptr = xmlNewNode(0, BAD_CAST "record");
1031 *root_ptr = record_ptr;
1033 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1034 xmlSetNs(record_ptr, ns_record);
1037 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1039 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1040 for (n = mt->nodes; n; n = n->next)
1042 struct yaz_marc_subfield *s;
1047 case YAZ_MARC_DATAFIELD:
1048 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1049 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1050 if (n->u.datafield.indicator)
1053 for (i = 0; n->u.datafield.indicator[i]; i++)
1058 sprintf(ind_str, "ind%d", i+1);
1059 ind_val[0] = n->u.datafield.indicator[i];
1061 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1064 for (s = n->u.datafield.subfields; s; s = s->next)
1066 xmlNode *ptr_subfield;
1067 size_t using_code_len = get_subfield_len(mt, s->code_data,
1069 wrbuf_rewind(wr_cdata);
1070 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1071 s->code_data + using_code_len);
1072 marc_iconv_reset(mt, wr_cdata);
1073 ptr_subfield = xmlNewTextChild(
1075 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
1077 wrbuf_rewind(wr_cdata);
1078 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1079 s->code_data, using_code_len);
1080 xmlNewProp(ptr_subfield, BAD_CAST "code",
1081 BAD_CAST wrbuf_cstr(wr_cdata));
1084 case YAZ_MARC_CONTROLFIELD:
1085 wrbuf_rewind(wr_cdata);
1086 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1087 marc_iconv_reset(mt, wr_cdata);
1089 ptr = xmlNewTextChild(record_ptr, ns_record,
1090 BAD_CAST "controlfield",
1091 BAD_CAST wrbuf_cstr(wr_cdata));
1093 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1095 case YAZ_MARC_COMMENT:
1096 ptr = xmlNewComment(BAD_CAST n->u.comment);
1097 xmlAddChild(record_ptr, ptr);
1099 case YAZ_MARC_LEADER:
1100 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1101 BAD_CAST n->u.leader);
1105 wrbuf_destroy(wr_cdata);
1111 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1113 struct yaz_marc_node *n;
1114 int indicator_length;
1115 int identifier_length;
1116 int length_data_entry;
1117 int length_starting;
1118 int length_implementation;
1119 int data_offset = 0;
1120 const char *leader = 0;
1121 WRBUF wr_dir, wr_head, wr_data_tmp;
1124 for (n = mt->nodes; n; n = n->next)
1125 if (n->which == YAZ_MARC_LEADER)
1126 leader = n->u.leader;
1130 if (!atoi_n_check(leader+10, 1, &indicator_length))
1132 if (!atoi_n_check(leader+11, 1, &identifier_length))
1134 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1136 if (!atoi_n_check(leader+21, 1, &length_starting))
1138 if (!atoi_n_check(leader+22, 1, &length_implementation))
1141 wr_data_tmp = wrbuf_alloc();
1142 wr_dir = wrbuf_alloc();
1143 for (n = mt->nodes; n; n = n->next)
1145 int data_length = 0;
1146 struct yaz_marc_subfield *s;
1150 case YAZ_MARC_DATAFIELD:
1151 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1152 data_length += indicator_length;
1153 wrbuf_rewind(wr_data_tmp);
1154 for (s = n->u.datafield.subfields; s; s = s->next)
1156 /* write dummy IDFS + content */
1157 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1158 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1159 marc_iconv_reset(mt, wr_data_tmp);
1161 /* write dummy FS (makes MARC-8 to become ASCII) */
1162 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1163 marc_iconv_reset(mt, wr_data_tmp);
1164 data_length += wrbuf_len(wr_data_tmp);
1166 case YAZ_MARC_CONTROLFIELD:
1167 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1169 wrbuf_rewind(wr_data_tmp);
1170 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1171 n->u.controlfield.data);
1172 marc_iconv_reset(mt, wr_data_tmp);
1173 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1174 marc_iconv_reset(mt, wr_data_tmp);
1175 data_length += wrbuf_len(wr_data_tmp);
1177 case YAZ_MARC_COMMENT:
1179 case YAZ_MARC_LEADER:
1184 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1185 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1186 data_offset += data_length;
1189 /* mark end of directory */
1190 wrbuf_putc(wr_dir, ISO2709_FS);
1192 /* base address of data (comes after leader+directory) */
1193 base_address = 24 + wrbuf_len(wr_dir);
1195 wr_head = wrbuf_alloc();
1197 /* write record length */
1198 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1199 /* from "original" leader */
1200 wrbuf_write(wr_head, leader+5, 7);
1201 /* base address of data */
1202 wrbuf_printf(wr_head, "%05d", base_address);
1203 /* from "original" leader */
1204 wrbuf_write(wr_head, leader+17, 7);
1206 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1207 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1208 wrbuf_destroy(wr_head);
1209 wrbuf_destroy(wr_dir);
1210 wrbuf_destroy(wr_data_tmp);
1212 for (n = mt->nodes; n; n = n->next)
1214 struct yaz_marc_subfield *s;
1218 case YAZ_MARC_DATAFIELD:
1219 wrbuf_write(wr, n->u.datafield.indicator, indicator_length);
1220 for (s = n->u.datafield.subfields; s; s = s->next)
1222 wrbuf_putc(wr, ISO2709_IDFS);
1223 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1224 marc_iconv_reset(mt, wr);
1226 wrbuf_putc(wr, ISO2709_FS);
1228 case YAZ_MARC_CONTROLFIELD:
1229 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1230 marc_iconv_reset(mt, wr);
1231 wrbuf_putc(wr, ISO2709_FS);
1233 case YAZ_MARC_COMMENT:
1235 case YAZ_MARC_LEADER:
1239 wrbuf_printf(wr, "%c", ISO2709_RS);
1243 int yaz_marc_write_json(yaz_marc_t mt, WRBUF w)
1245 int identifier_length;
1246 struct yaz_marc_node *n;
1247 const char *leader = 0;
1250 wrbuf_puts(w, "{\n");
1251 for (n = mt->nodes; n; n = n->next)
1252 if (n->which == YAZ_MARC_LEADER)
1253 leader = n->u.leader;
1258 if (!atoi_n_check(leader+11, 1, &identifier_length))
1261 wrbuf_puts(w, "\t\"leader\":\"");
1262 wrbuf_json_puts(w, leader);
1263 wrbuf_puts(w, "\",\n");
1264 wrbuf_puts(w, "\t\"fields\":\n\t[\n");
1266 for (n = mt->nodes; n; n = n->next)
1268 struct yaz_marc_subfield *s;
1269 const char *sep = "";
1272 case YAZ_MARC_LEADER:
1273 case YAZ_MARC_COMMENT:
1275 case YAZ_MARC_CONTROLFIELD:
1279 wrbuf_puts(w, ",\n");
1280 wrbuf_puts(w, "\t\t{\n\t\t\t\"");
1281 wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.tag);
1282 wrbuf_puts(w, "\":\"");
1283 wrbuf_iconv_json_puts(w, mt->iconv_cd, n->u.controlfield.data);
1284 wrbuf_puts(w, "\"\n\t\t}");
1286 case YAZ_MARC_DATAFIELD:
1290 wrbuf_puts(w, ",\n");
1292 wrbuf_puts(w, "\t\t{\n\t\t\t\"");
1293 wrbuf_json_puts(w, n->u.datafield.tag);
1294 wrbuf_puts(w, "\":\n\t\t\t{\n\t\t\t\t\"subfields\":\n\t\t\t\t[\n");
1295 for (s = n->u.datafield.subfields; s; s = s->next)
1297 size_t using_code_len = get_subfield_len(mt, s->code_data,
1301 wrbuf_puts(w, "\t\t\t\t\t{\n\t\t\t\t\t\t\"");
1302 wrbuf_iconv_json_write(w, mt->iconv_cd,
1303 s->code_data, using_code_len);
1304 wrbuf_puts(w, "\":\"");
1305 wrbuf_iconv_json_puts(w, mt->iconv_cd,
1306 s->code_data + using_code_len);
1307 wrbuf_puts(w, "\"\n\t\t\t\t\t}");
1309 wrbuf_puts(w, "\n\t\t\t\t]");
1310 if (n->u.datafield.indicator[0])
1313 for (i = 0; n->u.datafield.indicator[i]; i++)
1315 wrbuf_printf(w, ",\n\t\t\t\t\"ind%d\":\"%c\"", i + 1,
1316 n->u.datafield.indicator[i]);
1319 wrbuf_puts(w, "\n\t\t\t}\n");
1320 wrbuf_puts(w, "\n\t\t}");
1324 wrbuf_puts(w, "\n\t]\n");
1325 wrbuf_puts(w, "}\n");
1329 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1331 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1334 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1336 return -1; /* error */
1337 return r; /* OK, return length > 0 */
1340 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1341 const char **result, size_t *rsize)
1345 wrbuf_rewind(mt->m_wr);
1346 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1348 *result = wrbuf_cstr(mt->m_wr);
1350 *rsize = wrbuf_len(mt->m_wr);
1354 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1356 mt->output_format = xmlmode;
1359 void yaz_marc_debug(yaz_marc_t mt, int level)
1365 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1370 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1372 return mt->iconv_cd;
1375 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1377 struct yaz_marc_node *n;
1379 for (n = mt->nodes; n; n = n->next)
1380 if (n->which == YAZ_MARC_LEADER)
1382 leader = n->u.leader;
1383 memcpy(leader+off, str, strlen(str));
1388 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1390 xfree(mt->leader_spec);
1391 mt->leader_spec = 0;
1394 char dummy_leader[24];
1395 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1397 mt->leader_spec = xstrdup(leader_spec);
1402 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1404 const char *cp = leader_spec;
1409 int no_read = 0, no = 0;
1411 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1412 if (no < 2 || no_read < 3)
1414 if (pos < 0 || (size_t) pos >= size)
1419 const char *vp = strchr(val+1, '\'');
1425 if (len + pos > size)
1427 memcpy(leader + pos, val+1, len);
1429 else if (*val >= '0' && *val <= '9')
1445 int yaz_marc_decode_formatstr(const char *arg)
1448 if (!strcmp(arg, "marc"))
1449 mode = YAZ_MARC_ISO2709;
1450 if (!strcmp(arg, "marcxml"))
1451 mode = YAZ_MARC_MARCXML;
1452 if (!strcmp(arg, "turbomarc"))
1453 mode = YAZ_MARC_TURBOMARC;
1454 if (!strcmp(arg, "marcxchange"))
1455 mode = YAZ_MARC_XCHANGE;
1456 if (!strcmp(arg, "line"))
1457 mode = YAZ_MARC_LINE;
1458 if (!strcmp(arg, "json"))
1459 mode = YAZ_MARC_JSON;
1463 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1465 mt->write_using_libxml2 = enable;
1471 * c-file-style: "Stroustrup"
1472 * indent-tabs-mode: nil
1474 * vim: shiftwidth=4 tabstop=8 expandtab