1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2013 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
23 #include <yaz/marcdisp.h>
24 #include <yaz/wrbuf.h>
25 #include <yaz/yaz-util.h>
26 #include <yaz/nmem_xml.h>
27 #include <yaz/snprintf.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
34 enum yaz_collection_state {
40 /** \brief node types for yaz_marc_node */
41 enum YAZ_MARC_NODE_TYPE
/* tag for nodes whose controlfield payload is valid (set by the yaz_marc_add_controlfield* functions) */
44 YAZ_MARC_CONTROLFIELD,
49 /** \brief represents a data field */
50 struct yaz_marc_datafield {
/* head of this field's subfield chain; set to 0 when the field is created and
   extended by yaz_marc_add_subfield through the handle's subfield_pp */
53 struct yaz_marc_subfield *subfields;
56 /** \brief represents a control field */
57 struct yaz_marc_controlfield {
62 /** \brief a comment node */
63 struct yaz_marc_comment {
67 /** \brief MARC node */
68 struct yaz_marc_node {
/* discriminator: tells which payload member is in use */
69 enum YAZ_MARC_NODE_TYPE which;
/* payload members; the rest of this file reaches them as n->u.<member> */
71 struct yaz_marc_datafield datafield;
72 struct yaz_marc_controlfield controlfield;
/* following node of the record; nodes are chained in the order they were added */
76 struct yaz_marc_node *next;
79 /** \brief represents a subfield */
80 struct yaz_marc_subfield {
/* following subfield of the same datafield, appended by yaz_marc_add_subfield */
82 struct yaz_marc_subfield *next;
85 /** \brief the internals of a yaz_marc_t handle */
/* non-zero makes yaz_marc_write_marcxml_ns build its output through libxml2 */
91 int write_using_libxml2;
/* collection wrapper state: no_collection by default, collection_first after
   yaz_marc_enable_collection, collection_second once the opening tag was written */
92 enum yaz_collection_state enable_collection;
/* first node of the current record */
97 struct yaz_marc_node *nodes;
/* append position for nodes: &nodes after a reset, otherwise &last->next */
98 struct yaz_marc_node **nodes_pp;
/* append position for subfields of the most recently added datafield */
99 struct yaz_marc_subfield **subfield_pp;
/** \brief creates a MARC handle with default settings
    The handle is obtained with xmalloc. Defaults visible here: line-mode
    output, libxml2 writing disabled, no collection wrapper, a fresh WRBUF
    in m_wr, " $" as subfield prefix, a newline as line terminator and a
    private NMEM for record data.
    NOTE(review): further initialisation and the return statement are not
    visible in this listing.
*/
102 yaz_marc_t yaz_marc_create(void)
104 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
105 mt->output_format = YAZ_MARC_LINE;
107 mt->write_using_libxml2 = 0;
108 mt->enable_collection = no_collection;
109 mt->m_wr = wrbuf_alloc();
112 strcpy(mt->subfield_str, " $");
113 strcpy(mt->endline_str, "\n");
115 mt->nmem = nmem_create();
/** \brief releases what the handle owns
    Visible here: the NMEM holding the record nodes, the internal WRBUF and
    the leader specification string.
    NOTE(review): freeing of the handle itself is not visible in this listing.
*/
120 void yaz_marc_destroy(yaz_marc_t mt)
124 nmem_destroy(mt->nmem);
125 wrbuf_destroy(mt->m_wr);
126 xfree(mt->leader_spec);
130 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
/** \brief forwards to wrbuf_iconv_reset for wr, using the handle's iconv descriptor
    Called by the writers after each converted field/subfield value.
*/
135 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
137 wrbuf_iconv_reset(wr, mt->iconv_cd);
140 static int marc_exec_leader(const char *leader_spec, char *leader,
143 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
/** \brief allocates a node from the handle's NMEM and makes it the append point
    After the call nodes_pp refers to the new node's next pointer, so the
    following node is chained behind it.
    NOTE(review): the statement linking the node into the list and the
    return are not visible in this listing.
*/
149 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
151 struct yaz_marc_node *n = (struct yaz_marc_node *)
152 nmem_malloc(mt->nmem, sizeof(*n));
155 mt->nodes_pp = &n->next;
/** \brief adds a control field whose tag and data come from libxml2 nodes
    \param mt handle
    \param ptr_tag XML node supplying the tag text
    \param ptr_data XML node supplying the field data
    Both texts are extracted with nmem_text_node_cdata into the handle's NMEM.
*/
160 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
161 const xmlNode *ptr_data)
163 struct yaz_marc_node *n = yaz_marc_add_node(mt);
164 n->which = YAZ_MARC_CONTROLFIELD;
165 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
166 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief adds a control field with a ready-made tag string and XML data node
    \param mt handle
    \param tag tag string; the pointer is stored as given, it is not copied
    \param ptr_data XML node supplying the field data (copied into the NMEM)
*/
169 void yaz_marc_add_controlfield_xml2(yaz_marc_t mt, char *tag,
170 const xmlNode *ptr_data)
172 struct yaz_marc_node *n = yaz_marc_add_node(mt);
173 n->which = YAZ_MARC_CONTROLFIELD;
/* no duplication here: the node refers to the caller's buffer */
174 n->u.controlfield.tag = tag;
175 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/** \brief appends a comment node to the record
    \param mt handle
    \param comment NUL-terminated text; duplicated into the handle's NMEM
*/
181 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
183 struct yaz_marc_node *n = yaz_marc_add_node(mt);
184 n->which = YAZ_MARC_COMMENT;
185 n->u.comment = nmem_strdup(mt->nmem, comment);
/** \brief printf-style front end to yaz_marc_add_comment
    \param mt handle
    \param fmt format string followed by its arguments
    The text is formatted into a local buffer (declaration not visible in
    this listing) and then stored as a comment node.
*/
188 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
/* one byte less than the buffer size is handed to the formatter */
194 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
195 yaz_marc_add_comment(mt, buf);
199 int yaz_marc_get_debug(yaz_marc_t mt)
/** \brief appends a leader node
    \param mt handle
    \param leader leader bytes
    \param leader_len number of bytes to copy from leader
    The copy lives in the handle's NMEM; afterwards the handle's leader
    specification is applied to the copy via marc_exec_leader.
*/
204 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
206 struct yaz_marc_node *n = yaz_marc_add_node(mt);
207 n->which = YAZ_MARC_LEADER;
208 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
209 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
/** \brief appends a control field node
    \param mt handle
    \param tag NUL-terminated tag (duplicated)
    \param data field data
    \param data_len number of bytes of data to copy
    Besides the field itself, a "controlfield:" comment with a hex dump of
    at most the first 16 data bytes is added.
    NOTE(review): the condition guarding the dump (presumably a debug
    check) and the test before the " .." suffix are not visible here.
*/
212 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
213 const char *data, size_t data_len)
215 struct yaz_marc_node *n = yaz_marc_add_node(mt);
216 n->which = YAZ_MARC_CONTROLFIELD;
217 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
218 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
224 sprintf(msg, "controlfield:");
225 for (i = 0; i < 16 && i < data_len; i++)
/* & 0xff keeps the printed value in 00..FF regardless of char signedness */
226 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
228 sprintf(msg + strlen(msg), " ..");
229 yaz_marc_add_comment(mt, msg);
/** \brief appends a data field node without subfields
    \param mt handle
    \param tag NUL-terminated tag (duplicated)
    \param indicator indicator characters
    \param indicator_len number of indicator bytes to copy
    Subfields added afterwards with yaz_marc_add_subfield end up in this field.
*/
233 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
234 const char *indicator, size_t indicator_len)
236 struct yaz_marc_node *n = yaz_marc_add_node(mt);
237 n->which = YAZ_MARC_DATAFIELD;
238 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
239 n->u.datafield.indicator =
240 nmem_strdupn(mt->nmem, indicator, indicator_len);
241 n->u.datafield.subfields = 0;
243 /* make subfield_pp the current (last one) */
244 mt->subfield_pp = &n->u.datafield.subfields;
247 /** \brief adds an attribute value to the element name if it is plain chars
249 If not, and if the attribute name is not null, it will append an
250 attribute element with the value. If the attribute name is null it will
251 return a non-zero value, meaning it couldn't handle the value.
/* Writes a subfield/field code either as part of an element name or as an
   attribute. The code bytes are scanned for anything outside 0-9, a-z, A-Z;
   the flag "encode" used below is presumably set by that scan (assignment
   not visible in this listing).
   - encode set and attribute_name given: emits  name="code"  into buffer
   - encode clear: emits the code bytes only (to follow the element name)
   - encode set and no attribute_name: nothing is written
   NOTE(review): the return statement is not visible here. */
253 static int element_name_append_attribute_value(
254 yaz_marc_t mt, WRBUF buffer,
255 const char *attribute_name, char *code_data, size_t code_len)
257 /* TODO Map special codes to something possible for XML ELEMENT names */
262 for (index = 0; index < code_len; index++)
/* true for any byte that is not an ASCII letter or digit */
264 if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
265 (code_data[index] >= 'a' && code_data[index] <= 'z') ||
266 (code_data[index] >= 'A' && code_data[index] <= 'Z')))
269 /* Add as attribute */
270 if (encode && attribute_name)
271 wrbuf_printf(buffer, " %s=\"", attribute_name);
273 if (!encode || attribute_name)
274 wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len);
278 if (encode && attribute_name)
279 wrbuf_printf(buffer, "\""); /* return error if we couldn't handle it.*/
/** \brief appends a data field whose tag comes from a libxml2 node
    \param mt handle
    \param ptr_tag XML node supplying the tag text (copied into the NMEM)
    \param indicator indicator characters
    \param indicator_len number of indicator bytes to copy
*/
284 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
285 const char *indicator, size_t indicator_len)
287 struct yaz_marc_node *n = yaz_marc_add_node(mt);
288 n->which = YAZ_MARC_DATAFIELD;
289 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
290 n->u.datafield.indicator =
291 nmem_strdupn(mt->nmem, indicator, indicator_len);
292 n->u.datafield.subfields = 0;
294 /* make subfield_pp the current (last one) */
295 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief appends a data field using caller-provided tag and indicator strings
    \param mt handle
    \param tag_value tag string; stored by pointer, not copied
    \param indicators indicator string; stored by pointer, not copied
*/
298 void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators)
300 struct yaz_marc_node *n = yaz_marc_add_node(mt);
301 n->which = YAZ_MARC_DATAFIELD;
302 n->u.datafield.tag = tag_value;
303 n->u.datafield.indicator = indicators;
304 n->u.datafield.subfields = 0;
306 /* make subfield_pp the current (last one) */
307 mt->subfield_pp = &n->u.datafield.subfields;
/** \brief replaces the indicator string of a data field node
    \param n node; its datafield payload is written without checking n->which
    \param indicator new indicator string; stored by pointer, not copied
*/
310 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
312 n->u.datafield.indicator = indicator;
/** \brief appends a subfield to the most recently added data field
    \param mt handle
    \param code_data subfield code immediately followed by the subfield value
    \param code_data_len number of bytes of code_data to copy
    A "subfield:" comment with a hex dump of at most 16 bytes is produced
    first; the condition guarding it (presumably a debug check) is not
    visible in this listing.
    NOTE(review): initialisation of the new subfield's next pointer is not
    visible here either.
*/
317 void yaz_marc_add_subfield(yaz_marc_t mt,
318 const char *code_data, size_t code_data_len)
325 sprintf(msg, "subfield:");
326 for (i = 0; i < 16 && i < code_data_len; i++)
327 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
/* mark the dump as truncated when more than 16 bytes were supplied */
328 if (i < code_data_len)
329 sprintf(msg + strlen(msg), " ..");
330 yaz_marc_add_comment(mt, msg);
335 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
336 nmem_malloc(mt->nmem, sizeof(*n));
337 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
339 /* mark subfield_pp to point to this one, so we append here next */
340 *mt->subfield_pp = n;
341 mt->subfield_pp = &n->next;
/** \brief replaces a leader byte outside the printable range by a default
    \param mt handle (used for the diagnostic; the call itself is not
           visible in this listing)
    \param leader leader buffer, modified in place
    \param offset position to check
    The default character is the ch_default value seen below.
    NOTE(review): where plain char is signed the "> 127" comparison can
    never be true; bytes 0x80..0xFF are then negative and are caught by the
    "< ' '" comparison instead.
*/
345 static void check_ascii(yaz_marc_t mt, char *leader, int offset,
348 if (leader[offset] < ' ' || leader[offset] > 127)
351 "Leader character at offset %d is non-ASCII. "
352 "Setting value to '%c'", offset, ch_default);
353 leader[offset] = ch_default;
/** \brief validates a 24-byte leader, extracts its numeric parts and stores it
    \param mt handle
    \param leader_c leader as read (24 bytes are copied from it)
    \param indicator_length result: digit at offset 10, 2 if not a digit
    \param identifier_length result: digit at offset 11, 2 if not a digit
    \param length_data_entry result: digit at offset 20, 4 if not a digit
    \param length_starting result: digit at offset 21, 5 if not a digit
    \param length_implementation result: digit at offset 22, 0 if not a digit
    base_address (its parameter line is not visible in this listing)
    receives the number at offsets 12..16; its fallback is not visible.
    Offsets 5..9, 17..19 and 23 are passed through check_ascii. The parsed
    values are reported with yaz_marc_cprintf (any guard around that is not
    visible here) and the cleaned leader is added as a leader node.
*/
357 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
358 int *indicator_length,
359 int *identifier_length,
361 int *length_data_entry,
362 int *length_starting,
363 int *length_implementation)
/* work on a private copy so that corrections do not touch the input */
367 memcpy(leader, leader_c, 24);
369 check_ascii(mt, leader, 5, 'a');
370 check_ascii(mt, leader, 6, 'a');
371 check_ascii(mt, leader, 7, 'a');
372 check_ascii(mt, leader, 8, '#');
373 check_ascii(mt, leader, 9, '#');
374 if (!atoi_n_check(leader+10, 1, indicator_length))
377 "Indicator length at offset 10 should hold a digit."
380 *indicator_length = 2;
382 if (!atoi_n_check(leader+11, 1, identifier_length))
385 "Identifier length at offset 11 should hold a digit."
388 *identifier_length = 2;
390 if (!atoi_n_check(leader+12, 5, base_address))
393 "Base address at offsets 12..16 should hold a number."
397 check_ascii(mt, leader, 17, '#');
398 check_ascii(mt, leader, 18, '#');
399 check_ascii(mt, leader, 19, '#');
400 if (!atoi_n_check(leader+20, 1, length_data_entry))
403 "Length data entry at offset 20 should hold a digit."
405 *length_data_entry = 4;
408 if (!atoi_n_check(leader+21, 1, length_starting))
411 "Length starting at offset 21 should hold a digit."
413 *length_starting = 5;
416 if (!atoi_n_check(leader+22, 1, length_implementation))
419 "Length implementation at offset 22 should hold a digit."
421 *length_implementation = 0;
424 check_ascii(mt, leader, 23, '0');
428 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
429 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
430 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
431 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
432 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
433 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
435 yaz_marc_add_leader(mt, leader, 24);
/** \brief sets the text written before each subfield in line mode
    \param mt handle
    \param s new prefix; silently truncated to fit the handle's buffer
*/
438 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
440 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
441 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/** \brief sets the text written after each field in line mode
    \param mt handle
    \param s new line terminator; silently truncated to fit the handle's buffer
*/
444 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
446 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
/* strncpy does not terminate on truncation, so terminate explicitly */
447 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
450 /* try to guess how many bytes the identifier really is! */
/* Feeds the first 1, 2, 3 and 4 bytes of buf to the handle's iconv
   converter and returns the first length that converts without error.
   Returns 1 if none of them does. The second "return 1" is presumably the
   path taken when no converter is set (its condition is not visible in
   this listing). */
451 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
456 for (i = 1; i<5; i++)
459 size_t outbytesleft = sizeof(outbuf);
461 const char *inp = buf;
/* offer exactly i input bytes in this attempt */
463 size_t inbytesleft = i;
464 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
465 &outp, &outbytesleft);
466 if (r != (size_t) (-1))
467 return i; /* got a complete sequence */
469 return 1; /* giving up */
471 return 1; /* we don't know */
/** \brief discards the current record
    All node memory is returned through nmem_reset and the append position
    is moved back to the head of the list.
    NOTE(review): clearing of mt->nodes / subfield_pp is not visible in
    this listing.
*/
474 void yaz_marc_reset(yaz_marc_t mt)
476 nmem_reset(mt->nmem);
478 mt->nodes_pp = &mt->nodes;
/** \brief writes only the comment nodes of the record
    \param mt handle
    \param wr output buffer
    The leader is located and its identifier length digit (offset 11)
    parsed first; then every comment node is written followed by a newline.
    NOTE(review): return statements and the handling of a missing leader
    are not visible in this listing.
*/
482 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
484 struct yaz_marc_node *n;
485 int identifier_length;
486 const char *leader = 0;
488 for (n = mt->nodes; n; n = n->next)
489 if (n->which == YAZ_MARC_LEADER)
491 leader = n->u.leader;
497 if (!atoi_n_check(leader+11, 1, &identifier_length))
500 for (n = mt->nodes; n; n = n->next)
504 case YAZ_MARC_COMMENT:
505 wrbuf_iconv_write(wr, mt->iconv_cd,
506 n->u.comment, strlen(n->u.comment));
507 wrbuf_puts(wr, "\n");
/** \brief returns the number of bytes occupied by a subfield code
    \param mt handle (supplies the converter for the guess)
    \param data subfield code followed by the subfield value
    \param identifier_length identifier length taken from the leader
    \returns identifier_length - 1 when identifier_length exceeds 2,
             otherwise the length guessed by cdata_one_character
*/
516 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
517 int identifier_length)
519 /* if identifier length is 2 (most MARCs) or less (probably an error),
520 the code is a single character .. However we've
521 seen multibyte codes, so see how big it really is */
522 if (identifier_length > 2)
523 return identifier_length - 1;
525 return cdata_one_character(mt, data);
/** \brief writes the record in line format
    \param mt handle
    \param wr output buffer
    Output per node type, as far as visible in this listing:
    - data field: "tag indicators", then for every subfield the configured
      subfield prefix, the code, a blank and the value; then the configured
      line terminator
    - control field: tag, a blank, the data, the line terminator
    - comment: the text followed by ")" and a newline (the opening part is
      not visible here)
    - leader: the leader and a newline
    A newline is written at the end.
*/
528 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
530 struct yaz_marc_node *n;
531 int identifier_length;
532 const char *leader = 0;
534 for (n = mt->nodes; n; n = n->next)
535 if (n->which == YAZ_MARC_LEADER)
537 leader = n->u.leader;
/* the identifier length digit decides how subfield codes are split off */
543 if (!atoi_n_check(leader+11, 1, &identifier_length))
546 for (n = mt->nodes; n; n = n->next)
548 struct yaz_marc_subfield *s;
551 case YAZ_MARC_DATAFIELD:
552 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
553 n->u.datafield.indicator);
554 for (s = n->u.datafield.subfields; s; s = s->next)
556 size_t using_code_len = get_subfield_len(mt, s->code_data,
559 wrbuf_puts (wr, mt->subfield_str);
560 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
562 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
/* everything after the code is the subfield value */
563 wrbuf_iconv_puts(wr, mt->iconv_cd,
564 s->code_data + using_code_len);
565 marc_iconv_reset(mt, wr);
567 wrbuf_puts (wr, mt->endline_str);
569 case YAZ_MARC_CONTROLFIELD:
570 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
571 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
572 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
573 marc_iconv_reset(mt, wr);
574 wrbuf_puts (wr, mt->endline_str);
576 case YAZ_MARC_COMMENT:
578 wrbuf_iconv_write(wr, mt->iconv_cd,
579 n->u.comment, strlen(n->u.comment));
580 marc_iconv_reset(mt, wr);
581 wrbuf_puts(wr, ")\n");
583 case YAZ_MARC_LEADER:
584 wrbuf_printf(wr, "%s\n", n->u.leader);
587 wrbuf_puts(wr, "\n");
/** \brief closes an open collection element
    \param mt handle
    \param wr output buffer
    Only acts when a collection start tag was written earlier (state
    collection_second); for the MARCXML, TurboMARC and MarcXchange formats
    the closing collection tag is written.
    NOTE(review): the return value is not visible in this listing.
*/
591 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
593 if (mt->enable_collection == collection_second)
595 switch(mt->output_format)
597 case YAZ_MARC_MARCXML:
598 case YAZ_MARC_TURBOMARC:
599 wrbuf_printf(wr, "</collection>\n");
601 case YAZ_MARC_XCHANGE:
602 wrbuf_printf(wr, "</collection>\n");
/** \brief requests a collection wrapper around XML output
    The state becomes collection_first; the XML writer emits the opening
    collection tag with the next record and moves on to collection_second.
*/
609 void yaz_marc_enable_collection(yaz_marc_t mt)
611 mt->enable_collection = collection_first;
/** \brief writes the record in the handle's current output format
    \param mt handle
    \param wr output buffer
    \returns the result of the selected writer
    Dispatches on mt->output_format. The case labels in front of the
    line-format and check writers are not visible in this listing.
*/
614 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
616 switch(mt->output_format)
619 return yaz_marc_write_line(mt, wr);
620 case YAZ_MARC_MARCXML:
621 return yaz_marc_write_marcxml(mt, wr);
622 case YAZ_MARC_TURBOMARC:
623 return yaz_marc_write_turbomarc(mt, wr);
624 case YAZ_MARC_XCHANGE:
625 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
626 case YAZ_MARC_ISO2709:
627 return yaz_marc_write_iso2709(mt, wr);
629 return yaz_marc_write_check(mt, wr);
/* XML element/attribute names used by the WRBUF writer, indexed by its
   "turbo" flag: [0] is the long MARCXML/MarcXchange name, [1] the short
   TurboMARC name. */
634 static const char *record_name[2] = { "record", "r"};
635 static const char *leader_name[2] = { "leader", "l"};
636 static const char *controlfield_name[2] = { "controlfield", "c"};
637 static const char *datafield_name[2] = { "datafield", "d"};
/* prefix only; the writers append the 1-based indicator number */
638 static const char *indicator_name[2] = { "ind", "i"};
639 static const char *subfield_name[2] = { "subfield", "s"};
641 /** \brief common MARC XML/Xchange/turbomarc writer
643 \param wr WRBUF output
644 \param ns XMLNS for the elements
645 \param format record format (e.g. "MARC21")
646 \param type record type (e.g. "Bibliographic")
647 \param turbo =1 for turbomarc
/* Text-based XML writer shared by MARCXML, MarcXchange and TurboMARC.
   Element names are picked from the *_name tables with the turbo flag.
   Several branch heads (the tests on turbo, format and type) are not
   visible in this listing; comments below describe the visible calls only. */
651 static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
657 struct yaz_marc_node *n;
658 int identifier_length;
659 const char *leader = 0;
661 for (n = mt->nodes; n; n = n->next)
662 if (n->which == YAZ_MARC_LEADER)
664 leader = n->u.leader;
670 if (!atoi_n_check(leader+11, 1, &identifier_length))
673 if (mt->enable_collection != no_collection)
/* first record of a collection: open the wrapper, which carries the
   namespace, and remember that it is open */
675 if (mt->enable_collection == collection_first)
677 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
678 mt->enable_collection = collection_second;
680 wrbuf_printf(wr, "<%s", record_name[turbo]);
/* record start tag carrying the namespace itself */
684 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
/* format and type attribute values are cut off at 80 characters */
687 wrbuf_printf(wr, " format=\"%.80s\"", format);
689 wrbuf_printf(wr, " type=\"%.80s\"", type);
690 wrbuf_printf(wr, ">\n");
691 for (n = mt->nodes; n; n = n->next)
693 struct yaz_marc_subfield *s;
697 case YAZ_MARC_DATAFIELD:
699 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
701 wrbuf_printf(wr, " tag=\"");
702 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
703 strlen(n->u.datafield.tag));
705 wrbuf_printf(wr, "\"");
706 if (n->u.datafield.indicator)
/* one attribute per indicator character, numbered from 1 */
709 for (i = 0; n->u.datafield.indicator[i]; i++)
711 wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
712 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
713 n->u.datafield.indicator+i, 1);
714 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
717 wrbuf_printf(wr, ">\n");
718 for (s = n->u.datafield.subfields; s; s = s->next)
720 size_t using_code_len = get_subfield_len(mt, s->code_data,
722 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
725 wrbuf_printf(wr, " code=\"");
726 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
727 s->code_data, using_code_len);
728 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
/* alternative form: code appended to the element name, or written as a
   code attribute when it is not plain alphanumeric */
732 element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
/* subfield value: everything after the code */
735 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
736 s->code_data + using_code_len,
737 strlen(s->code_data + using_code_len));
738 marc_iconv_reset(mt, wr);
739 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
/* attribute name 0: only a plain code is appended to the end tag name */
741 element_name_append_attribute_value(mt, wr, 0, s->code_data, using_code_len);
742 wrbuf_puts(wr, ">\n");
744 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
747 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
748 strlen(n->u.datafield.tag));
749 wrbuf_printf(wr, ">\n");
751 case YAZ_MARC_CONTROLFIELD:
752 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
755 wrbuf_printf(wr, " tag=\"");
756 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
757 strlen(n->u.controlfield.tag));
758 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
762 /* TODO convert special */
763 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
764 strlen(n->u.controlfield.tag));
765 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
767 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
768 n->u.controlfield.data,
769 strlen(n->u.controlfield.data));
770 marc_iconv_reset(mt, wr);
771 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
772 /* TODO convert special */
774 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
775 strlen(n->u.controlfield.tag));
776 wrbuf_puts(wr, ">\n");
778 case YAZ_MARC_COMMENT:
/* comment text is written as stored, without conversion or escaping */
779 wrbuf_printf(wr, "<!-- ");
780 wrbuf_puts(wr, n->u.comment);
781 wrbuf_printf(wr, " -->\n");
783 case YAZ_MARC_LEADER:
784 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
785 wrbuf_iconv_write_cdata(wr,
786 0 , /* no charset conversion for leader */
787 n->u.leader, strlen(n->u.leader));
788 wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
791 wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
/** \brief XML writer front end: libxml2 tree or plain WRBUF
    With write_using_libxml2 set, a node tree is built by yaz_marc_write_xml
    or yaz_marc_write_xml_turbo_xml (the test choosing between them is not
    visible in this listing), wrapped in a document, serialised with
    xmlDocDumpMemory and appended to wr. Otherwise the record is written by
    yaz_marc_write_marcxml_wrbuf.
    NOTE(review): release of the document and dump buffer is not visible here.
*/
795 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
801 if (mt->write_using_libxml2)
808 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
810 ret = yaz_marc_write_xml_turbo_xml(mt, &root_ptr, ns, format, type);
814 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
817 xmlDocSetRootElement(doc, root_ptr);
818 xmlDocDumpMemory(doc, &buf_out, &len_out);
820 wrbuf_write(wr, (const char *) buf_out, len_out);
831 return yaz_marc_write_marcxml_wrbuf(mt, wr, ns, format, type, turbo);
/** \brief writes the record as MARCXML (namespace http://www.loc.gov/MARC21/slim)
    \param mt handle
    \param wr output buffer
    Unless the caller installed a leader specification, leader position 9
    is forced to 'a' before writing.
*/
834 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
836 /* set leader 09 to 'a' for UNICODE */
837 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
838 if (!mt->leader_spec)
839 yaz_marc_modify_leader(mt, 9, "a");
840 return yaz_marc_write_marcxml_ns(mt, wr,
841 "http://www.loc.gov/MARC21/slim",
/** \brief writes the record as TurboMARC (namespace http://www.indexdata.com/turbomarc)
    \param mt handle
    \param wr output buffer
    Unless the caller installed a leader specification, leader position 9
    is forced to 'a' before writing. No format/type attributes are passed
    and the turbo flag is 1.
*/
845 int yaz_marc_write_turbomarc(yaz_marc_t mt, WRBUF wr)
847 /* set leader 09 to 'a' for UNICODE */
848 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
849 if (!mt->leader_spec)
850 yaz_marc_modify_leader(mt, 9, "a");
851 return yaz_marc_write_marcxml_ns(mt, wr,
852 "http://www.indexdata.com/turbomarc", 0, 0, 1);
/** \brief writes the record as MarcXchange (namespace info:lc/xmlns/marcxchange-v1)
    \param mt handle
    \param wr output buffer
    The remaining parameters and call arguments are not visible in this
    listing; yaz_marc_write_mode calls this function with two trailing
    zeros commented as "no format, type".
*/
855 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
859 return yaz_marc_write_marcxml_ns(mt, wr,
860 "info:lc/xmlns/marcxchange-v1",
/** \brief adds one data field to a TurboMARC libxml2 tree
    \param mt handle
    \param n data field node
    \param ns_record namespace for created elements
    \param wr_cdata scratch buffer for converted text (rewound before use)
    \param identifier_length identifier length from the leader
    The element name is built in the local "field" buffer from a leading
    character plus up to three tag characters; indicators become
    attributes; each subfield becomes a child whose name starts with "s".
    NOTE(review): the declaration of field and the record_ptr parameter
    line are not visible in this listing.
*/
866 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n,
868 xmlNsPtr ns_record, WRBUF wr_cdata,
869 int identifier_length)
872 struct yaz_marc_subfield *s;
/* local scratch buffer; shadows the file-level subfield_name table */
873 WRBUF subfield_name = wrbuf_alloc();
875 /* TODO consider if safe */
878 strncpy(field + 1, n->u.datafield.tag, 3);
880 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
882 if (n->u.datafield.indicator)
885 for (i = 0; n->u.datafield.indicator[i]; i++)
890 ind_val[0] = n->u.datafield.indicator[i];
/* attribute name is the short indicator prefix plus a 1-based number */
892 sprintf(ind_str, "%s%d", indicator_name[1], i+1);
893 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
896 for (s = n->u.datafield.subfields; s; s = s->next)
899 xmlNode *ptr_subfield;
900 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* converted subfield value */
902 wrbuf_rewind(wr_cdata);
903 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
904 marc_iconv_reset(mt, wr_cdata);
/* element name: "s" plus the code when the helper could append it */
906 wrbuf_rewind(subfield_name);
907 wrbuf_puts(subfield_name, "s");
908 not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0;
909 ptr_subfield = xmlNewTextChild(ptr, ns_record,
910 BAD_CAST wrbuf_cstr(subfield_name),
911 BAD_CAST wrbuf_cstr(wr_cdata));
914 /* Generate code attribute value and add */
915 wrbuf_rewind(wr_cdata);
916 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
917 xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata));
920 wrbuf_destroy(subfield_name);
/** \brief builds a TurboMARC libxml2 tree for the record
    \param mt handle
    \param root_ptr result: the created "r" element
    The root gets the namespace ns and format/type attributes (the tests
    guarding the attributes are not visible in this listing). Data fields
    are delegated to add_marc_datafield_turbo_xml; control fields become
    elements named through the local "field" buffer; comments become XML
    comments; the leader becomes an "l" element.
*/
923 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
928 struct yaz_marc_node *n;
929 int identifier_length;
930 const char *leader = 0;
935 for (n = mt->nodes; n; n = n->next)
936 if (n->which == YAZ_MARC_LEADER)
938 leader = n->u.leader;
944 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for converted text, released at the end */
947 wr_cdata = wrbuf_alloc();
949 record_ptr = xmlNewNode(0, BAD_CAST "r");
950 *root_ptr = record_ptr;
952 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
953 xmlSetNs(record_ptr, ns_record);
956 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
958 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
959 for (n = mt->nodes; n; n = n->next)
969 case YAZ_MARC_DATAFIELD:
970 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
972 case YAZ_MARC_CONTROLFIELD:
973 wrbuf_rewind(wr_cdata);
974 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
975 marc_iconv_reset(mt, wr_cdata);
/* at most three tag characters go into the element name buffer */
977 strncpy(field + 1, n->u.controlfield.tag, 3);
978 ptr = xmlNewTextChild(record_ptr, ns_record,
980 BAD_CAST wrbuf_cstr(wr_cdata));
982 case YAZ_MARC_COMMENT:
983 ptr = xmlNewComment(BAD_CAST n->u.comment);
984 xmlAddChild(record_ptr, ptr);
986 case YAZ_MARC_LEADER:
987 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "l",
988 BAD_CAST n->u.leader);
992 wrbuf_destroy(wr_cdata);
/** \brief builds a MARCXML-style libxml2 tree for the record
    \param mt handle
    \param root_ptr result: the created "record" element
    The root gets the namespace ns and format/type attributes (the tests
    guarding the attributes are not visible in this listing). Each data
    field becomes a "datafield" element with tag and indN attributes and
    "subfield" children carrying a code attribute; control fields become
    "controlfield" elements with a tag attribute; comments become XML
    comments; the leader becomes a "leader" element.
*/
997 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
1002 struct yaz_marc_node *n;
1003 int identifier_length;
1004 const char *leader = 0;
1005 xmlNode *record_ptr;
1009 for (n = mt->nodes; n; n = n->next)
1010 if (n->which == YAZ_MARC_LEADER)
1012 leader = n->u.leader;
1018 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for converted text, released at the end */
1021 wr_cdata = wrbuf_alloc();
1023 record_ptr = xmlNewNode(0, BAD_CAST "record");
1024 *root_ptr = record_ptr;
1026 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1027 xmlSetNs(record_ptr, ns_record);
1030 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1032 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1033 for (n = mt->nodes; n; n = n->next)
1035 struct yaz_marc_subfield *s;
1040 case YAZ_MARC_DATAFIELD:
1041 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1042 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1043 if (n->u.datafield.indicator)
/* one indN attribute per indicator character, N starting at 1 */
1046 for (i = 0; n->u.datafield.indicator[i]; i++)
1051 sprintf(ind_str, "ind%d", i+1);
1052 ind_val[0] = n->u.datafield.indicator[i];
1054 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1057 for (s = n->u.datafield.subfields; s; s = s->next)
1059 xmlNode *ptr_subfield;
1060 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* element text: converted value following the code */
1062 wrbuf_rewind(wr_cdata);
1063 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1064 s->code_data + using_code_len);
1065 marc_iconv_reset(mt, wr_cdata);
1066 ptr_subfield = xmlNewTextChild(
1068 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
/* code attribute: the converted code bytes; the scratch buffer is reused */
1070 wrbuf_rewind(wr_cdata);
1071 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1072 s->code_data, using_code_len);
1073 xmlNewProp(ptr_subfield, BAD_CAST "code",
1074 BAD_CAST wrbuf_cstr(wr_cdata));
1077 case YAZ_MARC_CONTROLFIELD:
1078 wrbuf_rewind(wr_cdata);
1079 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1080 marc_iconv_reset(mt, wr_cdata);
1082 ptr = xmlNewTextChild(record_ptr, ns_record,
1083 BAD_CAST "controlfield",
1084 BAD_CAST wrbuf_cstr(wr_cdata));
1086 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1088 case YAZ_MARC_COMMENT:
1089 ptr = xmlNewComment(BAD_CAST n->u.comment);
1090 xmlAddChild(record_ptr, ptr);
1092 case YAZ_MARC_LEADER:
1093 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1094 BAD_CAST n->u.leader);
1098 wrbuf_destroy(wr_cdata);
/** \brief writes the record in ISO2709 format
    \param mt handle
    \param wr output buffer
    Works in two passes over the node list. The first pass builds the
    directory (tag, field length, starting offset per field) and measures
    each field after character conversion using a temporary buffer. The
    header is then assembled from the computed record length and base
    address plus the unchanged parts of the original leader. The second
    pass writes the field data with the ISO2709 separators.
    NOTE(review): error returns after the leader digit checks and the final
    return are not visible in this listing.
*/
1104 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1106 struct yaz_marc_node *n;
1107 int indicator_length;
1108 int identifier_length;
1109 int length_data_entry;
1110 int length_starting;
1111 int length_implementation;
1112 int data_offset = 0;
1113 const char *leader = 0;
1114 WRBUF wr_dir, wr_head, wr_data_tmp;
1117 for (n = mt->nodes; n; n = n->next)
1118 if (n->which == YAZ_MARC_LEADER)
1119 leader = n->u.leader;
/* directory layout parameters come from the leader digits */
1123 if (!atoi_n_check(leader+10, 1, &indicator_length))
1125 if (!atoi_n_check(leader+11, 1, &identifier_length))
1127 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1129 if (!atoi_n_check(leader+21, 1, &length_starting))
1131 if (!atoi_n_check(leader+22, 1, &length_implementation))
1134 wr_data_tmp = wrbuf_alloc();
1135 wr_dir = wrbuf_alloc();
/* pass 1: directory entries and field lengths */
1136 for (n = mt->nodes; n; n = n->next)
1138 int data_length = 0;
1139 struct yaz_marc_subfield *s;
1143 case YAZ_MARC_DATAFIELD:
1144 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1145 data_length += indicator_length;
1146 wrbuf_rewind(wr_data_tmp);
1147 for (s = n->u.datafield.subfields; s; s = s->next)
1149 /* write dummy IDFS + content */
1150 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1151 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1152 marc_iconv_reset(mt, wr_data_tmp);
1154 /* write dummy FS (makes MARC-8 to become ASCII) */
1155 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1156 marc_iconv_reset(mt, wr_data_tmp);
/* the temporary buffer now holds as many bytes as the field will occupy */
1157 data_length += wrbuf_len(wr_data_tmp);
1159 case YAZ_MARC_CONTROLFIELD:
1160 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1162 wrbuf_rewind(wr_data_tmp);
1163 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1164 n->u.controlfield.data);
1165 marc_iconv_reset(mt, wr_data_tmp);
1166 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1167 marc_iconv_reset(mt, wr_data_tmp);
1168 data_length += wrbuf_len(wr_data_tmp);
1170 case YAZ_MARC_COMMENT:
1172 case YAZ_MARC_LEADER:
/* zero-padded length and offset, widths taken from the leader */
1177 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1178 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1179 data_offset += data_length;
1182 /* mark end of directory */
1183 wrbuf_putc(wr_dir, ISO2709_FS);
1185 /* base address of data (comes after leader+directory) */
1186 base_address = 24 + wrbuf_len(wr_dir);
1188 wr_head = wrbuf_alloc();
1190 /* write record length */
1191 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1192 /* from "original" leader */
1193 wrbuf_write(wr_head, leader+5, 7);
1194 /* base address of data */
1195 wrbuf_printf(wr_head, "%05d", base_address);
1196 /* from "original" leader */
1197 wrbuf_write(wr_head, leader+17, 7);
/* exactly 24 header bytes are emitted, followed by the directory */
1199 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1200 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1201 wrbuf_destroy(wr_head);
1202 wrbuf_destroy(wr_dir);
1203 wrbuf_destroy(wr_data_tmp);
/* pass 2: the field data itself */
1205 for (n = mt->nodes; n; n = n->next)
1207 struct yaz_marc_subfield *s;
1211 case YAZ_MARC_DATAFIELD:
1212 wrbuf_write(wr, n->u.datafield.indicator, indicator_length);
1213 for (s = n->u.datafield.subfields; s; s = s->next)
1215 wrbuf_putc(wr, ISO2709_IDFS);
1216 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1217 marc_iconv_reset(mt, wr);
1219 wrbuf_putc(wr, ISO2709_FS);
1221 case YAZ_MARC_CONTROLFIELD:
1222 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1223 marc_iconv_reset(mt, wr);
1224 wrbuf_putc(wr, ISO2709_FS);
1226 case YAZ_MARC_COMMENT:
1228 case YAZ_MARC_LEADER:
/* record terminator */
1232 wrbuf_printf(wr, "%c", ISO2709_RS);
/** \brief decodes an ISO2709 buffer and writes it in the current output format
    \param mt handle
    \param buf input buffer
    \param bsize size of buf
    \param wr output buffer
    \returns the value from yaz_marc_read_iso2709 (commented below as a
             length > 0) on success, -1 on error
    NOTE(review): the conditions between the visible statements are not
    visible in this listing.
*/
1237 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1239 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1242 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1244 return -1; /* error */
1245 return r; /* OK, return length > 0 */
/** \brief decodes an ISO2709 buffer into the handle's internal buffer
    \param mt handle
    \param buf input buffer
    \param bsize size of buf
    \param result receives a pointer to the converted text, which lives in
           the handle's m_wr and is overwritten by the next call
    \param rsize receives the length of the converted text
    NOTE(review): the tests guarding the two result assignments and the
    return are not visible in this listing.
*/
1248 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1249 const char **result, size_t *rsize)
/* start from an empty internal buffer */
1253 wrbuf_rewind(mt->m_wr);
1254 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1256 *result = wrbuf_cstr(mt->m_wr);
1258 *rsize = wrbuf_len(mt->m_wr);
/** \brief selects the output format used by yaz_marc_write_mode
    \param mt handle
    \param xmlmode one of the YAZ_MARC_ format constants; stored unchecked
*/
1262 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1264 mt->output_format = xmlmode;
1267 void yaz_marc_debug(yaz_marc_t mt, int level)
1273 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/** \brief returns the character set converter currently attached to the handle */
1278 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1280 return mt->iconv_cd;
/** \brief overwrites part of the stored leader
    \param mt handle
    \param off byte offset within the leader
    \param str replacement text; strlen(str) bytes are copied, without the
           terminating NUL
    No bounds check against the leader length is visible here, so the
    caller has to keep off + strlen(str) within the leader.
*/
1283 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1285 struct yaz_marc_node *n;
1287 for (n = mt->nodes; n; n = n->next)
1288 if (n->which == YAZ_MARC_LEADER)
1290 leader = n->u.leader;
1291 memcpy(leader+off, str, strlen(str));
/** \brief installs a leader specification applied to every leader added later
    \param mt handle
    \param leader_spec specification text, see marc_exec_leader
    The previous specification is released first. The new one is tried out
    on a scratch 24-byte leader via marc_exec_leader and a private copy is
    stored.
    NOTE(review): the test on leader_spec, the failure branch and the
    return values are not visible in this listing.
*/
1296 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1298 xfree(mt->leader_spec);
1299 mt->leader_spec = 0;
/* scratch leader: only used to validate the specification */
1302 char dummy_leader[24];
1303 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1305 mt->leader_spec = xstrdup(leader_spec);
/** \brief applies a leader specification to a leader buffer
    \param leader_spec specification text; each item is parsed as pos=value
           where value is at most 20 characters and ends at a comma
    \param leader buffer to modify
    \param size size of leader
    An item is rejected when it does not parse or when pos lies outside
    the buffer. A quoted value is copied to leader + pos after checking
    that it fits; a value starting with a digit is handled by a separate
    branch whose body is not visible in this listing.
    NOTE(review): the loop over items and the return values are not
    visible here.
*/
1310 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1312 const char *cp = leader_spec;
1317 int no_read = 0, no = 0;
/* %n stores the number of characters consumed by this item */
1319 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1320 if (no < 2 || no_read < 3)
1322 if (pos < 0 || (size_t) pos >= size)
/* look for the closing quote after the opening one at val[0] */
1327 const char *vp = strchr(val+1, '\'');
1333 if (len + pos > size)
1335 memcpy(leader + pos, val+1, len);
1337 else if (*val >= '0' && *val <= '9')
/** \brief maps a format name to a YAZ_MARC_ format constant
    \param arg one of "marc", "marcxml", "turbomarc", "marcxchange", "line"
    The comparisons are exact and case-sensitive (strcmp).
    NOTE(review): the initial value of mode, and so the result for an
    unknown name, is not visible in this listing.
*/
1353 int yaz_marc_decode_formatstr(const char *arg)
1356 if (!strcmp(arg, "marc"))
1357 mode = YAZ_MARC_ISO2709;
1358 if (!strcmp(arg, "marcxml"))
1359 mode = YAZ_MARC_MARCXML;
1360 if (!strcmp(arg, "turbomarc"))
1361 mode = YAZ_MARC_TURBOMARC;
1362 if (!strcmp(arg, "marcxchange"))
1363 mode = YAZ_MARC_XCHANGE;
1364 if (!strcmp(arg, "line"))
1365 mode = YAZ_MARC_LINE;
/** \brief chooses between the libxml2 and the plain WRBUF XML writer
    \param mt handle
    \param enable non-zero makes yaz_marc_write_marcxml_ns build the XML
           output through libxml2
*/
1369 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1371 mt->write_using_libxml2 = enable;
1377 * c-file-style: "Stroustrup"
1378 * indent-tabs-mode: nil
1380 * vim: shiftwidth=4 tabstop=8 expandtab