2 * Copyright (C) 1995-2006, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdisp.c,v 1.34 2006-08-28 14:18:22 adam Exp $
10 * \brief Implements MARC conversion utilities
25 #include <yaz/marcdisp.h>
26 #include <yaz/wrbuf.h>
27 #include <yaz/yaz-util.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
34 static void yaz_marc_reset(yaz_marc_t mt);
36 /** \brief node types for yaz_marc_node */
37 enum YAZ_MARC_NODE_TYPE
40 YAZ_MARC_CONTROLFIELD,
45 /** \brief represents a data field */
46 struct yaz_marc_datafield {
49 struct yaz_marc_subfield *subfields;
52 /** \brief represents a control field */
53 struct yaz_marc_controlfield {
58 /** \brief a comment node */
59 struct yaz_marc_comment {
63 /** \brief MARC node */
64 struct yaz_marc_node {
65 enum YAZ_MARC_NODE_TYPE which;
67 struct yaz_marc_datafield datafield;
68 struct yaz_marc_controlfield controlfield;
72 struct yaz_marc_node *next;
75 /** \brief represents a subfield */
76 struct yaz_marc_subfield {
78 struct yaz_marc_subfield *next;
81 /** \brief the internals of a yaz_marc_t handle */
91 struct yaz_marc_node *nodes;
92 struct yaz_marc_node **nodes_pp;
93 struct yaz_marc_subfield **subfield_pp;
96 yaz_marc_t yaz_marc_create(void)
98 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
99 mt->xml = YAZ_MARC_LINE;
101 mt->m_wr = wrbuf_alloc();
104 strcpy(mt->subfield_str, " $");
105 strcpy(mt->endline_str, "\n");
107 mt->nmem = nmem_create();
112 void yaz_marc_destroy(yaz_marc_t mt)
116 nmem_destroy(mt->nmem);
117 wrbuf_free(mt->m_wr, 1);
118 xfree(mt->leader_spec);
122 static int marc_exec_leader(const char *leader_spec, char *leader,
126 struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
128 struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n));
131 mt->nodes_pp = &n->next;
135 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
137 struct yaz_marc_node *n = yaz_marc_add_node(mt);
138 n->which = YAZ_MARC_COMMENT;
139 n->u.comment = nmem_strdup(mt->nmem, comment);
142 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
149 _vsnprintf(buf, sizeof(buf)-1, fmt, ap);
153 vsnprintf(buf, sizeof(buf), fmt, ap);
155 vsprintf(buf, fmt, ap);
159 yaz_marc_add_comment(mt, buf);
163 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
165 struct yaz_marc_node *n = yaz_marc_add_node(mt);
166 n->which = YAZ_MARC_LEADER;
167 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
168 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
171 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
172 const char *data, size_t data_len)
174 struct yaz_marc_node *n = yaz_marc_add_node(mt);
175 n->which = YAZ_MARC_CONTROLFIELD;
176 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
177 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
183 sprintf(msg, "controlfield:");
184 for (i = 0; i < 16 && i < data_len; i++)
185 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
187 sprintf(msg + strlen(msg), " ..");
188 yaz_marc_add_comment(mt, msg);
193 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
194 const xmlNode *ptr_data)
196 struct yaz_marc_node *n = yaz_marc_add_node(mt);
197 n->which = YAZ_MARC_CONTROLFIELD;
198 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
199 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
203 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
204 const char *indicator, size_t indicator_len)
206 struct yaz_marc_node *n = yaz_marc_add_node(mt);
207 n->which = YAZ_MARC_DATAFIELD;
208 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
209 n->u.datafield.indicator =
210 nmem_strdupn(mt->nmem, indicator, indicator_len);
211 n->u.datafield.subfields = 0;
213 /* make subfield_pp the current (last one) */
214 mt->subfield_pp = &n->u.datafield.subfields;
218 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
219 const char *indicator, size_t indicator_len)
221 struct yaz_marc_node *n = yaz_marc_add_node(mt);
222 n->which = YAZ_MARC_DATAFIELD;
223 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
224 n->u.datafield.indicator =
225 nmem_strdupn(mt->nmem, indicator, indicator_len);
226 n->u.datafield.subfields = 0;
228 /* make subfield_pp the current (last one) */
229 mt->subfield_pp = &n->u.datafield.subfields;
233 void yaz_marc_add_subfield(yaz_marc_t mt,
234 const char *code_data, size_t code_data_len)
241 sprintf(msg, "subfield:");
242 for (i = 0; i < 16 && i < code_data_len; i++)
243 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
244 if (i < code_data_len)
245 sprintf(msg + strlen(msg), " ..");
246 yaz_marc_add_comment(mt, msg);
251 struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n));
252 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
254 /* mark subfield_pp to point to this one, so we append here next */
255 *mt->subfield_pp = n;
256 mt->subfield_pp = &n->next;
260 static int atoi_n_check(const char *buf, int size, int *val)
262 if (!isdigit(*(const unsigned char *) buf))
264 *val = atoi_n(buf, size);
268 /** \brief reads the MARC 24 bytes leader and checks content
270 \param leader_c the 24 byte leader
271 \param indicator_length indicator length
272 \param identifier_length identifier length
273 \param base_address base address
274 \param length_data_entry length of data entry
275 \param length_starting length of starting
276 \param length_implementation length of implementation defined data
278 static void yaz_marc_read_leader(yaz_marc_t mt, const char *leader_c,
279 int *indicator_length,
280 int *identifier_length,
282 int *length_data_entry,
283 int *length_starting,
284 int *length_implementation)
288 memcpy(leader, leader_c, 24);
290 if (!atoi_n_check(leader+10, 1, indicator_length))
293 "Indicator length at offset 10 should hold a digit."
296 *indicator_length = 2;
298 if (!atoi_n_check(leader+11, 1, identifier_length))
301 "Identifier length at offset 11 should hold a digit."
304 *identifier_length = 2;
306 if (!atoi_n_check(leader+12, 5, base_address))
309 "Base address at offsets 12..16 should hold a number."
313 if (!atoi_n_check(leader+20, 1, length_data_entry))
316 "Length data entry at offset 20 should hold a digit."
318 *length_data_entry = 4;
321 if (!atoi_n_check(leader+21, 1, length_starting))
324 "Length starting at offset 21 should hold a digit."
326 *length_starting = 5;
329 if (!atoi_n_check(leader+22, 1, length_implementation))
332 "Length implementation at offset 22 should hold a digit."
334 *length_implementation = 0;
340 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
341 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
342 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
343 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
344 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
345 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
347 yaz_marc_add_leader(mt, leader, 24);
350 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
352 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
353 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
356 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
358 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
359 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
362 /* try to guess how many bytes the identifier really is! */
363 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
368 for (i = 1; i<5; i++)
371 size_t outbytesleft = sizeof(outbuf);
373 const char *inp = buf;
375 size_t inbytesleft = i;
376 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
377 &outp, &outbytesleft);
378 if (r != (size_t) (-1))
379 return i; /* got a complete sequence */
381 return 1; /* giving up */
383 return 1; /* we don't know */
386 static void yaz_marc_reset(yaz_marc_t mt)
388 nmem_reset(mt->nmem);
390 mt->nodes_pp = &mt->nodes;
394 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
396 struct yaz_marc_node *n;
397 int identifier_length;
398 const char *leader = 0;
400 for (n = mt->nodes; n; n = n->next)
401 if (n->which == YAZ_MARC_LEADER)
403 leader = n->u.leader;
409 if (!atoi_n_check(leader+11, 1, &identifier_length))
412 for (n = mt->nodes; n; n = n->next)
414 struct yaz_marc_subfield *s;
417 case YAZ_MARC_DATAFIELD:
418 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
419 n->u.datafield.indicator);
420 for (s = n->u.datafield.subfields; s; s = s->next)
422 /* if identifier length is 2 (most MARCs),
423 the code is a single character .. However we've
424 seen multibyte codes, so see how big it really is */
425 size_t using_code_len =
426 (identifier_length != 2) ? identifier_length - 1
428 cdata_one_character(mt, s->code_data);
430 wrbuf_puts (wr, mt->subfield_str);
431 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
433 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
434 wrbuf_iconv_puts(wr, mt->iconv_cd,
435 s->code_data + using_code_len);
436 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
439 wrbuf_puts (wr, mt->endline_str);
441 case YAZ_MARC_CONTROLFIELD:
442 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
443 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
444 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
445 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
447 wrbuf_puts (wr, mt->endline_str);
449 case YAZ_MARC_COMMENT:
451 wrbuf_iconv_write(wr, mt->iconv_cd,
452 n->u.comment, strlen(n->u.comment));
453 wrbuf_puts(wr, ")\n");
455 case YAZ_MARC_LEADER:
456 wrbuf_printf(wr, "%s\n", n->u.leader);
462 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
467 return yaz_marc_write_line(mt, wr);
468 case YAZ_MARC_MARCXML:
469 return yaz_marc_write_marcxml(mt, wr);
470 case YAZ_MARC_XCHANGE:
471 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
472 case YAZ_MARC_ISO2709:
473 return yaz_marc_write_iso2709(mt, wr);
478 /** \brief common MARC XML/Xchange writer
480 \param wr WRBUF output
481 \param ns XMLNS for the elements
482 \param format record format (e.g. "MARC21")
483 \param type record type (e.g. "Bibliographic")
485 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
490 struct yaz_marc_node *n;
491 int identifier_length;
492 const char *leader = 0;
494 for (n = mt->nodes; n; n = n->next)
495 if (n->which == YAZ_MARC_LEADER)
497 leader = n->u.leader;
503 if (!atoi_n_check(leader+11, 1, &identifier_length))
506 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
508 wrbuf_printf(wr, " format=\"%.80s\"", format);
510 wrbuf_printf(wr, " type=\"%.80s\"", type);
511 wrbuf_printf(wr, ">\n");
512 for (n = mt->nodes; n; n = n->next)
514 struct yaz_marc_subfield *s;
518 case YAZ_MARC_DATAFIELD:
519 wrbuf_printf(wr, " <datafield tag=\"");
520 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
521 strlen(n->u.datafield.tag));
522 wrbuf_printf(wr, "\"");
523 if (n->u.datafield.indicator)
526 for (i = 0; n->u.datafield.indicator[i]; i++)
528 wrbuf_printf(wr, " ind%d=\"", i+1);
529 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
530 n->u.datafield.indicator+i, 1);
531 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
534 wrbuf_printf(wr, ">\n");
535 for (s = n->u.datafield.subfields; s; s = s->next)
537 /* if identifier length is 2 (most MARCs),
538 the code is a single character .. However we've
539 seen multibyte codes, so see how big it really is */
540 size_t using_code_len =
541 (identifier_length != 2) ? identifier_length - 1
543 cdata_one_character(mt, s->code_data);
545 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
546 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
547 s->code_data, using_code_len);
548 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
549 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
550 s->code_data + using_code_len,
551 strlen(s->code_data + using_code_len));
552 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
553 wrbuf_puts(wr, "\n");
555 wrbuf_printf(wr, " </datafield>\n");
557 case YAZ_MARC_CONTROLFIELD:
558 wrbuf_printf(wr, " <controlfield tag=\"");
559 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
560 strlen(n->u.controlfield.tag));
561 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
562 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
563 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
564 wrbuf_puts(wr, "\n");
566 case YAZ_MARC_COMMENT:
567 wrbuf_printf(wr, "<!-- ");
568 wrbuf_puts(wr, n->u.comment);
569 wrbuf_printf(wr, " -->\n");
571 case YAZ_MARC_LEADER:
572 wrbuf_printf(wr, " <leader>");
573 wrbuf_iconv_write_cdata(wr,
574 0 /* no charset conversion for leader */,
575 n->u.leader, strlen(n->u.leader));
576 wrbuf_printf(wr, "</leader>\n");
579 wrbuf_puts(wr, "</record>\n");
583 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
585 if (!mt->leader_spec)
586 yaz_marc_modify_leader(mt, 9, "a");
587 return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
591 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
595 return yaz_marc_write_marcxml_ns(mt, wr,
596 "http://www.bs.dk/standards/MarcXchange",
600 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
602 struct yaz_marc_node *n;
603 int indicator_length;
604 int identifier_length;
605 int length_data_entry;
607 int length_implementation;
609 const char *leader = 0;
610 WRBUF wr_dir, wr_head, wr_data_tmp;
613 for (n = mt->nodes; n; n = n->next)
614 if (n->which == YAZ_MARC_LEADER)
615 leader = n->u.leader;
619 if (!atoi_n_check(leader+10, 1, &indicator_length))
621 if (!atoi_n_check(leader+11, 1, &identifier_length))
623 if (!atoi_n_check(leader+20, 1, &length_data_entry))
625 if (!atoi_n_check(leader+21, 1, &length_starting))
627 if (!atoi_n_check(leader+22, 1, &length_implementation))
630 wr_data_tmp = wrbuf_alloc();
631 wr_dir = wrbuf_alloc();
632 for (n = mt->nodes; n; n = n->next)
635 struct yaz_marc_subfield *s;
639 case YAZ_MARC_DATAFIELD:
640 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
641 data_length += indicator_length;
642 wrbuf_rewind(wr_data_tmp);
643 for (s = n->u.datafield.subfields; s; s = s->next)
645 /* write dummy IDFS + content */
646 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
647 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
649 /* write dummy FS (makes MARC-8 to become ASCII) */
650 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
651 data_length += wrbuf_len(wr_data_tmp);
653 case YAZ_MARC_CONTROLFIELD:
654 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
656 wrbuf_rewind(wr_data_tmp);
657 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
658 n->u.controlfield.data);
659 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
660 data_length += wrbuf_len(wr_data_tmp);
662 case YAZ_MARC_COMMENT:
664 case YAZ_MARC_LEADER:
669 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
670 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
671 data_offset += data_length;
674 /* mark end of directory */
675 wrbuf_putc(wr_dir, ISO2709_FS);
677 /* base address of data (comes after leader+directory) */
678 base_address = 24 + wrbuf_len(wr_dir);
680 wr_head = wrbuf_alloc();
682 /* write record length */
683 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
684 /* from "original" leader */
685 wrbuf_write(wr_head, leader+5, 7);
686 /* base address of data */
687 wrbuf_printf(wr_head, "%05d", base_address);
688 /* from "original" leader */
689 wrbuf_write(wr_head, leader+17, 7);
691 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
692 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
693 wrbuf_free(wr_head, 1);
694 wrbuf_free(wr_dir, 1);
695 wrbuf_free(wr_data_tmp, 1);
697 for (n = mt->nodes; n; n = n->next)
699 struct yaz_marc_subfield *s;
703 case YAZ_MARC_DATAFIELD:
704 wrbuf_printf(wr, "%.*s", indicator_length,
705 n->u.datafield.indicator);
706 for (s = n->u.datafield.subfields; s; s = s->next)
708 wrbuf_putc(wr, ISO2709_IDFS);
709 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
710 /* write dummy blank - makes MARC-8 to become ASCII */
711 wrbuf_iconv_putchar(wr, mt->iconv_cd, ' ');
714 wrbuf_putc(wr, ISO2709_FS);
716 case YAZ_MARC_CONTROLFIELD:
717 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
718 /* write dummy blank - makes MARC-8 to become ASCII */
719 wrbuf_iconv_putchar(wr, mt->iconv_cd, ' ');
721 wrbuf_putc(wr, ISO2709_FS);
723 case YAZ_MARC_COMMENT:
725 case YAZ_MARC_LEADER:
729 wrbuf_printf(wr, "%c", ISO2709_RS);
734 int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
736 for (; ptr; ptr = ptr->next)
738 if (ptr->type == XML_ELEMENT_NODE)
740 if (!strcmp((const char *) ptr->name, "subfield"))
742 size_t ctrl_data_len = 0;
743 char *ctrl_data_buf = 0;
744 const xmlNode *p = 0, *ptr_code = 0;
745 struct _xmlAttr *attr;
746 for (attr = ptr->properties; attr; attr = attr->next)
747 if (!strcmp((const char *)attr->name, "code"))
748 ptr_code = attr->children;
752 mt, "Bad attribute '%.80s' for 'subfield'",
759 mt, "Missing attribute 'code' for 'subfield'" );
762 if (ptr_code->type == XML_TEXT_NODE)
765 strlen((const char *)ptr_code->content);
770 mt, "Missing value for 'code' in 'subfield'" );
773 for (p = ptr->children; p ; p = p->next)
774 if (p->type == XML_TEXT_NODE)
775 ctrl_data_len += strlen((const char *)p->content);
776 ctrl_data_buf = nmem_malloc(mt->nmem, ctrl_data_len+1);
777 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
778 for (p = ptr->children; p ; p = p->next)
779 if (p->type == XML_TEXT_NODE)
780 strcat(ctrl_data_buf, (const char *)p->content);
781 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
786 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
794 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
796 int indicator_length;
797 int identifier_length;
799 int length_data_entry;
801 int length_implementation;
802 const char *leader = 0;
803 const xmlNode *ptr = *ptr_p;
805 for(; ptr; ptr = ptr->next)
806 if (ptr->type == XML_ELEMENT_NODE)
808 if (!strcmp((const char *) ptr->name, "leader"))
810 xmlNode *p = ptr->children;
811 for(; p; p = p->next)
812 if (p->type == XML_TEXT_NODE)
813 leader = (const char *) p->content;
819 mt, "Expected element 'leader', got '%.80s'", ptr->name);
825 yaz_marc_cprintf(mt, "Missing element 'leader'");
828 if (strlen(leader) != 24)
830 yaz_marc_cprintf(mt, "Bad length %d of leader data."
831 " Must have length of 24 characters", strlen(leader));
834 yaz_marc_read_leader(mt, leader,
840 &length_implementation);
845 static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
847 for(; ptr; ptr = ptr->next)
848 if (ptr->type == XML_ELEMENT_NODE)
850 if (!strcmp((const char *) ptr->name, "controlfield"))
852 const xmlNode *ptr_tag = 0;
853 struct _xmlAttr *attr;
854 for (attr = ptr->properties; attr; attr = attr->next)
855 if (!strcmp((const char *)attr->name, "tag"))
856 ptr_tag = attr->children;
860 mt, "Bad attribute '%.80s' for 'controlfield'",
867 mt, "Missing attribute 'tag' for 'controlfield'" );
870 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
872 else if (!strcmp((const char *) ptr->name, "datafield"))
874 char indstr[11]; /* 0(unused), 1,....9, + zero term */
875 const xmlNode *ptr_tag = 0;
876 struct _xmlAttr *attr;
878 for (i = 0; i<11; i++)
880 for (attr = ptr->properties; attr; attr = attr->next)
881 if (!strcmp((const char *)attr->name, "tag"))
882 ptr_tag = attr->children;
883 else if (strlen((const char *)attr->name) == 4 &&
884 !memcmp(attr->name, "ind", 3))
886 int no = atoi((const char *)attr->name+3);
888 && attr->children->type == XML_TEXT_NODE)
889 indstr[no] = attr->children->content[0];
894 mt, "Bad attribute '%.80s' for 'datafield'",
901 mt, "Missing attribute 'tag' for 'datafield'" );
904 /* note that indstr[0] is unused so we use indstr[1..] */
905 yaz_marc_add_datafield_xml(mt, ptr_tag,
906 indstr+1, strlen(indstr+1));
908 if (yaz_marc_read_xml_subfields(mt, ptr->children))
914 "Expected element controlfield or datafield,"
915 " got %.80s", ptr->name);
922 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
924 const xmlNode *ptr = xmlnode;
925 for(; ptr; ptr = ptr->next)
926 if (ptr->type == XML_ELEMENT_NODE)
928 if (!strcmp((const char *) ptr->name, "record"))
933 mt, "Unknown element '%.80s' in MARC XML reader",
940 yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
943 /* ptr points to record node now */
945 if (yaz_marc_read_xml_leader(mt, &ptr))
947 return yaz_marc_read_xml_fields(mt, ptr->next);
950 int yaz_marc_read_xml(yaz_marc_t mt, const void *xmlnode)
956 int yaz_marc_read_iso2709(yaz_marc_t mt, const char *buf, int bsize)
960 int indicator_length;
961 int identifier_length;
962 int end_of_directory;
964 int length_data_entry;
966 int length_implementation;
970 record_length = atoi_n (buf, 5);
971 if (record_length < 25)
973 yaz_marc_cprintf(mt, "Record length %d < 24", record_length);
976 /* bail out if bsize is known and record_length is greater than that */
977 if (bsize != -1 && record_length > bsize)
979 yaz_marc_cprintf(mt, "Record appears to be larger than buffer %d < %d",
980 record_length, bsize);
984 yaz_marc_cprintf(mt, "Record length %5d", record_length);
986 yaz_marc_read_leader(mt, buf,
992 &length_implementation);
994 /* First pass. determine length of directory & base of data */
995 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
997 /* length of directory entry */
998 int l = 3 + length_data_entry + length_starting;
999 if (entry_p + l >= record_length)
1001 yaz_marc_cprintf(mt, "Directory offset %d: end of record."
1002 " Missing FS char", entry_p);
1007 yaz_marc_cprintf(mt, "Directory offset %d: Tag %.3s",
1008 entry_p, buf+entry_p);
1010 /* Check for digits in length info */
1012 if (!isdigit(*(const unsigned char *) (buf + entry_p+l)))
1016 /* Not all digits, so stop directory scan */
1017 yaz_marc_cprintf(mt, "Directory offset %d: Bad value for data"
1018 " length and/or length starting", entry_p);
1021 entry_p += 3 + length_data_entry + length_starting;
1023 end_of_directory = entry_p;
1024 if (base_address != entry_p+1)
1026 yaz_marc_cprintf(mt, "Base address not at end of directory,"
1027 " base %d, end %d", base_address, entry_p+1);
1030 /* Second pass. parse control - and datafields */
1031 for (entry_p = 24; entry_p != end_of_directory; )
1038 int identifier_flag = 0;
1039 int entry_p0 = entry_p;
1041 memcpy (tag, buf+entry_p, 3);
1044 data_length = atoi_n(buf+entry_p, length_data_entry);
1045 entry_p += length_data_entry;
1046 data_offset = atoi_n(buf+entry_p, length_starting);
1047 entry_p += length_starting;
1048 i = data_offset + base_address;
1049 end_offset = i+data_length-1;
1051 if (data_length <= 0 || data_offset < 0)
1056 yaz_marc_cprintf(mt, "Tag: %s. Directory offset %d: data-length %d,"
1058 tag, entry_p0, data_length, data_offset);
1060 if (end_offset >= record_length)
1062 yaz_marc_cprintf(mt, "Directory offset %d: Data out of bounds %d >= %d",
1063 entry_p0, end_offset, record_length);
1067 if (memcmp (tag, "00", 2))
1068 identifier_flag = 1; /* if not 00X assume subfields */
1069 else if (indicator_length < 4 && indicator_length > 0)
1071 /* Danmarc 00X have subfields */
1072 if (buf[i + indicator_length] == ISO2709_IDFS)
1073 identifier_flag = 1;
1074 else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
1075 identifier_flag = 2;
1078 if (identifier_flag)
1081 i += identifier_flag-1;
1082 yaz_marc_add_datafield(mt, tag, buf+i, indicator_length);
1083 i += indicator_length;
1085 while (i < end_offset &&
1086 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1088 int code_offset = i+1;
1091 while (i < end_offset &&
1092 buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
1093 buf[i] != ISO2709_FS)
1095 yaz_marc_add_subfield(mt, buf+code_offset, i - code_offset);
1102 while (i < end_offset &&
1103 buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1105 yaz_marc_add_controlfield(mt, tag, buf+i0, i-i0);
1109 yaz_marc_cprintf(mt, "Separator but not at end of field length=%d",
1112 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
1114 yaz_marc_cprintf(mt, "No separator at end of field length=%d",
1118 return record_length;
1121 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1123 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1126 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1128 return -1; /* error */
1129 return r; /* OK, return length > 0 */
1132 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1133 char **result, int *rsize)
1137 wrbuf_rewind(mt->m_wr);
1138 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1140 *result = wrbuf_buf(mt->m_wr);
1142 *rsize = wrbuf_len(mt->m_wr);
1146 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1152 void yaz_marc_debug(yaz_marc_t mt, int level)
1158 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1163 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1165 struct yaz_marc_node *n;
1167 for (n = mt->nodes; n; n = n->next)
1168 if (n->which == YAZ_MARC_LEADER)
1170 leader = n->u.leader;
1171 memcpy(leader+off, str, strlen(str));
1177 int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
1179 yaz_marc_t mt = yaz_marc_create();
1184 r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr);
1185 yaz_marc_destroy(mt);
1190 int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
1192 return yaz_marc_decode(buf, wr, debug, bsize, 0);
1196 int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
1198 yaz_marc_t mt = yaz_marc_create();
1202 r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr);
1206 fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf);
1207 yaz_marc_destroy(mt);
/** \brief displays a MARC record on a stream when the buffer size is unknown
    \param buf MARC record buffer
    \param outf output stream
    \param debug debug level, passed through unchanged
    \returns whatever marc_display_exl returns
*/
int marc_display_ex (const char *buf, FILE *outf, int debug)
{
    /* -1 tells the ISO2709 reader that the buffer size is not known */
    const int bsize_unknown = -1;

    return marc_display_exl(buf, outf, debug, bsize_unknown);
}
/** \brief displays a MARC record on a stream with debug level 0
    \param buf MARC record buffer
    \param outf output stream
    \returns whatever marc_display_ex returns
*/
int marc_display (const char *buf, FILE *outf)
{
    const int debug_level = 0;

    return marc_display_ex(buf, outf, debug_level);
}
1223 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1225 xfree(mt->leader_spec);
1226 mt->leader_spec = 0;
1229 char dummy_leader[24];
1230 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1232 mt->leader_spec = xstrdup(leader_spec);
1237 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1239 const char *cp = leader_spec;
1244 int no_read = 0, no = 0;
1246 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1247 if (no < 2 || no_read < 3)
1249 if (pos < 0 || pos >= size)
1254 const char *vp = strchr(val+1, '\'');
1260 if (len + pos > size)
1262 memcpy(leader + pos, val+1, len);
1264 else if (*val >= '0' && *val <= '9')
1284 * indent-tabs-mode: nil
1286 * vim: shiftwidth=4 tabstop=8 expandtab