2 * Copyright (C) 1995-2005, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdisp.c,v 1.16 2005-02-08 13:51:30 adam Exp $
10 * \brief Implements MARC display - and conversion utilities
20 #include <yaz/marcdisp.h>
21 #include <yaz/wrbuf.h>
22 #include <yaz/yaz-util.h>
/* Allocate a new MARC handle with xmalloc and give it its default settings.
 * Defaults visible here: output mode YAZ_MARC_LINE, a freshly allocated work
 * buffer in m_wr, subfield prefix " $" and a newline as end-of-line string.
 * NOTE(review): this listing omits some lines of the body (braces, possibly
 * further field initialisers, and presumably the return of mt) - confirm
 * against the complete file.
 */
33 yaz_marc_t yaz_marc_create(void)
35 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
36 mt->xml = YAZ_MARC_LINE;
38 mt->m_wr = wrbuf_alloc();
/* Both literals must fit the fixed-size arrays inside the handle. */
40 strcpy(mt->subfield_str, " $");
41 strcpy(mt->endline_str, "\n");
/* Replace the subfield prefix string stored in the handle.
 *   mt - handle to update.
 *   s  - new prefix; copied, not retained.
 * At most sizeof(mt->subfield_str)-1 bytes are copied, so longer input is
 * silently truncated. yaz_marc_decode_wrbuf writes this string ahead of a
 * subfield identifier (presumably only in line mode - confirm).
 */
45 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
47 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
/* strncpy does not terminate on truncation, so force the final NUL. */
48 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/* Replace the end-of-line string stored in the handle.
 *   mt - handle to update.
 *   s  - new end-of-line string; copied, not retained.
 * At most sizeof(mt->endline_str)-1 bytes are copied, so longer input is
 * silently truncated. yaz_marc_decode_wrbuf writes this string when the
 * output mode is YAZ_MARC_LINE.
 */
51 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
53 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
/* strncpy does not terminate on truncation, so force the final NUL. */
54 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
/* Tear down a handle obtained from yaz_marc_create.
 * The only statement visible in this listing releases the handle's work
 * buffer (mt->m_wr).
 * NOTE(review): lines are missing around the call; whether a NULL mt is
 * tolerated and whether mt itself is freed cannot be seen here - confirm.
 */
57 void yaz_marc_destroy(yaz_marc_t mt)
61 wrbuf_free (mt->m_wr, 1);
/* Write len bytes of buf to wr, passing them through the handle's character
 * set converter (mt->iconv_cd).
 * ISO2709 and line output use wrbuf_iconv_write; the final call uses
 * wrbuf_iconv_write_cdata instead.
 * NOTE(review): the line between the second and third call is missing from
 * this listing; presumably it is an `else`, making the cdata variant the
 * fallback for the remaining (XML) modes - confirm.
 */
65 static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr)
67 if (mt->xml == YAZ_MARC_ISO2709)
68 wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
69 else if (mt->xml == YAZ_MARC_LINE)
70 wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
72 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, buf, len);
/* Parse a fixed-width decimal field, checking that it starts with a digit.
 *   buf  - start of the field.
 *   size - field width handed to atoi_n.
 *   val  - receives the parsed value.
 * Only the first byte is tested with isdigit; the remaining bytes are left to
 * atoi_n. Callers treat a zero return as "not a number" and substitute a
 * default.
 * NOTE(review): the return statements are missing from this listing.
 */
75 static int atoi_n_check(const char *buf, int size, int *val)
/* Cast through unsigned char so isdigit never sees a negative value. */
77 if (!isdigit(*(const unsigned char *) buf))
79 *val = atoi_n(buf, size);
/* Decode a single ISO2709 (MARC) record held in buf and append its rendering
 * to wr, in the output format selected by mt->xml.
 *
 *   mt    - handle supplying the output mode (mt->xml), the debug flag
 *           (mt->debug) and the character set converter (mt->iconv_cd).
 *   buf   - raw record; its first 24 bytes are copied to lead (the leader).
 *   bsize - number of bytes available in buf, or -1 when unknown.
 *   wr    - output buffer that receives the converted record.
 *
 * The last visible statement returns record_length.
 *
 * NOTE(review): this listing omits many lines of the function (braces, some
 * declarations, switch/else/break/return statements), so the early-exit paths
 * and their return values cannot be documented from here.
 */
83 int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
88 int identifier_length;
90 int length_data_entry;
92 int length_implementation;
94 int produce_warnings = 0;
/* Diagnostics are written as XML comments, so they are enabled only for the
 * XML-style output modes. */
98 if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_OAIMARC
99 || mt->xml == YAZ_MARC_MARCXML || mt->xml == YAZ_MARC_XCHANGE)
100 produce_warnings = 1;
/* Leader bytes 0..4 hold the record length; a value below 25 is reported
 * with an "aborting" message. */
104 record_length = atoi_n (buf, 5);
105 if (record_length < 25)
111 wrbuf_printf(wr, "<!-- Record length %d - aborting -->\n",
116 memcpy(lead, buf, 24); /* so we can modify the header for output */
118 /* bail out if bsize is known and record_length exceeds it */
119 if (bsize != -1 && record_length > bsize)
/* Parse the remaining fixed leader fields. Each one falls back to a default
 * (named in its warning text) when the leader does not hold a digit there. */
121 if (!atoi_n_check(buf+10, 1, &indicator_length))
123 if (produce_warnings)
124 wrbuf_printf(wr, "<!-- Indicator length at offset 10 should hold a digit. Assuming 2 -->\n");
126 indicator_length = 2;
128 if (!atoi_n_check(buf+11, 1, &identifier_length))
130 if (produce_warnings)
131 wrbuf_printf(wr, "<!-- Identifier length at offset 11 should hold a digit. Assuming 2 -->\n");
133 identifier_length = 2;
135 if (!atoi_n_check(buf+12, 5, &base_address))
137 if (produce_warnings)
138 wrbuf_printf(wr, "<!-- Base address at offsets 12..16 should hold a number. Assuming 0 -->\n");
141 if (!atoi_n_check(buf+20, 1, &length_data_entry))
143 if (produce_warnings)
144 wrbuf_printf(wr, "<!-- Length data entry at offset 20 should hold a digit. Assuming 4 -->\n");
145 length_data_entry = 4;
148 if (!atoi_n_check(buf+21, 1, &length_starting))
150 if (produce_warnings)
151 wrbuf_printf(wr, "<!-- Length starting at offset 21 should hold a digit. Assuming 5 -->\n");
155 if (!atoi_n_check(buf+22, 1, &length_implementation))
157 if (produce_warnings)
158 wrbuf_printf(wr, "<!-- Length implementation at offset 22 should hold a digit. Assuming 0 -->\n");
159 length_implementation = 0;
/* Record header: every mode except line mode emits a format-specific
 * opening (element start plus leader information). */
163 if (mt->xml != YAZ_MARC_LINE)
169 case YAZ_MARC_ISO2709:
/* Simple XML: leader bytes 5, 6 and 7..25 become attributes of <iso2709>.
 * NOTE(review): with i up to 19 the loop reads buf[25], one byte past the
 * 24-byte leader - confirm this is intended. */
171 case YAZ_MARC_SIMPLEXML:
172 wrbuf_puts (wr, "<iso2709\n");
173 sprintf (str, " RecordStatus=\"%c\"\n", buf[5]);
174 wrbuf_puts (wr, str);
175 sprintf (str, " TypeOfRecord=\"%c\"\n", buf[6]);
176 wrbuf_puts (wr, str);
177 for (i = 1; i<=19; i++)
179 sprintf (str, " ImplDefined%d=\"%c\"\n", i, buf[6+i]);
180 wrbuf_puts (wr, str);
182 wrbuf_puts (wr, ">\n");
/* NOTE(review): the namespace URI below is spelled ".../OIA/oai_marc" while
 * the schema location uses ".../OAI/..."; this looks like a transposition,
 * but changing it alters emitted output - confirm before touching. */
184 case YAZ_MARC_OAIMARC:
187 "<oai_marc xmlns=\"http://www.openarchives.org/OIA/oai_marc\""
189 " xmlns:xsi=\"http://www.w3.org/2000/10/XMLSchema-instance\""
191 " xsi:schemaLocation=\"http://www.openarchives.org/OAI/oai_marc.xsd\""
195 sprintf (str, " status=\"%c\" type=\"%c\" catForm=\"%c\">\n",
196 buf[5], buf[6], buf[7]);
197 wrbuf_puts (wr, str);
/* MARCXML: the (modified) leader copy is written as character data. */
199 case YAZ_MARC_MARCXML:
202 "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
204 lead[9] = 'a'; /* set leader to signal unicode */
205 marc_cdata(mt, lead, 24, wr);
206 wrbuf_printf(wr, "</leader>\n");
/* MarcXchange: same shape as MARCXML, but leader byte 9 is left untouched. */
208 case YAZ_MARC_XCHANGE:
211 "<record xmlns=\"http://www.bs.dk/standards/MarcXchange\">\n"
213 marc_cdata(mt, lead, 24, wr);
214 wrbuf_printf(wr, "</leader>\n");
/* Dump the decoded leader values inside an XML comment.
 * NOTE(review): the condition guarding this dump is not visible in this
 * listing (presumably the debug flag) - confirm. */
223 wrbuf_puts (wr, "<!--\n");
224 sprintf (str, "Record length %5d\n", record_length);
225 wrbuf_puts (wr, str);
226 sprintf (str, "Indicator length %5d\n", indicator_length);
227 wrbuf_puts (wr, str);
228 sprintf (str, "Identifier length %5d\n", identifier_length);
229 wrbuf_puts (wr, str);
230 sprintf (str, "Base address %5d\n", base_address);
231 wrbuf_puts (wr, str);
232 sprintf (str, "Length data entry %5d\n", length_data_entry);
233 wrbuf_puts (wr, str);
234 sprintf (str, "Length starting %5d\n", length_starting);
235 wrbuf_puts (wr, str);
236 sprintf (str, "Length implementation %5d\n", length_implementation);
237 wrbuf_puts (wr, str);
239 wrbuf_puts (wr, "-->\n");
242 /* first pass. determine length of directory & base of data */
/* Each directory entry occupies 3 tag bytes plus the data-length and
 * starting-position fields; the directory ends at the first ISO2709_FS.
 * NOTE(review): the only bound visible here is the comparison with
 * record_length, not with bsize - confirm the scan cannot leave buf. */
243 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
245 entry_p += 3+length_data_entry+length_starting;
246 if (entry_p >= record_length)
/* The data area starts right after the directory terminator; the value
 * from the leader is replaced by the position found by the scan. */
249 if (mt->debug && base_address != entry_p+1)
251 if (produce_warnings)
252 wrbuf_printf (wr," <!-- base address not at end of directory "
253 "base=%d end=%d -->\n", base_address, entry_p+1);
255 base_address = entry_p+1;
/* ISO2709 output: a new leader and directory are assembled in scratch
 * buffers, with entry lengths taken from the converted field data. */
257 if (mt->xml == YAZ_MARC_ISO2709)
259 WRBUF wr_head = wrbuf_alloc();
260 WRBUF wr_dir = wrbuf_alloc();
261 WRBUF wr_tmp = wrbuf_alloc();
264 /* second pass. create directory for ISO2709 output */
265 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
267 int data_length, data_offset, end_offset;
/* Copy the 3-byte tag unchanged into the new directory. */
270 wrbuf_write(wr_dir, buf+entry_p, 3);
273 data_length = atoi_n (buf+entry_p, length_data_entry);
274 entry_p += length_data_entry;
275 data_offset = atoi_n (buf+entry_p, length_starting);
276 entry_p += length_starting;
277 i = data_offset + base_address;
278 end_offset = i+data_length-1;
/* Locate the end of the field, then measure it: sz1 is the span in the
 * source record up to and including the byte at i, sz2 is the value
 * returned by wrbuf_iconv_write for that span. */
280 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS &&
283 sz1 = 1+i - (data_offset + base_address);
286 sz2 = wrbuf_iconv_write(wr_tmp, mt->iconv_cd,
287 buf + data_offset+base_address, sz1);
288 wrbuf_rewind(wr_tmp);
/* Zero-padded entry fields, same widths as in the input directory.
 * data_p is presumably the running offset of converted data; its update
 * is not visible in this listing. */
292 wrbuf_printf(wr_dir, "%0*d", length_data_entry, sz2);
293 wrbuf_printf(wr_dir, "%0*d", length_starting, data_p);
296 wrbuf_putc(wr_dir, ISO2709_FS);
/* New leader: 5-digit record length, leader bytes 5..11, 5-digit base
 * address, leader bytes 17..23. */
297 wrbuf_printf(wr_head, "%05d", data_p+1 + base_address);
298 wrbuf_write(wr_head, lead+5, 7);
299 wrbuf_printf(wr_head, "%05d", base_address);
300 wrbuf_write(wr_head, lead+17, 7);
302 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
303 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
304 wrbuf_free(wr_head, 1);
305 wrbuf_free(wr_dir, 1);
306 wrbuf_free(wr_tmp, 1);
308 /* third pass. create data output */
309 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
316 int identifier_flag = 0;
/* Decode one directory entry: tag, data length and data offset. */
319 memcpy (tag, buf+entry_p, 3);
323 data_length = atoi_n (buf+entry_p, length_data_entry);
324 entry_p += length_data_entry;
325 data_offset = atoi_n (buf+entry_p, length_starting);
326 entry_p += length_starting;
327 i = data_offset + base_address;
328 end_offset = i+data_length-1;
332 wrbuf_printf(wr, "<!-- offset=%d data dlength=%d doffset=%d -->\n",
333 entry_p0, data_length, data_offset);
/* Decide whether the field carries indicators and subfields. */
336 if (memcmp (tag, "00", 2))
337 identifier_flag = 1; /* if not 00X assume subfields */
338 else if (indicator_length < 4 && indicator_length > 0)
340 /* Danmarc 00X have subfields */
341 if (buf[i + indicator_length] == ISO2709_IDFS)
343 else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
349 wrbuf_printf(wr, "<!-- identifier_flag = %d -->\n",
/* Field opening: write the tag in the syntax of the selected format. */
357 wrbuf_puts (wr, "Tag: ");
358 wrbuf_puts (wr, tag);
359 wrbuf_puts (wr, " ");
361 case YAZ_MARC_SIMPLEXML:
362 wrbuf_printf (wr, "<field tag=\"");
363 marc_cdata(mt, tag, strlen(tag), wr);
364 wrbuf_printf(wr, "\"");
366 case YAZ_MARC_OAIMARC:
368 wrbuf_printf (wr, " <varfield id=\"");
370 wrbuf_printf (wr, " <fixfield id=\"");
371 marc_cdata(mt, tag, strlen(tag), wr);
372 wrbuf_printf(wr, "\"");
374 case YAZ_MARC_MARCXML:
375 case YAZ_MARC_XCHANGE:
377 wrbuf_printf (wr, " <datafield tag=\"");
379 wrbuf_printf (wr, " <controlfield tag=\"");
380 marc_cdata(mt, tag, strlen(tag), wr);
381 wrbuf_printf(wr, "\"");
/* Indicators: one byte each, indicator_length of them. */
386 i += identifier_flag-1;
387 for (j = 0; j<indicator_length; j++, i++)
391 case YAZ_MARC_ISO2709:
392 wrbuf_putc(wr, buf[i]);
396 wrbuf_puts (wr, " Ind: ");
397 wrbuf_putc(wr, buf[i]);
399 case YAZ_MARC_SIMPLEXML:
400 wrbuf_printf(wr, " Indicator%d=\"", j+1);
401 marc_cdata(mt, buf+i, 1, wr);
402 wrbuf_printf(wr, "\"");
404 case YAZ_MARC_OAIMARC:
405 wrbuf_printf(wr, " i%d=\"", j+1);
406 marc_cdata(mt, buf+i, 1, wr);
407 wrbuf_printf(wr, "\"");
409 case YAZ_MARC_MARCXML:
410 case YAZ_MARC_XCHANGE:
411 wrbuf_printf(wr, " ind%d=\"", j+1);
412 marc_cdata(mt, buf+i, 1, wr);
413 wrbuf_printf(wr, "\"");
/* Close the opening tag of the field element for the XML-style modes. */
417 if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_MARCXML
418 || mt->xml == YAZ_MARC_OAIMARC || mt->xml == YAZ_MARC_XCHANGE)
420 wrbuf_puts (wr, ">");
422 wrbuf_puts (wr, "\n");
424 if (mt->xml == YAZ_MARC_LINE)
427 wrbuf_puts (wr, " Fields: ");
/* Subfield loop: runs until a field/record terminator or the end offset
 * taken from the directory entry. */
431 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
/* ISO2709 output copies the whole identifier (identifier_length bytes);
 * the other modes write identifier_length-1 bytes as the subfield code. */
437 case YAZ_MARC_ISO2709:
439 wrbuf_iconv_write(wr, mt->iconv_cd,
440 buf+i, identifier_length);
441 i += identifier_length;
444 wrbuf_puts (wr, mt->subfield_str);
445 marc_cdata(mt, buf+i, identifier_length-1, wr);
446 i = i+identifier_length-1;
447 wrbuf_putc (wr, ' ');
449 case YAZ_MARC_SIMPLEXML:
450 wrbuf_puts (wr, " <subfield code=\"");
451 marc_cdata(mt, buf+i, identifier_length-1, wr);
452 i = i+identifier_length-1;
453 wrbuf_puts (wr, "\">");
455 case YAZ_MARC_OAIMARC:
456 wrbuf_puts (wr, " <subfield label=\"");
457 marc_cdata(mt, buf+i, identifier_length-1, wr);
458 i = i+identifier_length-1;
459 wrbuf_puts (wr, "\">");
461 case YAZ_MARC_MARCXML:
462 case YAZ_MARC_XCHANGE:
463 wrbuf_puts (wr, " <subfield code=\"");
464 marc_cdata(mt, buf+i, identifier_length-1, wr);
465 i = i+identifier_length-1;
466 wrbuf_puts (wr, "\">");
/* Subfield data: everything from i0 up to the next delimiter. */
470 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
471 buf[i] != ISO2709_FS && i < end_offset)
473 marc_cdata(mt, buf + i0, i - i0, wr);
/* ISO2709 output also copies the byte the scan stopped on, unless that
 * byte starts the next subfield. */
475 if (mt->xml == YAZ_MARC_ISO2709 && buf[i] != ISO2709_IDFS)
476 marc_cdata(mt, buf + i, 1, wr);
478 if (mt->xml == YAZ_MARC_SIMPLEXML ||
479 mt->xml == YAZ_MARC_MARCXML ||
480 mt->xml == YAZ_MARC_XCHANGE ||
481 mt->xml == YAZ_MARC_OAIMARC)
482 wrbuf_puts (wr, "</subfield>\n");
/* Field data scanned without subfield splitting (presumably the branch
 * for fields without identifiers - the branching line is not visible). */
488 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
490 marc_cdata(mt, buf + i0, i - i0, wr);
491 if (mt->xml == YAZ_MARC_ISO2709)
492 marc_cdata(mt, buf + i, 1, wr);
494 if (mt->xml == YAZ_MARC_LINE)
495 wrbuf_puts (wr, mt->endline_str);
/* Diagnostics about where the scan stopped relative to the field's
 * terminator; the condition guarding the first message is not visible. */
497 wrbuf_printf(wr, " <!-- separator but not at end of field length=%d-->\n", data_length);
498 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
499 wrbuf_printf(wr, " <!-- no separator at end of field length=%d-->\n", data_length);
/* Field closing element, per format. */
502 case YAZ_MARC_SIMPLEXML:
503 wrbuf_puts (wr, "</field>\n");
505 case YAZ_MARC_OAIMARC:
507 wrbuf_puts (wr, "</varfield>\n");
509 wrbuf_puts (wr, "</fixfield>\n");
511 case YAZ_MARC_MARCXML:
512 case YAZ_MARC_XCHANGE:
514 wrbuf_puts (wr, " </datafield>\n");
516 wrbuf_puts (wr, "</controlfield>\n");
/* Record trailer, per format; ISO2709 output ends with a record
 * separator byte. */
525 case YAZ_MARC_SIMPLEXML:
526 wrbuf_puts (wr, "</iso2709>\n");
528 case YAZ_MARC_OAIMARC:
529 wrbuf_puts (wr, "</oai_marc>\n");
531 case YAZ_MARC_MARCXML:
532 case YAZ_MARC_XCHANGE:
533 wrbuf_puts (wr, "</record>\n");
535 case YAZ_MARC_ISO2709:
536 wrbuf_putc (wr, ISO2709_RS);
539 return record_length;
/* Decode a record into the handle's own work buffer and hand the result back
 * through result/rsize.
 *   mt     - handle; its m_wr buffer receives the output.
 *   buf    - raw record.
 *   bsize  - size of buf, or -1 when unknown.
 *   result - receives a pointer into mt->m_wr (not a copy), so it is only
 *            meaningful while the handle and its buffer contents live on.
 *   rsize  - receives the length of the output.
 * r holds the value returned by yaz_marc_decode_wrbuf.
 * NOTE(review): the conditions around the two assignments, any buffer
 * rewind, and the return statement are missing from this listing.
 */
542 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
543 char **result, int *rsize)
545 int r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
549 *result = wrbuf_buf(mt->m_wr);
551 *rsize = wrbuf_len(mt->m_wr);
/* Select the output format of a handle.
 * NOTE(review): the body is missing from this listing; presumably it stores
 * xmlmode in mt->xml (the field tested throughout yaz_marc_decode_wrbuf) -
 * confirm against the complete file.
 */
556 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
/* Set the debug level of a handle.
 * NOTE(review): the body is missing from this listing; presumably it stores
 * level in mt->debug (the field read by yaz_marc_decode_wrbuf) - confirm
 * against the complete file.
 */
562 void yaz_marc_debug(yaz_marc_t mt, int level)
/* Attach a character set converter to a handle.
 * NOTE(review): the body is missing from this listing; presumably it stores
 * cd in mt->iconv_cd (the converter passed to the wrbuf_iconv_write calls) -
 * confirm against the complete file, including who owns cd afterwards.
 */
568 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/* One-shot decode: create a temporary handle, decode buf into wr, then
 * destroy the handle again.
 *   buf   - raw record.
 *   wr    - output buffer.
 *   debug - debug level requested by the caller.
 *   bsize - size of buf, or -1 when unknown.
 *   xml   - output format requested by the caller.
 * r holds the value returned by yaz_marc_decode_wrbuf.
 * NOTE(review): the lines that apply debug and xml to the handle, and the
 * return statement, are missing from this listing.
 */
574 int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
576 yaz_marc_t mt = yaz_marc_create();
581 r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr);
582 yaz_marc_destroy(mt);
/* Convenience wrapper around yaz_marc_decode that always passes 0 as the
 * xml (output format) argument.
 *   buf   - raw record.
 *   wr    - output buffer.
 *   debug - debug level, passed through.
 *   bsize - size of buf, or -1 when unknown.
 * Returns whatever yaz_marc_decode returns.
 */
587 int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
589 return yaz_marc_decode(buf, wr, debug, bsize, 0);
/* Decode a record with a temporary handle and write the output to a stdio
 * stream.
 *   buf   - raw record.
 *   outf  - stream that receives the decoded text.
 *   debug - debug level requested by the caller.
 *   bsize - size of buf, or -1 when unknown.
 * The record is decoded into the handle's own buffer, which is then written
 * to outf in a single fwrite before the handle is destroyed. The result of
 * fwrite is not checked on the visible line.
 * NOTE(review): the lines that apply debug to the handle, any condition
 * around the fwrite, and the return statement are missing from this listing.
 */
593 int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
595 yaz_marc_t mt = yaz_marc_create();
599 r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr);
603 fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf);
604 yaz_marc_destroy(mt);
/* Same as marc_display_exl, with the buffer size given as unknown (-1).
 *   buf   - raw record.
 *   outf  - stream that receives the decoded text.
 *   debug - debug level, passed through.
 * Returns whatever marc_display_exl returns.
 */
609 int marc_display_ex (const char *buf, FILE *outf, int debug)
611 return marc_display_exl (buf, outf, debug, -1);
/* Simplest entry point: same as marc_display_ex with debug level 0.
 *   buf  - raw record.
 *   outf - stream that receives the decoded text.
 * Returns whatever marc_display_ex returns.
 */
615 int marc_display (const char *buf, FILE *outf)
617 return marc_display_ex (buf, outf, 0);