2 * Copyright (C) 1995-2005, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdisp.c,v 1.15 2005-02-02 23:25:08 adam Exp $
10 * \brief Implements MARC display - and conversion utilities
20 #include <yaz/marcdisp.h>
21 #include <yaz/wrbuf.h>
22 #include <yaz/yaz-util.h>
/**
 * \brief Creates a MARC decoding handle with default settings.
 *
 * The handle is allocated with xmalloc, its output format is set to
 * YAZ_MARC_LINE, a WRBUF is allocated for the handle's own output, and
 * the subfield and end-of-line strings default to " $" and a newline.
 *
 * NOTE(review): the return statement and any further field
 * initialisation are not visible in this excerpt - presumably mt is
 * returned; confirm against the full file.
 */
33 yaz_marc_t yaz_marc_create(void)
35 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
36 mt->xml = YAZ_MARC_LINE;
38 mt->m_wr = wrbuf_alloc();
40 strcpy(mt->subfield_str, " $");
41 strcpy(mt->endline_str, "\n");
/**
 * \brief Sets the string stored in mt->subfield_str.
 *
 * \param mt handle whose subfield_str is overwritten
 * \param s  new string; copied, not referenced
 *
 * At most sizeof(mt->subfield_str)-1 bytes are copied, so longer input
 * is silently truncated.
 */
45 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
47 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
/* strncpy does not terminate on truncation, so force the final NUL. */
48 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/**
 * \brief Sets the string stored in mt->endline_str.
 *
 * \param mt handle whose endline_str is overwritten
 * \param s  new string; copied, not referenced
 *
 * At most sizeof(mt->endline_str)-1 bytes are copied, so longer input
 * is silently truncated. The decoder writes this string when
 * mt->xml is YAZ_MARC_LINE.
 */
51 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
53 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
/* strncpy does not terminate on truncation, so force the final NUL. */
54 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
/**
 * \brief Destroys a MARC handle.
 *
 * \param mt handle to destroy
 *
 * The visible code frees the handle's internal WRBUF (mt->m_wr).
 * NOTE(review): the rest of the body (e.g. a NULL check and freeing mt
 * itself) is not visible in this excerpt - confirm against the full file.
 */
57 void yaz_marc_destroy(yaz_marc_t mt)
61 wrbuf_free (mt->m_wr, 1);
/**
 * \brief Writes len bytes of buf to wr via the handle's iconv descriptor.
 *
 * \param mt  handle supplying the output format (mt->xml) and mt->iconv_cd
 * \param buf bytes to write
 * \param len number of bytes taken from buf
 * \param wr  destination buffer
 *
 * For YAZ_MARC_ISO2709 and YAZ_MARC_LINE the data goes through
 * wrbuf_iconv_write. The last call uses wrbuf_iconv_write_cdata instead.
 * NOTE(review): the line before that last call is not visible here;
 * presumably it is an "else" covering the XML formats - confirm.
 */
65 static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr)
67 if (mt->xml == YAZ_MARC_ISO2709)
68 wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
69 else if (mt->xml == YAZ_MARC_LINE)
70 wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
72 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, buf, len);
/**
 * \brief Decodes one ISO2709 record and appends a rendering of it to wr.
 *
 * \param mt    handle supplying the output format (mt->xml), the debug
 *              flag, the iconv descriptor and the separator strings
 * \param buf   record data; leader fields are read from fixed offsets
 *              and the directory is walked starting at offset 24
 * \param bsize size of buf, or -1 when the size is not known
 * \param wr    buffer receiving the output
 * \return record_length as read from the leader on the final path.
 *         NOTE(review): the values returned on the early bail-out paths
 *         are not visible in this excerpt.
 *
 * The visible code makes three passes over the directory: one to find
 * where the directory ends, one (ISO2709 output only) to build a new
 * leader and directory, and one to write the field data in the format
 * selected by mt->xml.
 */
75 int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
80 int identifier_length;
82 int length_data_entry;
84 int length_implementation;
/* Leader bytes 0-4 hold the record length. */
88 record_length = atoi_n (buf, 5);
89 if (record_length < 25)
95 sprintf (str, "Record length %d - aborting\n", record_length);
100 /* bail out if bsize is known and record_length is greater than that */
101 if (bsize != -1 && record_length > bsize)
/* Leader byte 10: indicator length; byte 11: identifier length.
   The value 2 appears as the alternative for each (the "else" lines
   are not visible in this excerpt). */
103 if (isdigit(((const unsigned char *) buf)[10]))
104 indicator_length = atoi_n (buf+10, 1);
106 indicator_length = 2;
107 if (isdigit(((const unsigned char *) buf)[11]))
108 identifier_length = atoi_n (buf+11, 1);
110 identifier_length = 2;
/* Leader bytes 12-16: base address of data. */
111 base_address = atoi_n (buf+12, 5);
113 length_data_entry = atoi_n (buf+20, 1);
/* NOTE(review): this test also fires for '0' and '9', not only for
   non-digit characters. */
114 if (buf[20] <= '0' || buf[20] >= '9')
116 wrbuf_printf(wr, "<!-- Length data entry should hold a digit. Assuming 4 -->\n");
117 length_data_entry = 4;
119 length_starting = atoi_n (buf+21, 1);
/* NOTE(review): same range test as above - '0' and '9' are rejected too. */
120 if (buf[21] <= '0' || buf[21] >= '9')
122 wrbuf_printf(wr, "<!-- Length starting should hold a digit. Assuming 5 -->\n");
125 length_implementation = atoi_n (buf+22, 1);
127 if (mt->xml != YAZ_MARC_LINE)
133 case YAZ_MARC_ISO2709:
135 case YAZ_MARC_SIMPLEXML:
136 wrbuf_puts (wr, "<iso2709\n");
137 sprintf (str, " RecordStatus=\"%c\"\n", buf[5]);
138 wrbuf_puts (wr, str);
139 sprintf (str, " TypeOfRecord=\"%c\"\n", buf[6]);
140 wrbuf_puts (wr, str);
/* Leader bytes 7..25 are emitted as ImplDefined1..ImplDefined19. */
141 for (i = 1; i<=19; i++)
143 sprintf (str, " ImplDefined%d=\"%c\"\n", i, buf[6+i]);
144 wrbuf_puts (wr, str);
146 wrbuf_puts (wr, ">\n");
148 case YAZ_MARC_OAIMARC:
151 "<oai_marc xmlns=\"http://www.openarchives.org/OIA/oai_marc\""
153 " xmlns:xsi=\"http://www.w3.org/2000/10/XMLSchema-instance\""
155 " xsi:schemaLocation=\"http://www.openarchives.org/OAI/oai_marc.xsd\""
159 sprintf (str, " status=\"%c\" type=\"%c\" catForm=\"%c\">\n",
160 buf[5], buf[6], buf[7]);
161 wrbuf_puts (wr, str);
163 case YAZ_MARC_MARCXML:
166 "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
/* Leader written as bytes 0-8, a literal 'a' in place of byte 9, then
   bytes 10-23. The condition selecting this over the verbatim copy
   below is not visible in this excerpt. */
169 marc_cdata(mt, buf, 9, wr);
170 marc_cdata(mt, "a", 1, wr); /* set leader to signal unicode */
171 marc_cdata(mt, buf+10, 14, wr);
173 marc_cdata(mt, buf, 24, wr); /* leave header as is .. */
175 wrbuf_printf(wr, "</leader>\n");
/* Dump of the parsed leader values inside an XML comment. */
184 wrbuf_puts (wr, "<!--\n");
185 sprintf (str, "Record length %5d\n", record_length);
186 wrbuf_puts (wr, str);
187 sprintf (str, "Indicator length %5d\n", indicator_length);
188 wrbuf_puts (wr, str);
189 sprintf (str, "Identifier length %5d\n", identifier_length);
190 wrbuf_puts (wr, str);
191 sprintf (str, "Base address %5d\n", base_address);
192 wrbuf_puts (wr, str);
193 sprintf (str, "Length data entry %5d\n", length_data_entry);
194 wrbuf_puts (wr, str);
195 sprintf (str, "Length starting %5d\n", length_starting);
196 wrbuf_puts (wr, str);
197 sprintf (str, "Length implementation %5d\n", length_implementation);
198 wrbuf_puts (wr, str);
200 wrbuf_puts (wr, "-->\n");
203 /* first pass. determine length of directory & base of data */
204 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
/* Each directory entry is a 3-byte tag followed by the length field
   and the starting-position field. */
206 entry_p += 3+length_data_entry+length_starting;
207 if (entry_p >= record_length)
210 if (mt->debug && base_address != entry_p+1)
212 wrbuf_printf (wr," <!-- base address not at end of directory "
213 "base=%d end=%d -->\n", base_address, entry_p+1);
/* The base address is set to the byte following the directory
   terminator found by the loop above. */
215 base_address = entry_p+1;
217 if (mt->xml == YAZ_MARC_ISO2709)
219 WRBUF wr_head = wrbuf_alloc();
220 WRBUF wr_dir = wrbuf_alloc();
221 WRBUF wr_tmp = wrbuf_alloc();
224 /* second pass. create directory for ISO2709 output */
225 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
227 int data_length, data_offset, end_offset;
230 wrbuf_write(wr_dir, buf+entry_p, 3);
233 data_length = atoi_n (buf+entry_p, length_data_entry);
234 entry_p += length_data_entry;
235 data_offset = atoi_n (buf+entry_p, length_starting);
236 entry_p += length_starting;
237 i = data_offset + base_address;
238 end_offset = i+data_length-1;
240 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS &&
243 sz1 = 1+i - (data_offset + base_address);
/* The field is written to the scratch buffer wr_tmp only to obtain
   sz2, the value returned by wrbuf_iconv_write; wr_tmp is rewound
   right after. */
246 sz2 = wrbuf_iconv_write(wr_tmp, mt->iconv_cd,
247 buf + data_offset+base_address, sz1);
248 wrbuf_rewind(wr_tmp);
/* New directory entry: zero-padded length (sz2) and start (data_p). */
252 wrbuf_printf(wr_dir, "%0*d", length_data_entry, sz2);
253 wrbuf_printf(wr_dir, "%0*d", length_starting, data_p);
256 wrbuf_putc(wr_dir, ISO2709_FS);
/* New leader: 5-digit record length, original bytes 5-11, 5-digit base
   address, original bytes 17-23. */
257 wrbuf_printf(wr_head, "%05d", data_p+1 + base_address);
258 wrbuf_write(wr_head, buf+5, 7);
259 wrbuf_printf(wr_head, "%05d", base_address);
260 wrbuf_write(wr_head, buf+17, 7);
262 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
263 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
264 wrbuf_free(wr_head, 1);
265 wrbuf_free(wr_dir, 1);
266 wrbuf_free(wr_tmp, 1);
268 /* third pass. create data output */
269 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
276 int identifier_flag = 0;
279 memcpy (tag, buf+entry_p, 3);
283 data_length = atoi_n (buf+entry_p, length_data_entry);
284 entry_p += length_data_entry;
285 data_offset = atoi_n (buf+entry_p, length_starting);
286 entry_p += length_starting;
/* i indexes the first byte of this field's data; end_offset its last. */
287 i = data_offset + base_address;
288 end_offset = i+data_length-1;
292 wrbuf_printf(wr, "<!-- offset=%d data dlength=%d doffset=%d -->\n",
293 entry_p0, data_length, data_offset);
296 if (memcmp (tag, "00", 2))
297 identifier_flag = 1; /* if not 00X assume subfields */
298 else if (indicator_length < 4 && indicator_length > 0)
300 /* Danmarc 00X have subfields */
301 if (buf[i + indicator_length] == ISO2709_IDFS)
303 else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
309 wrbuf_printf(wr, "<!-- identifier_flag = %d -->\n",
/* Field start: emit the tag in the selected output format. */
317 wrbuf_puts (wr, "Tag: ");
318 wrbuf_puts (wr, tag);
319 wrbuf_puts (wr, " ");
321 case YAZ_MARC_SIMPLEXML:
322 wrbuf_printf (wr, "<field tag=\"");
323 marc_cdata(mt, tag, strlen(tag), wr);
324 wrbuf_printf(wr, "\"");
326 case YAZ_MARC_OAIMARC:
328 wrbuf_printf (wr, " <varfield id=\"");
330 wrbuf_printf (wr, " <fixfield id=\"");
331 marc_cdata(mt, tag, strlen(tag), wr);
332 wrbuf_printf(wr, "\"");
334 case YAZ_MARC_MARCXML:
336 wrbuf_printf (wr, " <datafield tag=\"");
338 wrbuf_printf (wr, " <controlfield tag=\"");
339 marc_cdata(mt, tag, strlen(tag), wr);
340 wrbuf_printf(wr, "\"");
345 i += identifier_flag-1;
/* Emit one indicator byte per iteration, advancing i past it. */
346 for (j = 0; j<indicator_length; j++, i++)
350 case YAZ_MARC_ISO2709:
351 wrbuf_putc(wr, buf[i]);
355 wrbuf_puts (wr, " Ind: ");
356 wrbuf_putc(wr, buf[i]);
358 case YAZ_MARC_SIMPLEXML:
359 wrbuf_printf(wr, " Indicator%d=\"", j+1);
360 marc_cdata(mt, buf+i, 1, wr);
361 wrbuf_printf(wr, "\"");
363 case YAZ_MARC_OAIMARC:
364 wrbuf_printf(wr, " i%d=\"", j+1);
365 marc_cdata(mt, buf+i, 1, wr);
366 wrbuf_printf(wr, "\"");
368 case YAZ_MARC_MARCXML:
369 wrbuf_printf(wr, " ind%d=\"", j+1);
370 marc_cdata(mt, buf+i, 1, wr);
371 wrbuf_printf(wr, "\"");
375 if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_MARCXML
376 || mt->xml == YAZ_MARC_OAIMARC)
378 wrbuf_puts (wr, ">");
380 wrbuf_puts (wr, "\n");
382 if (mt->xml == YAZ_MARC_LINE)
385 wrbuf_puts (wr, " Fields: ");
/* Subfield loop: runs until a record or field terminator is seen or
   end_offset is reached. */
389 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
395 case YAZ_MARC_ISO2709:
397 wrbuf_iconv_write(wr, mt->iconv_cd,
398 buf+i, identifier_length);
399 i += identifier_length;
402 wrbuf_puts (wr, mt->subfield_str);
403 marc_cdata(mt, buf+i, identifier_length-1, wr);
404 i = i+identifier_length-1;
405 wrbuf_putc (wr, ' ');
407 case YAZ_MARC_SIMPLEXML:
408 wrbuf_puts (wr, " <subfield code=\"");
409 marc_cdata(mt, buf+i, identifier_length-1, wr);
410 i = i+identifier_length-1;
411 wrbuf_puts (wr, "\">");
413 case YAZ_MARC_OAIMARC:
414 wrbuf_puts (wr, " <subfield label=\"");
415 marc_cdata(mt, buf+i, identifier_length-1, wr);
416 i = i+identifier_length-1;
417 wrbuf_puts (wr, "\">");
419 case YAZ_MARC_MARCXML:
420 wrbuf_puts (wr, " <subfield code=\"");
421 marc_cdata(mt, buf+i, identifier_length-1, wr);
422 i = i+identifier_length-1;
423 wrbuf_puts (wr, "\">");
/* Scan the subfield's data up to the next subfield delimiter, a
   terminator, or end_offset; the bytes from i0 to i are then written. */
427 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
428 buf[i] != ISO2709_FS && i < end_offset)
430 marc_cdata(mt, buf + i0, i - i0, wr);
432 if (mt->xml == YAZ_MARC_ISO2709 && buf[i] != ISO2709_IDFS)
433 marc_cdata(mt, buf + i, 1, wr);
435 if (mt->xml == YAZ_MARC_SIMPLEXML ||
436 mt->xml == YAZ_MARC_MARCXML ||
437 mt->xml == YAZ_MARC_OAIMARC)
438 wrbuf_puts (wr, "</subfield>\n");
/* Data written as one run from i0 to the terminator, without subfield
   splitting. NOTE(review): presumably the branch for fields without
   subfields; the enclosing condition is not visible in this excerpt. */
444 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
446 marc_cdata(mt, buf + i0, i - i0, wr);
447 if (mt->xml == YAZ_MARC_ISO2709)
448 marc_cdata(mt, buf + i, 1, wr);
450 if (mt->xml == YAZ_MARC_LINE)
451 wrbuf_puts (wr, mt->endline_str);
453 wrbuf_printf(wr, " <!-- separator but not at end of field length=%d-->\n", data_length);
454 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
455 wrbuf_printf(wr, " <!-- no separator at end of field length=%d-->\n", data_length);
/* Field end: close the element opened for this field. */
458 case YAZ_MARC_SIMPLEXML:
459 wrbuf_puts (wr, "</field>\n");
461 case YAZ_MARC_OAIMARC:
463 wrbuf_puts (wr, "</varfield>\n");
465 wrbuf_puts (wr, "</fixfield>\n");
467 case YAZ_MARC_MARCXML:
469 wrbuf_puts (wr, " </datafield>\n");
471 wrbuf_puts (wr, "</controlfield>\n");
/* Record end: close the root element, or write the record terminator
   for ISO2709 output. */
480 case YAZ_MARC_SIMPLEXML:
481 wrbuf_puts (wr, "</iso2709>\n");
483 case YAZ_MARC_OAIMARC:
484 wrbuf_puts (wr, "</oai_marc>\n");
486 case YAZ_MARC_MARCXML:
487 wrbuf_puts (wr, "</record>\n");
489 case YAZ_MARC_ISO2709:
490 wrbuf_putc (wr, ISO2709_RS);
493 return record_length;
/**
 * \brief Decodes a record into the handle's own buffer.
 *
 * \param mt     handle; its internal WRBUF (mt->m_wr) receives the output
 * \param buf    record data, passed on to yaz_marc_decode_wrbuf
 * \param bsize  size of buf, passed on to yaz_marc_decode_wrbuf
 * \param result receives the address of the WRBUF's buffer
 * \param rsize  receives the length of the WRBUF's contents
 *
 * *result points into mt->m_wr rather than to a copy.
 * NOTE(review): the conditions guarding the two assignments and the
 * return statement are not visible in this excerpt - presumably r is
 * returned; confirm.
 */
496 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
497 char **result, int *rsize)
499 int r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
503 *result = wrbuf_buf(mt->m_wr);
505 *rsize = wrbuf_len(mt->m_wr);
/**
 * \brief Configures the output format of a MARC handle.
 *
 * NOTE(review): only the signature is visible in this excerpt.
 * Presumably xmlmode is stored in mt->xml, the field the decoder
 * compares against the YAZ_MARC_* constants - confirm.
 */
510 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
/**
 * \brief Configures the debug level of a MARC handle.
 *
 * NOTE(review): only the signature is visible in this excerpt.
 * Presumably level is stored in mt->debug, which the decoder tests
 * before emitting diagnostic comments - confirm.
 */
516 void yaz_marc_debug(yaz_marc_t mt, int level)
/**
 * \brief Configures the character-set conversion of a MARC handle.
 *
 * NOTE(review): only the signature is visible in this excerpt.
 * Presumably cd is stored in mt->iconv_cd, which the decoder passes to
 * the wrbuf_iconv_write calls - confirm.
 */
522 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/**
 * \brief Decodes a record into wr using a temporary handle.
 *
 * \param buf   record data
 * \param wr    buffer receiving the output
 * \param debug debug setting
 * \param bsize size of buf, passed on to yaz_marc_decode_wrbuf
 * \param xml   output format
 *
 * A handle is created, used for a single yaz_marc_decode_wrbuf call
 * and destroyed again.
 * NOTE(review): the lines applying debug and xml to the handle and the
 * return statement are not visible in this excerpt - presumably r is
 * returned; confirm.
 */
528 int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
530 yaz_marc_t mt = yaz_marc_create();
535 r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr);
536 yaz_marc_destroy(mt);
/**
 * \brief Decodes a record into wr; wrapper around yaz_marc_decode.
 *
 * \param buf   record data
 * \param wr    buffer receiving the output
 * \param debug debug setting, passed through
 * \param bsize size of buf, passed through
 * \return whatever yaz_marc_decode returns
 *
 * The xml argument is fixed at 0.
 * NOTE(review): presumably 0 selects line-mode output - confirm against
 * the YAZ_MARC_* definitions.
 */
541 int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
543 return yaz_marc_decode(buf, wr, debug, bsize, 0);
/**
 * \brief Decodes a record and writes the result to a stdio stream.
 *
 * \param buf   record data
 * \param outf  stream receiving the decoded output
 * \param debug debug setting
 * \param bsize size of buf, passed on to yaz_marc_decode_wrbuf
 *
 * A temporary handle is created; the record is decoded into the
 * handle's own WRBUF, whose contents are then written to outf with
 * fwrite before the handle is destroyed.
 * NOTE(review): the lines applying debug, any condition around the
 * fwrite, and the return statement are not visible in this excerpt.
 */
547 int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
549 yaz_marc_t mt = yaz_marc_create();
553 r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr);
557 fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf);
558 yaz_marc_destroy(mt);
/**
 * \brief Same as marc_display_exl, for a buffer of unknown size.
 *
 * \param buf   record data
 * \param outf  stream receiving the decoded output
 * \param debug debug setting, passed through
 * \return whatever marc_display_exl returns
 *
 * bsize is passed as -1, the value the decoder treats as "size not
 * known" when checking the record length.
 */
563 int marc_display_ex (const char *buf, FILE *outf, int debug)
565 return marc_display_exl (buf, outf, debug, -1);
/**
 * \brief Same as marc_display_ex with debug set to 0.
 *
 * \param buf  record data
 * \param outf stream receiving the decoded output
 * \return whatever marc_display_ex returns
 */
569 int marc_display (const char *buf, FILE *outf)
571 return marc_display_ex (buf, outf, 0);