2 * Copyright (C) 1995-2005, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdisp.c,v 1.12 2005-01-15 19:47:14 adam Exp $
10 * \brief Implements MARC display - and conversion utilities
20 #include <yaz/marcdisp.h>
21 #include <yaz/wrbuf.h>
22 #include <yaz/yaz-util.h>
/* Create a MARC decoder handle.
 * The handle is allocated with xmalloc, its output mode (mt->xml) starts as
 * YAZ_MARC_LINE, and it gets its own WRBUF (mt->m_wr), which
 * yaz_marc_decode_buf and marc_display_exl use as the output buffer.
 * NOTE(review): remaining field initialisation and the return statement are
 * not visible in this view. */
31 yaz_marc_t yaz_marc_create(void)
33 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
34 mt->xml = YAZ_MARC_LINE;
36 mt->m_wr = wrbuf_alloc();
/* Destroy a MARC decoder handle.
 * The visible statement releases the handle's private WRBUF (mt->m_wr).
 * NOTE(review): the meaning of the second wrbuf_free argument (1) is defined
 * in yaz/wrbuf.h - presumably it also frees the underlying buffer; confirm. */
41 void yaz_marc_destroy(yaz_marc_t mt)
45 wrbuf_free (mt->m_wr, 1);
/* Write len bytes of buf to wr through the handle's iconv descriptor
 * (mt->iconv_cd), choosing the writer by output mode. */
49 static void marc_cdata (yaz_marc_t mt, const char *buf, size_t len, WRBUF wr)
/* ISO2709 and line output both use the plain iconv writer */
51 if (mt->xml == YAZ_MARC_ISO2709)
52 wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
53 else if (mt->xml == YAZ_MARC_LINE)
54 wrbuf_iconv_write(wr, mt->iconv_cd, buf, len);
/* remaining modes use the cdata writer - presumably the XML modes, where
 * markup characters must be escaped (the else line is not visible here) */
56 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, buf, len);
/* Decode one ISO2709 (MARC) record and write it to wr in the format selected
 * by mt->xml (line, ISO2709, simple XML, OAI MARC or MARCXML).
 *
 *   mt    - decoder handle; supplies output mode, debug flag and iconv_cd
 *   buf   - raw record; the first 5 bytes are parsed as the record length and
 *           leader positions 10, 11, 12-16, 20, 21 and 22 are read below
 *   bsize - size of buf, or -1 when unknown (see the check on bsize below)
 *   wr    - WRBUF receiving the output
 *
 * The visible success path returns record_length. NOTE(review): the values
 * returned on the early error paths are not visible in this view. */
59 int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
64 int identifier_length;
66 int length_data_entry;
68 int length_implementation;
72 record_length = atoi_n (buf, 5);
/* directory parsing starts at offset 24, so a record shorter than 25 bytes
 * cannot hold even an empty directory */
73 if (record_length < 25)
/* NOTE(review): unbounded sprintf into str (declaration not visible here);
 * confirm str is large enough for every message formatted in this function */
79 sprintf (str, "Record length %d - aborting\n", record_length);
84 /* bail out if bsize is known and record_length is greater than that */
85 if (bsize != -1 && record_length > bsize)
/* ctype functions need an unsigned char value, hence the cast */
87 if (isdigit(((const unsigned char *) buf)[10]))
88 indicator_length = atoi_n (buf+10, 1);
91 if (isdigit(((const unsigned char *) buf)[11]))
92 identifier_length = atoi_n (buf+11, 1);
/* fallback used when leader position 11 is not a digit */
94 identifier_length = 2;
95 base_address = atoi_n (buf+12, 5);
97 length_data_entry = atoi_n (buf+20, 1);
/* NOTE(review): this test also rejects '0' and '9', although the message
 * only says the position "should hold a digit"; confirm whether
 * < '0' || > '9' was intended */
98 if (buf[20] <= '0' || buf[20] >= '9')
100 wrbuf_printf(wr, "<!-- Length data entry should hold a digit. Assuming 4 -->\n");
101 length_data_entry = 4;
103 length_starting = atoi_n (buf+21, 1);
/* NOTE(review): same boundary question as for buf[20] above */
104 if (buf[21] <= '0' || buf[21] >= '9')
106 wrbuf_printf(wr, "<!-- Length starting should hold a digit. Assuming 5 -->\n");
109 length_implementation = atoi_n (buf+22, 1);
/* record prologue: every mode except line output emits a format header */
111 if (mt->xml != YAZ_MARC_LINE)
117 case YAZ_MARC_ISO2709:
119 case YAZ_MARC_SIMPLEXML:
120 wrbuf_puts (wr, "<iso2709\n");
121 sprintf (str, " RecordStatus=\"%c\"\n", buf[5]);
122 wrbuf_puts (wr, str);
123 sprintf (str, " TypeOfRecord=\"%c\"\n", buf[6]);
124 wrbuf_puts (wr, str);
/* NOTE(review): i runs 1..19, so buf[7]..buf[25] are emitted; indexes 24
 * and 25 lie beyond the 24-byte leader (directory starts at 24) - confirm
 * the upper bound */
125 for (i = 1; i<=19; i++)
127 sprintf (str, " ImplDefined%d=\"%c\"\n", i, buf[6+i]);
128 wrbuf_puts (wr, str);
130 wrbuf_puts (wr, ">\n");
132 case YAZ_MARC_OAIMARC:
/* NOTE(review): "OIA" in the namespace below differs from "OAI" used in the
 * schemaLocation string; looks like a transposition - confirm before
 * changing, since consumers may match on the emitted namespace */
135 "<oai_marc xmlns=\"http://www.openarchives.org/OIA/oai_marc\""
137 " xmlns:xsi=\"http://www.w3.org/2000/10/XMLSchema-instance\""
139 " xsi:schemaLocation=\"http://www.openarchives.org/OAI/oai_marc.xsd\""
143 sprintf (str, " status=\"%c\" type=\"%c\" catForm=\"%c\">\n",
144 buf[5], buf[6], buf[7]);
145 wrbuf_puts (wr, str);
147 case YAZ_MARC_MARCXML:
150 "<record xmlns=\"http://www.loc.gov/MARC21/slim\">\n"
/* leader written as bytes 0-8, a literal "a" in place of byte 9, then
 * bytes 10-23; the condition selecting this variant is not visible here */
153 marc_cdata(mt, buf, 9, wr);
154 marc_cdata(mt, "a", 1, wr); /* set leader to signal unicode */
155 marc_cdata(mt, buf+10, 14, wr);
157 marc_cdata(mt, buf, 24, wr); /* leave header as is .. */
159 wrbuf_printf(wr, "</leader>\n");
/* dump of the parsed leader values inside an XML comment (the guarding
 * condition is not visible here; presumably mt->debug) */
168 wrbuf_puts (wr, "<!--\n");
169 sprintf (str, "Record length %5d\n", record_length);
170 wrbuf_puts (wr, str);
171 sprintf (str, "Indicator length %5d\n", indicator_length);
172 wrbuf_puts (wr, str);
173 sprintf (str, "Identifier length %5d\n", identifier_length);
174 wrbuf_puts (wr, str);
175 sprintf (str, "Base address %5d\n", base_address);
176 wrbuf_puts (wr, str);
177 sprintf (str, "Length data entry %5d\n", length_data_entry);
178 wrbuf_puts (wr, str);
179 sprintf (str, "Length starting %5d\n", length_starting);
180 wrbuf_puts (wr, str);
181 sprintf (str, "Length implementation %5d\n", length_implementation);
182 wrbuf_puts (wr, str);
184 wrbuf_puts (wr, "-->\n");
187 /* first pass. determine length of directory & base of data */
188 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
/* one directory entry = 3-byte tag + length field + starting-position field */
190 entry_p += 3+length_data_entry+length_starting;
191 if (entry_p >= record_length)
194 if (mt->debug && base_address != entry_p+1)
196 wrbuf_printf (wr," <!-- base address not at end of directory "
197 "base=%d end=%d -->\n", base_address, entry_p+1);
/* the base address from the leader is overridden by the position just
 * past the directory terminator found above */
199 base_address = entry_p+1;
201 if (mt->xml == YAZ_MARC_ISO2709)
/* scratch buffers: new leader, new directory, and a temporary used to
 * measure each field after character conversion */
203 WRBUF wr_head = wrbuf_alloc();
204 WRBUF wr_dir = wrbuf_alloc();
205 WRBUF wr_tmp = wrbuf_alloc();
208 /* second pass. create directory for ISO2709 output */
209 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
211 int data_length, data_offset, end_offset;
/* the tag is copied to the new directory unchanged */
214 wrbuf_write(wr_dir, buf+entry_p, 3);
217 data_length = atoi_n (buf+entry_p, length_data_entry);
218 entry_p += length_data_entry;
219 data_offset = atoi_n (buf+entry_p, length_starting);
220 entry_p += length_starting;
221 i = data_offset + base_address;
222 end_offset = i+data_length-1;
/* scan to the field's terminator (continuation of this condition is not
 * visible here) */
224 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS &&
/* sz1: source byte count of the field including the byte at i */
227 sz1 = 1+i - (data_offset + base_address);
/* sz2: value returned by the converting write of the field into wr_tmp;
 * it is what gets stored as the field length in the new directory */
230 sz2 = wrbuf_iconv_write(wr_tmp, mt->iconv_cd,
231 buf + data_offset+base_address, sz1);
232 wrbuf_rewind(wr_tmp);
/* zero-padded to the widths declared in the leader; data_p is presumably
 * the running output offset (its update is not visible here) */
236 wrbuf_printf(wr_dir, "%0*d", length_data_entry, sz2);
237 wrbuf_printf(wr_dir, "%0*d", length_starting, data_p);
240 wrbuf_putc(wr_dir, ISO2709_FS);
/* rebuild the 24-byte leader: 5-digit record length, original bytes 5-11,
 * 5-digit base address, original bytes 17-23 */
241 wrbuf_printf(wr_head, "%05d", data_p+1 + base_address);
242 wrbuf_write(wr_head, buf+5, 7);
243 wrbuf_printf(wr_head, "%05d", base_address);
244 wrbuf_write(wr_head, buf+17, 7);
246 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
247 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
248 wrbuf_free(wr_head, 1);
249 wrbuf_free(wr_dir, 1);
250 wrbuf_free(wr_tmp, 1);
252 /* third pass. create data output */
253 for (entry_p = 24; buf[entry_p] != ISO2709_FS; )
260 int identifier_flag = 0;
263 memcpy (tag, buf+entry_p, 3);
267 data_length = atoi_n (buf+entry_p, length_data_entry);
268 entry_p += length_data_entry;
269 data_offset = atoi_n (buf+entry_p, length_starting);
270 entry_p += length_starting;
271 i = data_offset + base_address;
272 end_offset = i+data_length-1;
276 wrbuf_printf(wr, "<!-- offset=%d data dlength=%d doffset=%d -->\n",
277 entry_p0, data_length, data_offset);
/* decide whether the field has indicators/subfields: with an indicator
 * length of 1-3, look for a subfield delimiter right after the indicators
 * (or one byte later); otherwise fall back on the tag not starting with
 * "00". The assignments to identifier_flag are not visible here. */
280 if (indicator_length < 4 && indicator_length > 0)
282 if (buf[i + indicator_length] == ISO2709_IDFS)
284 else if (buf[i + indicator_length + 1] == ISO2709_IDFS)
287 else if (memcmp (tag, "00", 2))
/* field start: output depends on mode (switch/case lines partly missing) */
294 wrbuf_puts (wr, "Tag: ");
295 wrbuf_puts (wr, tag);
296 wrbuf_puts (wr, " ");
298 case YAZ_MARC_SIMPLEXML:
299 wrbuf_printf (wr, "<field tag=\"");
300 marc_cdata(mt, tag, strlen(tag), wr);
301 wrbuf_printf(wr, "\"");
303 case YAZ_MARC_OAIMARC:
305 wrbuf_printf (wr, " <varfield id=\"");
307 wrbuf_printf (wr, " <fixfield id=\"");
308 marc_cdata(mt, tag, strlen(tag), wr);
309 wrbuf_printf(wr, "\"");
311 case YAZ_MARC_MARCXML:
313 wrbuf_printf (wr, " <datafield tag=\"");
315 wrbuf_printf (wr, " <controlfield tag=\"");
316 marc_cdata(mt, tag, strlen(tag), wr);
317 wrbuf_printf(wr, "\"");
/* skips identifier_flag-1 bytes before the indicators (nothing when the
 * flag is 1) */
322 i += identifier_flag-1;
/* one attribute / output item per indicator byte */
323 for (j = 0; j<indicator_length; j++, i++)
327 case YAZ_MARC_ISO2709:
328 wrbuf_putc(wr, buf[i]);
332 wrbuf_puts (wr, " Ind: ");
333 wrbuf_putc(wr, buf[i]);
335 case YAZ_MARC_SIMPLEXML:
336 wrbuf_printf(wr, " Indicator%d=\"", j+1);
337 marc_cdata(mt, buf+i, 1, wr);
338 wrbuf_printf(wr, "\"");
340 case YAZ_MARC_OAIMARC:
341 wrbuf_printf(wr, " i%d=\"", j+1);
342 marc_cdata(mt, buf+i, 1, wr);
343 wrbuf_printf(wr, "\"");
345 case YAZ_MARC_MARCXML:
346 wrbuf_printf(wr, " ind%d=\"", j+1);
347 marc_cdata(mt, buf+i, 1, wr);
348 wrbuf_printf(wr, "\"");
/* close the opening tag of the field element in the XML modes */
352 if (mt->xml == YAZ_MARC_SIMPLEXML || mt->xml == YAZ_MARC_MARCXML
353 || mt->xml == YAZ_MARC_OAIMARC)
355 wrbuf_puts (wr, ">");
357 wrbuf_puts (wr, "\n");
359 if (mt->xml == YAZ_MARC_LINE)
362 wrbuf_puts (wr, " Fields: ");
/* subfield loop: runs until a record/field separator or end_offset */
366 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
372 case YAZ_MARC_ISO2709:
/* ISO2709 output copies the whole identifier (identifier_length bytes) */
374 wrbuf_iconv_write(wr, mt->iconv_cd,
375 buf+i, identifier_length);
376 i += identifier_length;
/* the other modes emit identifier_length-1 bytes as the subfield code;
 * presumably i was already advanced past the delimiter byte on a line not
 * visible here */
379 wrbuf_puts (wr, " $");
380 marc_cdata(mt, buf+i, identifier_length-1, wr);
381 i = i+identifier_length-1;
382 wrbuf_putc (wr, ' ');
384 case YAZ_MARC_SIMPLEXML:
385 wrbuf_puts (wr, " <subfield code=\"");
386 marc_cdata(mt, buf+i, identifier_length-1, wr);
387 i = i+identifier_length-1;
388 wrbuf_puts (wr, "\">");
390 case YAZ_MARC_OAIMARC:
391 wrbuf_puts (wr, " <subfield label=\"");
392 marc_cdata(mt, buf+i, identifier_length-1, wr);
393 i = i+identifier_length-1;
394 wrbuf_puts (wr, "\">");
396 case YAZ_MARC_MARCXML:
397 wrbuf_puts (wr, " <subfield code=\"");
398 marc_cdata(mt, buf+i, identifier_length-1, wr);
399 i = i+identifier_length-1;
400 wrbuf_puts (wr, "\">");
/* subfield data extends to the next delimiter, separator or end_offset;
 * i0 presumably marks where the data started (assignment not visible) */
404 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS &&
405 buf[i] != ISO2709_FS && i < end_offset)
407 marc_cdata(mt, buf + i0, i - i0, wr);
/* in ISO2709 output the terminating byte is copied through as well, unless
 * it is a subfield delimiter (that one is written with the next subfield) */
409 if (mt->xml == YAZ_MARC_ISO2709 && buf[i] != ISO2709_IDFS)
410 marc_cdata(mt, buf + i, 1, wr);
412 if (mt->xml == YAZ_MARC_SIMPLEXML ||
413 mt->xml == YAZ_MARC_MARCXML ||
414 mt->xml == YAZ_MARC_OAIMARC)
415 wrbuf_puts (wr, "</subfield>\n");
/* field without subfields: data runs to the separator or end_offset */
421 while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset)
423 marc_cdata(mt, buf + i0, i - i0, wr);
424 if (mt->xml == YAZ_MARC_ISO2709)
425 marc_cdata(mt, buf + i, 1, wr);
427 if (mt->xml == YAZ_MARC_LINE)
428 wrbuf_putc (wr, '\n');
/* diagnostics for a directory length that disagrees with where the
 * separator was actually found (guarding conditions partly not visible) */
430 wrbuf_printf(wr, " <!-- separator but not at end of field length=%d-->\n", data_length);
431 if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS)
432 wrbuf_printf(wr, " <!-- no separator at end of field length=%d-->\n", data_length);
/* field end: closing element per mode */
435 case YAZ_MARC_SIMPLEXML:
436 wrbuf_puts (wr, "</field>\n");
438 case YAZ_MARC_OAIMARC:
440 wrbuf_puts (wr, " </varfield>\n");
442 wrbuf_puts (wr, " </fixfield>\n");
444 case YAZ_MARC_MARCXML:
446 wrbuf_puts (wr, " </datafield>\n");
448 wrbuf_puts (wr, " </controlfield>\n");
/* record epilogue: closing root element, or the record separator for
 * ISO2709 output */
457 case YAZ_MARC_SIMPLEXML:
458 wrbuf_puts (wr, "</iso2709>\n");
460 case YAZ_MARC_OAIMARC:
461 wrbuf_puts (wr, "</oai_marc>\n");
463 case YAZ_MARC_MARCXML:
464 wrbuf_puts (wr, "</record>\n");
466 case YAZ_MARC_ISO2709:
467 wrbuf_putc (wr, ISO2709_RS);
/* the length taken from the input leader is returned, not the number of
 * bytes written to wr */
470 return record_length;
/* Decode a record into the handle's own WRBUF and expose the result.
 *   result - receives a pointer to the WRBUF's data; it points into
 *            mt->m_wr, so the caller does not own it (presumably valid only
 *            until the handle is reused or destroyed - confirm)
 *   rsize  - receives the length of that data
 * NOTE(review): the conditions guarding the two assignments, the reset of
 * m_wr before decoding, and the return statement are not visible in this
 * view. */
473 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
474 char **result, int *rsize)
476 int r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
480 *result = wrbuf_buf(mt->m_wr);
482 *rsize = wrbuf_len(mt->m_wr);
/* Select the output mode of the handle.
 * NOTE(review): the body is not visible in this view; presumably it stores
 * xmlmode in mt->xml, the field the decoder switches on - confirm. */
487 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
/* Set the debug level of the handle.
 * NOTE(review): the body is not visible in this view; presumably it stores
 * level in mt->debug, which the decoder tests before emitting diagnostics -
 * confirm. */
493 void yaz_marc_debug(yaz_marc_t mt, int level)
/* Attach a character-set conversion descriptor to the handle.
 * NOTE(review): the body is not visible in this view; presumably it stores
 * cd in mt->iconv_cd, which is passed to the wrbuf_iconv_write calls -
 * confirm, including who owns and closes cd. */
499 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/* One-shot decode: creates a temporary handle, decodes buf into the
 * caller's wr and destroys the handle again.
 * NOTE(review): the lines applying the debug and xml arguments to the handle
 * and the return statement are not visible in this view. */
505 int yaz_marc_decode(const char *buf, WRBUF wr, int debug, int bsize, int xml)
507 yaz_marc_t mt = yaz_marc_create();
512 r = yaz_marc_decode_wrbuf(mt, buf, bsize, wr);
513 yaz_marc_destroy(mt);
/* Decode buf into wr via yaz_marc_decode, passing 0 as the xml argument.
 * Returns whatever yaz_marc_decode returns. */
518 int marc_display_wrbuf (const char *buf, WRBUF wr, int debug, int bsize)
520 return yaz_marc_decode(buf, wr, debug, bsize, 0);
/* Decode buf with a temporary handle and write the output to outf.
 * The record is decoded into the handle's own WRBUF, which is then written
 * to the stream in a single fwrite before the handle is destroyed.
 * NOTE(review): the fwrite result is not checked in the visible line; the
 * use of the debug argument and the return statement are not visible in
 * this view. */
524 int marc_display_exl (const char *buf, FILE *outf, int debug, int bsize)
526 yaz_marc_t mt = yaz_marc_create();
530 r = yaz_marc_decode_wrbuf (mt, buf, bsize, mt->m_wr);
534 fwrite (wrbuf_buf(mt->m_wr), 1, wrbuf_len(mt->m_wr), outf);
535 yaz_marc_destroy(mt);
/* Same as marc_display_exl with bsize = -1, i.e. buffer size unknown. */
540 int marc_display_ex (const char *buf, FILE *outf, int debug)
542 return marc_display_exl (buf, outf, debug, -1);
/* Same as marc_display_ex with debug = 0. */
546 int marc_display (const char *buf, FILE *outf)
548 return marc_display_ex (buf, outf, 0);