2 * Copyright (C) 1995-2006, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: marcdump.c,v 1.46 2006-12-18 10:33:52 adam Exp $
8 #define _FILE_OFFSET_BITS 64
15 #include <libxml/parser.h>
16 #include <libxml/tree.h>
18 #include <libxml/xpath.h>
19 #include <libxml/xpathInternals.h>
36 #include <yaz/marcdisp.h>
37 #include <yaz/yaz-util.h>
38 #include <yaz/xmalloc.h>
39 #include <yaz/options.h>
/* Print the command-line synopsis of this tool to stderr.
 *
 * prog: program name; presumably the value substituted for the leading
 *       %s of the format string (the fprintf argument list is not
 *       visible in this excerpt -- TODO confirm).
 */
51 static void usage(const char *prog)
53 fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] "
54 "[-i format] [-o format] "
55 "[-n] [-l pos=value] [-v] [-C chunk] [-s splitfname] file...\n",
/* Byte-reader callback handed to yaz_marc_read_line().
 *
 * client_data: opaque pointer that is really the FILE* to read from.
 * Returns an int; presumably the next byte of the stream (the read
 * itself is not visible in this excerpt -- TODO confirm).
 */
59 static int getbyte_stream(void *client_data)
/* Recover the stdio stream from the opaque callback argument. */
61 FILE *f = (FILE*) client_data;
/* Push-back callback handed to yaz_marc_read_line() alongside
 * getbyte_stream().
 *
 * c:           the byte value supplied by the caller; presumably pushed
 *              back onto the stream (not visible here -- TODO confirm).
 * client_data: opaque pointer that is really the FILE* in use.
 */
69 static void ungetbyte_stream(int c, void *client_data)
/* Recover the stdio stream from the opaque callback argument. */
71 FILE *f = (FILE*) client_data;
/* Read records from a file with yaz_marc_read_line() and print each one
 * to stdout.
 *
 * mt:    MARC handle used both for reading and for writing.
 * fname: path of the input file; opened in binary read mode.
 *
 * NOTE(review): the release of each WRBUF and the closing of the input
 * file are not visible in this excerpt -- confirm both happen.
 */
78 static void marcdump_read_line(yaz_marc_t mt, const char *fname)
80 FILE *inf = fopen(fname, "rb");
/* Diagnostic for a failed open; the guarding test is not visible here.
 * "prog" is not declared in this block (presumably a file-level
 * variable holding the program name). */
83 fprintf (stderr, "%s: cannot open %s:%s\n",
84 prog, fname, strerror (errno));
/* Keep going for as long as the reader returns 0, pulling bytes from
 * inf through the two stream callbacks. */
88 while (yaz_marc_read_line(mt, getbyte_stream,
89 ungetbyte_stream, inf) == 0)
/* A fresh buffer per record: fill it through yaz_marc_write_mode() and
 * copy its contents to stdout. */
91 WRBUF wrbuf = wrbuf_alloc();
92 yaz_marc_write_mode(mt, wrbuf);
93 fputs(wrbuf_buf(wrbuf), stdout);
/* Parse an XML file with libxml2, feed nodes to yaz_marc_read_xml() and
 * print the result to stdout.
 *
 * mt:    MARC handle used both for reading and for writing.
 * fname: path of the XML input file.
 *
 * NOTE(review): the iteration over nodes and any NULL checks on doc/ptr
 * are not visible in this excerpt.
 */
100 static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
103 xmlDocPtr doc = xmlParseFile(fname);
/* Start from the document's root element. */
107 ptr = xmlDocGetRootElement(doc);
111 WRBUF wrbuf = wrbuf_alloc();
/* r receives the status of the XML read; the test that selects the
 * failure message below is not visible here. */
112 r = yaz_marc_read_xml(mt, ptr);
114 fprintf(stderr, "yaz_marc_read_xml failed\n");
/* Fill the buffer through yaz_marc_write_mode() and copy it to stdout. */
117 yaz_marc_write_mode(mt, wrbuf);
119 fputs(wrbuf_buf(wrbuf), stdout);
/* Release the buffer obtained from wrbuf_alloc() above. */
121 wrbuf_free(wrbuf, 1);
/* Read MARC records from one input file and write them out, dispatching
 * on input_format.
 *
 * fname:               path of the input file.
 * from, to:            character set names handed to
 *                      yaz_iconv_open(to, from).
 * input_format:        YAZ_MARC_MARCXML or YAZ_MARC_XCHANGE select the
 *                      XML reader, YAZ_MARC_LINE the line reader and
 *                      YAZ_MARC_ISO2709 the binary reader in this
 *                      function.
 * output_format:       handed to yaz_marc_xml().
 * write_using_libxml2: handed to yaz_marc_write_using_libxml2().
 * print_offset:        enables offset/skip diagnostics on stdout.
 * split_fname:         name prefix for split output files.
 * split_chunk:         a new split file is started whenever
 *                      marc_no % split_chunk is 0.
 * verbose:             handed to yaz_marc_debug(); also enables
 *                      diagnostics on stdout.
 * cfile:               stream that receives the records as a C array
 *                      named marc_records (presumably only when
 *                      non-NULL; the guards are not visible here).
 * leader_spec:         handed to yaz_marc_leader_spec().
 *
 * NOTE(review): many conditions, declarations and braces of this
 * function are missing from this excerpt; comments below describe only
 * what the visible lines show.
 */
127 static void dump(const char *fname, const char *from, const char *to,
128 int input_format, int output_format,
129 int write_using_libxml2,
130 int print_offset, const char *split_fname, int split_chunk,
131 int verbose, FILE *cfile, const char *leader_spec)
133 yaz_marc_t mt = yaz_marc_create();
/* A non-zero return from yaz_marc_leader_spec() is treated as a bad
 * spec: report it and release the handle. */
136 if (yaz_marc_leader_spec(mt, leader_spec))
138 fprintf(stderr, "bad leader spec: %s\n", leader_spec);
139 yaz_marc_destroy(mt);
/* Note the argument order: destination charset first, source second. */
144 cd = yaz_iconv_open(to, from);
/* Failure path for an unsupported conversion (guard not visible). */
147 fprintf(stderr, "conversion from %s to %s "
148 "unsupported\n", from, to);
149 yaz_marc_destroy(mt);
/* Attach the converter and configure the handle. */
152 yaz_marc_iconv(mt, cd);
154 yaz_marc_xml(mt, output_format);
155 yaz_marc_write_using_libxml2(mt, write_using_libxml2);
156 yaz_marc_debug(mt, verbose);
/* Both XML-based input formats go through the libxml2 reader. */
158 if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_XCHANGE)
161 marcdump_read_xml(mt, fname);
164 else if (input_format == YAZ_MARC_LINE)
166 marcdump_read_line(mt, fname);
/* Binary input: records are read and framed by hand below. */
168 else if (input_format == YAZ_MARC_ISO2709)
170 FILE *inf = fopen(fname, "rb");
173 int split_file_no = -1;
/* Diagnostic for a failed open; "prog" is not declared in this block
 * (presumably a file-level variable holding the program name). */
176 fprintf (stderr, "%s: cannot open %s:%s\n",
177 prog, fname, strerror (errno));
/* Opening line of the generated C array. */
181 fprintf (cfile, "char *marc_records[] = {\n");
/* Read the first 5 bytes of the next record; they are parsed as its
 * length further down. */
191 r = fread (buf, 1, 5, inf);
/* Leftover bytes that do not make up a full 5-byte prefix. */
194 if (r && print_offset && verbose)
195 printf ("<!-- Extra %ld bytes at end of file -->\n",
/* Resynchronise: while the first byte is not an ASCII digit it cannot
 * start a length prefix. */
199 while (*buf < '0' || *buf > '9')
/* Offset of the start of the current 5-byte window. */
202 long off = ftell(inf) - 5;
203 if (verbose || print_offset)
204 printf("<!-- Skipping bad byte %d (0x%02X) at offset "
206 *buf & 0xff, *buf & 0xff,
/* Loop over the first four positions (body not visible; presumably
 * shifts the window left by one), then read one new byte into the
 * last slot. */
208 for (i = 0; i<4; i++)
210 r = fread(buf+4, 1, 1, inf);
216 if (verbose || print_offset)
217 printf ("<!-- End of file with data -->\n");
222 long off = ftell(inf) - 5;
223 printf ("<!-- Record %d offset %ld (0x%lx) -->\n",
/* Parse the 5-byte prefix as the record length and reject values
 * outside 25..100000. */
226 len = atoi_n(buf, 5);
227 if (len < 25 || len > 100000)
229 long off = ftell(inf) - 5;
230 printf("Bad Length %ld read at offset %ld (%lx)\n",
231 (long)len, (long) off, (long) off);
/* Read rlen further bytes after the prefix (rlen's computation is not
 * visible; presumably the record length minus the 5 prefix bytes). */
235 r = fread (buf + 5, 1, rlen, inf);
241 const char *mode = 0;
/* Start a new split file at every split_chunk-th record.
 * NOTE(review): a split_chunk of 0 would make this a modulo by zero;
 * no guard is visible in this excerpt -- confirm. */
243 if ((marc_no % split_chunk) == 0)
/* Build the split file name from at most 200 characters of the prefix
 * plus a zero-padded 7-digit sequence number. "fname" is the sprintf
 * destination here, so it is presumably a local buffer shadowing the
 * parameter (its declaration is not visible). */
250 sprintf(fname, "%.200s%07d", split_fname, split_file_no);
251 sf = fopen(fname, mode);
254 fprintf(stderr, "Could not open %s\n", fname);
/* Copy the raw record to the split file; a short write is an error. */
259 if (fwrite(buf, 1, len, sf) != len)
/* NOTE(review): the message probably means "Could not write"; the
 * runtime string is left untouched here. */
261 fprintf(stderr, "Could write content to %s\n",
/* Decode the record; result and len_result receive the output, which
 * is then written to stdout. */
268 len_result = (int) rlen;
269 r = yaz_marc_decode_buf(mt, buf, -1, &result, &len_result);
272 fwrite (result, len_result, 1, stdout);
/* C-array output: a comma between records, then the first r bytes of p
 * as \xHH escapes, 16 per source line. */
279 fprintf (cfile, ",");
280 fprintf (cfile, "\n");
281 for (i = 0; i < r; i++)
284 fprintf (cfile, " \"");
285 fprintf (cfile, "\\x%02X", p[i] & 255);
/* Close the string literal after every 16th byte, except after the
 * very last byte, which is closed separately below. */
287 if (i < r - 1 && (i & 15) == 15)
288 fprintf (cfile, "\"\n");
291 fprintf (cfile, "\"\n");
/* Closing line of the generated C array. */
298 fprintf (cfile, "};\n");
/* Release the MARC handle created at the top of the function. */
303 yaz_marc_destroy(mt);
/* Program entry point: parse the command line with options() and call
 * dump() with the collected settings.
 *
 * Defaults visible here: input format YAZ_MARC_ISO2709, output format
 * YAZ_MARC_LINE, no source charset, target charset taken from
 * nl_langinfo(CODESET) after setlocale(LC_CTYPE, "").
 *
 * Format names are decoded with yaz_marc_decode_formatstr(), where -1
 * means an unknown name. An output format argument starting with "xml,"
 * additionally sets write_using_libxml2. Several old switches only
 * print a "no longer supported" message.
 *
 * NOTE(review): the case labels that map option letters to the
 * statements below are not visible in this excerpt, and the comment
 * opened further down ("dirty hack ...") has lost its terminator.
 */
306 int main (int argc, char **argv)
309 int print_offset = 0;
313 int output_format = YAZ_MARC_LINE;
315 char *from = 0, *to = 0;
316 int input_format = YAZ_MARC_ISO2709;
318 const char *split_fname = 0;
319 const char *leader_spec = 0;
320 int write_using_libxml2 = 0;
/* Pick up the character type locale from the environment so that the
 * codeset query below reflects it. */
323 setlocale(LC_CTYPE, "");
327 to = nl_langinfo(CODESET);
/* options() yields one option or argument per call; -2 ends the loop. */
332 while ((r = options("i:o:C:npvc:xOeXIf:t:s:l:", argv, argc, &arg)) != -2)
/* Input format name (presumably the -i option). */
338 input_format = yaz_marc_decode_formatstr(arg);
339 if (input_format == -1)
341 fprintf(stderr, "%s: bad input format: %s\n", prog, arg);
346 /* dirty hack so we can make Libxml2 do the writing ..
348 if (strlen(arg) > 4 && strncmp(arg, "xml,", 4) == 0)
351 write_using_libxml2 = 1;
353 output_format = yaz_marc_decode_formatstr(arg);
354 if (output_format == -1)
356 fprintf(stderr, "%s: bad output format: %s\n", prog, arg);
372 cfile = fopen(arg, "w");
375 fprintf(stderr, "%s: -x no longer supported. "
376 "Use -i marcxml instead\n", prog);
380 fprintf(stderr, "%s: OAI MARC no longer supported."
381 " Use MARCXML instead.\n", prog);
385 fprintf(stderr, "%s: -e no longer supported. "
386 "Use -o marcxchange instead\n", prog);
390 fprintf(stderr, "%s: -X no longer supported. "
391 "Use -o marcxml instead\n", prog);
395 fprintf(stderr, "%s: -I no longer supported. "
396 "Use -o marc instead\n", prog);
400 output_format = YAZ_MARC_CHECK;
409 split_chunk = atoi(arg);
412 dump(arg, from, to, input_format, output_format,
414 print_offset, split_fname, split_chunk,
415 verbose, cfile, leader_spec);
437 * indent-tabs-mode: nil
439 * vim: shiftwidth=4 tabstop=8 expandtab