1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2012 Index Data
3 * See the file LICENSE for details.
7 * \brief MARC-8 encoding
10 * http://www.loc.gov/marc/specifications/speccharmarc8.html
21 #include <yaz/xmalloc.h>
22 #include <yaz/snprintf.h>
/* Converters for the individual MARC-8 character sets; lookup_marc8 calls
 * them one after another.  In the visible lookup code the hex suffix of a
 * name matches the ASCII code of the final byte of the escape sequence
 * that goes with it (e.g. 4E with ESC "(N", 31 with ESC "$1"). */
25 yaz_conv_func_t yaz_marc8r_42_conv;
26 yaz_conv_func_t yaz_marc8r_45_conv;
27 yaz_conv_func_t yaz_marc8r_67_conv;
28 yaz_conv_func_t yaz_marc8r_62_conv;
29 yaz_conv_func_t yaz_marc8r_70_conv;
30 yaz_conv_func_t yaz_marc8r_32_conv;
31 yaz_conv_func_t yaz_marc8r_4E_conv;
32 yaz_conv_func_t yaz_marc8r_51_conv;
33 yaz_conv_func_t yaz_marc8r_33_conv;
34 yaz_conv_func_t yaz_marc8r_34_conv;
35 yaz_conv_func_t yaz_marc8r_53_conv;
36 yaz_conv_func_t yaz_marc8r_31_conv;
/* Byte that flush_combos writes right after the pending character; set to
 * 0xEC or 0xFB in yaz_write_marc8_2, 0 when there is none. */
42 unsigned write_marc8_second_half_char;
/* Converted character held back until the next flush_combos call. */
43 unsigned long write_marc8_last;
/* Escape sequence of the page that the pending character belongs to. */
45 const char *write_marc8_lpage;
/* Escape sequences last recorded for the G0 and G1 sets; init_marc8 starts
 * g0 at ESC "(B" and g1 at 0. */
46 const char *write_marc8_g0;
47 const char *write_marc8_g1;
/* Reset the encoder state kept in w->data: no pending character, no
 * second-half byte, NCR flag off, no page recorded for the pending
 * character, G0 recorded as ESC "(B" and G1 as unset.
 * Installed as e->init_handle by yaz_marc8_encoder. */
50 static void init_marc8(yaz_iconv_encoder_t w)
52 struct encoder_data *data = (struct encoder_data *) w->data;
53 data->write_marc8_second_half_char = 0;
54 data->write_marc8_last = 0;
55 data->write_marc8_ncr = 0;
56 data->write_marc8_lpage = 0;
/* G0 starts out as ESC "(B"; the lookup code pairs that escape with the
 * first converters it tries. */
57 data->write_marc8_g0 = ESC "(B";
58 data->write_marc8_g1 = 0;
/* Forward declaration: flush_combos calls this function before its
 * definition appears further down. */
61 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
62 struct encoder_data *w,
63 char **outbuf, size_t *outbytesleft,
64 const char *page_chr);
/* Map code point x to its MARC-8 value.
 *
 * x is first written as UTF-8 into a local buffer; that byte sequence is
 * then handed to the per-character-set converters in turn.  *page_chr is
 * assigned the escape sequence that goes with a converter, and comb is
 * passed through to the converters.
 *
 * YAZ_ICONV_EILSEQ is set on cd in two places: when the UTF-8 encode
 * fails, and at the end of the function (presumably reached only when no
 * converter produced a value -- the checks between the calls are not part
 * of this excerpt, so confirm against the full file). */
66 static unsigned long lookup_marc8(yaz_iconv_t cd,
67 unsigned long x, int *comb,
68 const char **page_chr)
71 char *utf8_outbuf = utf8_buf;
/* One byte of utf8_buf is held back from the writer; strlen() is applied to
 * the buffer below, so it has to end up NUL-terminated (the terminating
 * store is not visible here). */
72 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
/* Encode x as UTF-8; a result of (size_t)(-1) signals failure. */
75 r = yaz_write_UTF8_char(x, &utf8_outbuf, &utf8_outbytesleft, &error_code);
76 if (r == (size_t)(-1))
78 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
84 size_t inbytesleft, no_read_sub = 0;
/* The UTF-8 bytes just produced become the input of the converters. */
88 inp = (unsigned char *) utf8_buf;
89 inbytesleft = strlen(utf8_buf);
/* Every converter receives the same input and the same trailing
 * arguments (255, 0); only the table differs. */
91 x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
97 x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
100 *page_chr = ESC "(B";
103 x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
109 x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
115 x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
118 *page_chr = ESC "(2";
121 x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
124 *page_chr = ESC "(N";
127 x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
130 *page_chr = ESC "(Q";
133 x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
136 *page_chr = ESC "(3";
139 x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
142 *page_chr = ESC "(4";
145 x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
148 *page_chr = ESC "(S";
/* This is the only visible escape that starts with '$' instead of '('. */
151 x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
154 *page_chr = ESC "$1";
157 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
/* Write out the character held in w->write_marc8_last.
 *
 * The page recorded for the pending character is selected first via
 * yaz_write_marc8_page_chr.  The character is then written either as a
 * numeric character reference ("&#x....;", when w->write_marc8_ncr is set)
 * or as raw bytes taken from y, most significant byte first.  A recorded
 * second-half byte is appended, and finally the pending state is cleared.
 *
 * Returns (size_t)(-1) with YAZ_ICONV_E2BIG set on cd when 9 or fewer
 * output bytes are left.  Other return paths are not visible in this
 * excerpt. */
162 static size_t flush_combos(yaz_iconv_t cd,
163 struct encoder_data *w,
164 char **outbuf, size_t *outbytesleft)
166 unsigned long y = w->write_marc8_last;
/* The if below repeats the asserted condition, so it only makes a
 * difference in builds where assert() is compiled out. */
171 assert(w->write_marc8_lpage);
172 if (w->write_marc8_lpage)
174 size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
175 w->write_marc8_lpage);
/* Demand more than 9 free bytes up front; the NCR branch below passes a
 * size of 9 to yaz_snprintf. */
180 if (9 >= *outbytesleft)
182 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
183 return (size_t) (-1);
185 if (w->write_marc8_ncr)
/* NOTE(review): y is unsigned long but the conversion is %04x; if
 * yaz_snprintf follows printf rules this should be %04lx -- confirm.
 * Also, 8 output bytes are accounted for, which fits "&#x" plus four hex
 * digits plus ";" only while y needs at most four hex digits. */
187 yaz_snprintf(*outbuf, 9, "&#x%04x;", y);
188 (*outbytesleft) -= 8;
/* Raw form: bits 16-23, 8-15 and 0-7 of y, in that order.  The conditions
 * that decide whether each byte is stored are not visible here. */
196 byte = (unsigned char )((y>>16) & 0xff);
198 (*outbuf)[out_no++] = byte;
199 byte = (unsigned char)((y>>8) & 0xff);
201 (*outbuf)[out_no++] = byte;
202 byte = (unsigned char )(y & 0xff);
204 (*outbuf)[out_no++] = byte;
206 (*outbytesleft) -= out_no;
/* A second-half byte recorded by yaz_write_marc8_2 follows the character. */
209 if (w->write_marc8_second_half_char)
211 *(*outbuf)++ = w->write_marc8_second_half_char;
/* Nothing is pending any more. */
215 w->write_marc8_last = 0;
216 w->write_marc8_ncr = 0;
217 w->write_marc8_lpage = 0;
218 w->write_marc8_second_half_char = 0;
/* Switch the output to the page selected by the escape sequence page_chr.
 *
 * The sequence is compared with the one last recorded for the target set:
 * w->write_marc8_g0 by default, w->write_marc8_g1 when the second
 * character of page_chr is ')'.  Only when nothing is recorded or the two
 * differ is an escape sequence copied to *outbuf, *outbytesleft reduced
 * and the record updated.
 *
 * Returns (size_t)(-1) with YAZ_ICONV_E2BIG set on cd when a switch is
 * needed and fewer than 8 output bytes are left.  Other return paths are
 * not visible in this excerpt. */
222 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
223 struct encoder_data *w,
224 char **outbuf, size_t *outbytesleft,
225 const char *page_chr)
227 const char **old_page_chr = &w->write_marc8_g0;
229 /* are we going to a G1-set (such as ESC ")!E")? */
/* NOTE(review): page_chr is checked against NULL here, but the strcmp
 * further down receives it unchecked -- confirm callers never pass NULL. */
230 if (page_chr && page_chr[1] == ')')
231 old_page_chr = &w->write_marc8_g1;
233 if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
236 const char *page_out = page_chr;
238 if (*outbytesleft < 8)
240 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
242 return (size_t) (-1);
/* Pages ESC "p", ESC "g" and ESC "b" get special treatment when they are
 * the page being left ("Technique 1" in the comment below). */
247 if (!strcmp(*old_page_chr, ESC "p")
248 || !strcmp(*old_page_chr, ESC "g")
249 || !strcmp(*old_page_chr, ESC "b"))
252 /* Technique 1 leave */
253 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
255 /* Must leave script + enter new page */
256 plen = strlen(page_out);
257 memcpy(*outbuf, page_out, plen);
259 (*outbytesleft) -= plen;
/* Record the new page for this set, then emit page_out. */
264 *old_page_chr = page_chr;
265 plen = strlen(page_out);
266 memcpy(*outbuf, page_out, plen);
268 (*outbytesleft) -= plen;
/* Convert one code point x with lookup_marc8 and write or queue the result.
 *
 * Visible behaviour: a page switch through yaz_write_marc8_page_chr, a
 * second-half byte being recorded (0xFB for x == 0x0360, 0xEC under a
 * condition that is not visible), an E2BIG failure when one output byte or
 * fewer is left, and a path that flushes the previous pending character
 * with flush_combos and then stores y, its page and the NCR flag as the
 * new pending state.  Which path applies to which kind of character
 * depends on lines missing from this excerpt. */
274 static size_t yaz_write_marc8_2(yaz_iconv_t cd, struct encoder_data *w,
276 char **outbuf, size_t *outbytesleft,
281 const char *page_chr = 0;
282 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
287 return (size_t) (-1);
302 size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
/* flush_combos writes this byte after the pending character; presumably
 * the second half of a double-width MARC-8 diacritic -- TODO confirm. */
308 w->write_marc8_second_half_char = 0xEC;
309 else if (x == 0x0360)
310 w->write_marc8_second_half_char = 0xFB;
312 if (*outbytesleft <= 1)
314 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
315 return (size_t) (-1);
/* Write the previously queued character before queuing this one. */
322 size_t r = flush_combos(cd, w, outbuf, outbytesleft);
326 w->write_marc8_last = y;
327 w->write_marc8_lpage = page_chr;
328 w->write_marc8_ncr = enable_ncr;
/* Flush handler (installed as e->flush_handle by yaz_marc8_encoder):
 * writes the pending character with flush_combos, forgets the recorded G1
 * set and returns the result of switching the output back to ESC "(B". */
333 static size_t flush_marc8(yaz_iconv_t cd, yaz_iconv_encoder_t en,
334 char **outbuf, size_t *outbytesleft)
336 struct encoder_data *w = (struct encoder_data *) en->data;
337 size_t r = flush_combos(cd, w, outbuf, outbytesleft);
340 w->write_marc8_g1 = 0;
341 return yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft, ESC "(B");
/* Write code point x, splitting it first when yaz_iso_8859_1_lookup_y
 * supplies a pair (x1, x2) for it.
 *
 * In the split case x1 and x2 are written with yaz_write_marc8_2 in that
 * order.  The output position and the pending-character state are saved
 * beforehand so they can be put back if the write of x2 fails with
 * YAZ_ICONV_E2BIG.  Without a split, x goes to yaz_write_marc8_2 as is.
 * loss_mode is passed through unchanged. */
344 static size_t yaz_write_marc8_generic(yaz_iconv_t cd, struct encoder_data *w,
346 char **outbuf, size_t *outbytesleft,
349 unsigned long x1, x2;
350 if (yaz_iso_8859_1_lookup_y(x, &x1, &x2))
352 /* save the output pointers .. */
353 char *outbuf0 = *outbuf;
354 size_t outbytesleft0 = *outbytesleft;
/* NOTE(review): write_marc8_last is unsigned long but is saved in an int
 * here; values beyond int range would not survive the round trip --
 * confirm whether that can occur. */
355 int last_ch = w->write_marc8_last;
356 int ncr = w->write_marc8_ncr;
357 const char *lpage = w->write_marc8_lpage;
360 r = yaz_write_marc8_2(cd, w, x1,
361 outbuf, outbytesleft, loss_mode);
364 r = yaz_write_marc8_2(cd, w, x2,
365 outbuf, outbytesleft, loss_mode);
366 if (r && yaz_iconv_error(cd) == YAZ_ICONV_E2BIG)
368 /* not enough room. reset output to original values */
370 *outbytesleft = outbytesleft0;
371 w->write_marc8_last = last_ch;
372 w->write_marc8_ncr = ncr;
373 w->write_marc8_lpage = lpage;
/* No split available: write x directly. */
377 return yaz_write_marc8_2(cd, w, x, outbuf, outbytesleft, loss_mode);
/* Write handler used for "MARC8" and "MARC8s": forwards to
 * yaz_write_marc8_generic with 0 as the final (loss mode) argument. */
380 static size_t write_marc8_normal(yaz_iconv_t cd, yaz_iconv_encoder_t e,
382 char **outbuf, size_t *outbytesleft)
384 return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
385 x, outbuf, outbytesleft, 0);
/* Write handler used for "MARC8lossy": forwards to
 * yaz_write_marc8_generic with 1 as the final (loss mode) argument. */
388 static size_t write_marc8_lossy(yaz_iconv_t cd, yaz_iconv_encoder_t e,
390 char **outbuf, size_t *outbytesleft)
392 return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
393 x, outbuf, outbytesleft, 1);
/* Write handler used for "MARC8lossless": forwards to
 * yaz_write_marc8_generic with 2 as the final (loss mode) argument. */
396 static size_t write_marc8_lossless(yaz_iconv_t cd, yaz_iconv_encoder_t e,
398 char **outbuf, size_t *outbytesleft)
400 return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
401 x, outbuf, outbytesleft, 2);
/* Destroy handler, installed as e->destroy_handle by yaz_marc8_encoder.
 * Its body is not part of this excerpt. */
404 static void destroy_marc8(yaz_iconv_encoder_t e)
/* Configure encoder e for the MARC-8 variant named by tocode.
 *
 * A zero result from yaz_matchstr is treated as a match.  "MARC8" and
 * "MARC8s" select write_marc8_normal, "MARC8lossy" selects
 * write_marc8_lossy and "MARC8lossless" selects write_marc8_lossless.
 * A struct encoder_data is allocated with xmalloc and the destroy, flush
 * and init handlers are installed on e.
 *
 * The handling of an unrecognised tocode and the returned value lie on
 * lines missing from this excerpt. */
409 yaz_iconv_encoder_t yaz_marc8_encoder(const char *tocode,
410 yaz_iconv_encoder_t e)
413 if (!yaz_matchstr(tocode, "MARC8"))
414 e->write_handle = write_marc8_normal;
/* "MARC8s" shares the handler of plain "MARC8". */
415 else if (!yaz_matchstr(tocode, "MARC8s"))
416 e->write_handle = write_marc8_normal;
417 else if (!yaz_matchstr(tocode, "MARC8lossy"))
418 e->write_handle = write_marc8_lossy;
419 else if (!yaz_matchstr(tocode, "MARC8lossless"))
420 e->write_handle = write_marc8_lossless;
/* Per-encoder state; its fields are given their values by init_marc8. */
425 struct encoder_data *data = (struct encoder_data *)
426 xmalloc(sizeof(*data));
428 e->destroy_handle = destroy_marc8;
429 e->flush_handle = flush_marc8;
430 e->init_handle = init_marc8;
439 * c-file-style: "Stroustrup"
440 * indent-tabs-mode: nil
442 * vim: shiftwidth=4 tabstop=8 expandtab