1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) Index Data
3 * See the file LICENSE for details.
7 * \brief MARC-8 encoding
10 * http://www.loc.gov/marc/specifications/speccharmarc8.html
21 #include <yaz/xmalloc.h>
22 #include <yaz/snprintf.h>
/* Conversion routines of type yaz_conv_func_t. lookup_marc8() calls most of
 * them, one after another, on the UTF-8 form of a character. */
25 yaz_conv_func_t yaz_marc8r_42_conv;
26 yaz_conv_func_t yaz_marc8r_45_conv;
27 yaz_conv_func_t yaz_marc8r_67_conv;
28 yaz_conv_func_t yaz_marc8r_62_conv;
29 yaz_conv_func_t yaz_marc8r_70_conv;
30 yaz_conv_func_t yaz_marc8r_32_conv;
31 yaz_conv_func_t yaz_marc8r_4E_conv;
32 yaz_conv_func_t yaz_marc8r_51_conv;
33 yaz_conv_func_t yaz_marc8r_33_conv;
34 yaz_conv_func_t yaz_marc8r_34_conv;
35 yaz_conv_func_t yaz_marc8r_53_conv;
36 yaz_conv_func_t yaz_marc8r_31_conv;
/* byte that flush_combos() appends right after the pending character;
 * reset to 0 once written */
42 unsigned write_marc8_second_half_char;
/* pending character value; flush_combos() writes it and resets it to 0 */
43 unsigned long write_marc8_last;
/* page (escape sequence) selected before the pending character is written */
45 const char *write_marc8_lpage;
/* escape sequence of the set currently tracked as G0 */
46 const char *write_marc8_g0;
/* escape sequence of the set currently tracked as G1, or 0 */
47 const char *write_marc8_g1;
/* Init handler: reset the encoder state stored in w->data.
 * No character is pending afterwards, G0 is the ASCII page (ESC "(B")
 * and G1 is unset. */
50 static void init_marc8(yaz_iconv_encoder_t w)
52 struct encoder_data *data = (struct encoder_data *) w->data;
53 data->write_marc8_second_half_char = 0;
54 data->write_marc8_last = 0;
55 data->write_marc8_ncr = 0;
56 data->write_marc8_lpage = 0;
57 data->write_marc8_g0 = ESC "(B";
58 data->write_marc8_g1 = 0;
/* Forward declaration: flush_combos() calls this before its definition. */
61 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
62 struct encoder_data *w,
63 char **outbuf, size_t *outbytesleft,
64 const char *page_chr);
/* Look up character x in the MARC-8 conversion tables.
 *
 * x is first written as UTF-8 into a local buffer; if that fails the
 * iconv error is set to YAZ_ICONV_EILSEQ. The yaz_marc8r_*_conv routines
 * are then applied to that buffer one after another.
 *
 * cd        conversion handle, receives the error code
 * x         character to look up
 * comb      passed through to the conversion routines
 * page_chr  receives the escape sequence of the page belonging to the
 *           table that was used
 *
 * YAZ_ICONV_EILSEQ is also set at the end of the table sequence. */
66 static unsigned long lookup_marc8(yaz_iconv_t cd,
67 unsigned long x, int *comb,
68 const char **page_chr)
71 char *utf8_outbuf = utf8_buf;
/* one byte is held back from the buffer size; strlen() is applied to
 * utf8_buf further down */
72 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
75 r = yaz_write_UTF8_char(x, &utf8_outbuf, &utf8_outbytesleft, &error_code);
76 if (r == (size_t)(-1))
78 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
84 size_t inbytesleft, no_read_sub = 0;
/* the UTF-8 bytes just produced are the input of every table lookup */
88 inp = (unsigned char *) utf8_buf;
89 inbytesleft = strlen(utf8_buf);
91 x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
97 x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
100 *page_chr = ESC "(B";
103 x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
109 x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
115 x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
118 *page_chr = ESC "(2";
121 x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
124 *page_chr = ESC "(N";
127 x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
130 *page_chr = ESC "(Q";
133 x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
136 *page_chr = ESC "(3";
139 x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
142 *page_chr = ESC "(4";
145 x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
148 *page_chr = ESC "(S";
151 x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
154 *page_chr = ESC "$1";
157 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
/* Write the pending character (w->write_marc8_last) to the output.
 *
 * The page stored in w->write_marc8_lpage is selected first via
 * yaz_write_marc8_page_chr(). The character is then written either as
 * "&#x....;" text (when w->write_marc8_ncr is set) or as up to three raw
 * bytes of the value, most significant first. A recorded second-half byte
 * is appended last. All pending state is cleared afterwards.
 *
 * Sets YAZ_ICONV_E2BIG and returns (size_t)(-1) unless more than 9 bytes
 * of output space are left. */
162 static size_t flush_combos(yaz_iconv_t cd,
163 struct encoder_data *w,
164 char **outbuf, size_t *outbytesleft)
166 unsigned long y = w->write_marc8_last;
171 assert(w->write_marc8_lpage);
172 if (w->write_marc8_lpage)
174 size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
175 w->write_marc8_lpage);
/* space check is done once, before any byte of the character is written */
180 if (9 >= *outbytesleft)
182 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
183 return (size_t) (-1);
185 if (w->write_marc8_ncr)
/* NOTE(review): y is unsigned long but is formatted with %04x; if
 * yaz_snprintf follows printf conventions this wants %04lx - confirm.
 * The size argument 9 limits the output to 8 characters plus NUL. */
187 yaz_snprintf(*outbuf, 9, "&#x%04x;", y);
188 (*outbytesleft) -= 8;
/* raw form: bits 16-23, then bits 8-15, then bits 0-7 of y */
196 byte = (unsigned char )((y>>16) & 0xff);
198 (*outbuf)[out_no++] = byte;
199 byte = (unsigned char)((y>>8) & 0xff);
201 (*outbuf)[out_no++] = byte;
202 byte = (unsigned char )(y & 0xff);
204 (*outbuf)[out_no++] = byte;
206 (*outbytesleft) -= out_no;
209 if (w->write_marc8_second_half_char)
211 *(*outbuf)++ = w->write_marc8_second_half_char;
/* nothing is pending any more */
215 w->write_marc8_last = 0;
216 w->write_marc8_ncr = 0;
217 w->write_marc8_lpage = 0;
218 w->write_marc8_second_half_char = 0;
/* Emit the escape sequence that selects page page_chr, if it differs from
 * the set currently recorded in the encoder state.
 *
 * The G0 slot (w->write_marc8_g0) is used unless page_chr[1] is ')', in
 * which case the G1 slot (w->write_marc8_g1) is used. The chosen slot is
 * updated to page_chr when a switch is written.
 *
 * Sets YAZ_ICONV_E2BIG and returns (size_t)(-1) when fewer than 8 bytes
 * of output space are left and a switch is needed. */
222 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
223 struct encoder_data *w,
224 char **outbuf, size_t *outbytesleft,
225 const char *page_chr)
227 const char **old_page_chr = &w->write_marc8_g0;
229 /* are we going to a G1-set (such as ESC ")!E")? */
230 if (page_chr && page_chr[1] == ')')
231 old_page_chr = &w->write_marc8_g1;
/* NOTE(review): page_chr is NULL-checked just above but handed to
 * strcmp() here without a check - confirm callers never pass NULL. */
233 if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
236 const char *page_out = page_chr;
238 if (*outbytesleft < 8)
240 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
242 return (size_t) (-1);
/* the previous set is one of ESC "p", ESC "g" or ESC "b" */
247 if (!strcmp(*old_page_chr, ESC "p")
248 || !strcmp(*old_page_chr, ESC "g")
249 || !strcmp(*old_page_chr, ESC "b"))
252 /* Technique 1 leave */
253 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
255 /* Must leave script + enter new page */
256 plen = strlen(page_out);
257 memcpy(*outbuf, page_out, plen);
259 (*outbytesleft) -= plen;
/* remember the new set and write its escape sequence */
264 *old_page_chr = page_chr;
265 plen = strlen(page_out);
266 memcpy(*outbuf, page_out, plen);
268 (*outbytesleft) -= plen;
/* Encode one character x for MARC-8 output.
 *
 * x is looked up with lookup_marc8(), which also yields the page to use.
 * In the visible code the looked-up value is not written at once: the
 * previously pending character is flushed with flush_combos(), and the
 * new value, its page and the NCR flag are then stored as pending state.
 *
 * Error paths return (size_t)(-1); YAZ_ICONV_E2BIG is set when no more
 * than one byte of output space is left. */
274 static size_t yaz_write_marc8_2(yaz_iconv_t cd, struct encoder_data *w,
276 char **outbuf, size_t *outbytesleft,
281 const char *page_chr = 0;
282 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
290 return (size_t) (-1);
/* values below 32, other than 27 (ESC), get separate treatment */
299 if (x < 32 && x != 27)
302 return (size_t) (-1);
310 size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
/* record the byte that flush_combos() appends after the pending character */
316 w->write_marc8_second_half_char = 0xEC;
317 else if (x == 0x0360)
318 w->write_marc8_second_half_char = 0xFB;
320 if (*outbytesleft <= 1)
322 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
323 return (size_t) (-1);
/* write out the previously pending character before replacing it */
330 size_t r = flush_combos(cd, w, outbuf, outbytesleft);
334 w->write_marc8_last = y;
335 w->write_marc8_lpage = page_chr;
336 w->write_marc8_ncr = enable_ncr;
/* Flush handler: write any pending character with flush_combos(), forget
 * the G1 set and switch G0 back to the ASCII page (ESC "(B").
 * Returns the result of that final page switch. */
341 static size_t flush_marc8(yaz_iconv_t cd, yaz_iconv_encoder_t en,
342 char **outbuf, size_t *outbytesleft)
344 struct encoder_data *w = (struct encoder_data *) en->data;
345 size_t r = flush_combos(cd, w, outbuf, outbytesleft);
348 w->write_marc8_g1 = 0;
349 return yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft, ESC "(B");
/* Common write routine behind all write_marc8_* handlers.
 *
 * When yaz_iso_8859_1_lookup_y() yields a pair x1, x2 for x, both are
 * written with yaz_write_marc8_2(). The output position and the pending
 * state are saved beforehand, so that they can be put back when the
 * second write fails with YAZ_ICONV_E2BIG. Otherwise x itself is written
 * with yaz_write_marc8_2().
 *
 * loss_mode is passed on unchanged to yaz_write_marc8_2(). */
352 static size_t yaz_write_marc8_generic(yaz_iconv_t cd, struct encoder_data *w,
354 char **outbuf, size_t *outbytesleft,
357 unsigned long x1, x2;
358 if (yaz_iso_8859_1_lookup_y(x, &x1, &x2))
360 /* save the output pointers .. */
361 char *outbuf0 = *outbuf;
362 size_t outbytesleft0 = *outbytesleft;
/* NOTE(review): write_marc8_last is declared unsigned long but is saved
 * in an int here; values that do not fit an int would not be restored
 * faithfully - confirm the value range. */
363 int last_ch = w->write_marc8_last;
364 int ncr = w->write_marc8_ncr;
365 const char *lpage = w->write_marc8_lpage;
368 r = yaz_write_marc8_2(cd, w, x1,
369 outbuf, outbytesleft, loss_mode);
372 r = yaz_write_marc8_2(cd, w, x2,
373 outbuf, outbytesleft, loss_mode);
374 if (r && yaz_iconv_error(cd) == YAZ_ICONV_E2BIG)
376 /* not enough room. reset output to original values */
378 *outbytesleft = outbytesleft0;
379 w->write_marc8_last = last_ch;
380 w->write_marc8_ncr = ncr;
381 w->write_marc8_lpage = lpage;
385 return yaz_write_marc8_2(cd, w, x, outbuf, outbytesleft, loss_mode);
/* Write handler for "MARC8" and "MARC8s": yaz_write_marc8_generic() with
 * loss mode 0. */
388 static size_t write_marc8_normal(yaz_iconv_t cd, yaz_iconv_encoder_t e,
390 char **outbuf, size_t *outbytesleft)
392 return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
393 x, outbuf, outbytesleft, 0);
/* Write handler for "MARC8lossy": yaz_write_marc8_generic() with
 * loss mode 1. */
396 static size_t write_marc8_lossy(yaz_iconv_t cd, yaz_iconv_encoder_t e,
398 char **outbuf, size_t *outbytesleft)
400 return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
401 x, outbuf, outbytesleft, 1);
/* Write handler for "MARC8lossless": yaz_write_marc8_generic() with
 * loss mode 2. */
404 static size_t write_marc8_lossless(yaz_iconv_t cd, yaz_iconv_encoder_t e,
406 char **outbuf, size_t *outbytesleft)
408 return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
409 x, outbuf, outbytesleft, 2);
/* Write handler for "MARC8c": yaz_write_marc8_generic() with
 * loss mode 3. */
412 static size_t write_marc8_control(yaz_iconv_t cd, yaz_iconv_encoder_t e,
414 char **outbuf, size_t *outbytesleft)
416 return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
417 x, outbuf, outbytesleft, 3);
/* Destroy handler; yaz_marc8_encoder() installs it as e->destroy_handle. */
420 static void destroy_marc8(yaz_iconv_encoder_t e)
/* Set up encoder e for one of the MARC-8 target encodings.
 *
 * tocode selects the write handler:
 *   MARC8, MARC8s   write_marc8_normal
 *   MARC8lossy      write_marc8_lossy
 *   MARC8lossless   write_marc8_lossless
 *   MARC8c          write_marc8_control
 *
 * State of type struct encoder_data is allocated with xmalloc(), and the
 * destroy, flush and init handlers of e are installed. */
425 yaz_iconv_encoder_t yaz_marc8_encoder(const char *tocode,
426 yaz_iconv_encoder_t e)
/* a zero result from yaz_matchstr() is treated as a match */
429 if (!yaz_matchstr(tocode, "MARC8"))
430 e->write_handle = write_marc8_normal;
431 else if (!yaz_matchstr(tocode, "MARC8s"))
432 e->write_handle = write_marc8_normal;
433 else if (!yaz_matchstr(tocode, "MARC8lossy"))
434 e->write_handle = write_marc8_lossy;
435 else if (!yaz_matchstr(tocode, "MARC8lossless"))
436 e->write_handle = write_marc8_lossless;
437 else if (!yaz_matchstr(tocode, "MARC8c"))
438 e->write_handle = write_marc8_control;
443 struct encoder_data *data = (struct encoder_data *)
444 xmalloc(sizeof(*data));
446 e->destroy_handle = destroy_marc8;
447 e->flush_handle = flush_marc8;
448 e->init_handle = init_marc8;
457 * c-file-style: "Stroustrup"
458 * indent-tabs-mode: nil
460 * vim: shiftwidth=4 tabstop=8 expandtab