1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2013 Index Data
3 * See the file LICENSE for details.
7 * \brief MARC-8 decoding
10 * http://www.loc.gov/marc/specifications/speccharmarc8.html
21 #include <yaz/xmalloc.h>
30 unsigned long comb_x[8]; /* values returned by yaz_read_marc8_comb, buffered by read_marc8 (at most 8) */
31 size_t comb_no_read[8]; /* *no_read recorded for the comb_x entry at the same index */
/* Per-character-set conversion routines used by yaz_read_marc8_comb.
 * The two hex digits in each name are the ASCII code of the mode byte
 * that selects the routine in that function's switch, e.g. 42 = 'B',
 * 45 = 'E', 67 = 'g', 4E = 'N', 31 = '1'.
 * A comment in read_marc8 states that these handlers are generated by
 * charconv.tcl; they are only declared in this file.
 */
34 yaz_conv_func_t yaz_marc8_42_conv;
35 yaz_conv_func_t yaz_marc8_45_conv;
36 yaz_conv_func_t yaz_marc8_67_conv;
37 yaz_conv_func_t yaz_marc8_62_conv;
38 yaz_conv_func_t yaz_marc8_70_conv;
39 yaz_conv_func_t yaz_marc8_32_conv;
40 yaz_conv_func_t yaz_marc8_4E_conv;
41 yaz_conv_func_t yaz_marc8_51_conv;
42 yaz_conv_func_t yaz_marc8_33_conv;
43 yaz_conv_func_t yaz_marc8_34_conv;
44 yaz_conv_func_t yaz_marc8_53_conv;
45 yaz_conv_func_t yaz_marc8_31_conv;
/* Forward declaration: the function is defined further down in this file
 * but is already needed by read_marc8.
 */
48 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
49 struct decoder_data *data,
51 size_t inbytesleft, size_t *no_read,
/* Read handler installed by yaz_marc8_decoder for "MARC8" and "ANSEL".
 *
 * Works against the comb_x[] / comb_no_read[] buffer in decoder_data:
 *  - while comb_offset < comb_size, a buffered entry is handed out:
 *    *no_read and the value are taken from slot comb_offset;
 *  - otherwise comb_offset is reset to 0 and up to 8 slots are refilled,
 *    one yaz_read_marc8_comb() call per slot, storing the returned value
 *    and the *no_read it reported, and reducing inbytesleft by *no_read.
 *
 * Running out of input after at least one slot has been filled is
 * reported with yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL).
 *
 * NOTE(review): how the `comb` flag written by yaz_read_marc8_comb ends
 * the refill loop, and the order in which buffered values are returned,
 * cannot be read off the statements below - confirm before relying on it.
 * NOTE(review): the 0x0361 / 0x0360 test is described by the comment
 * preceding it as no longer necessary; presumably it is disabled - verify.
 */
54 static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
56 size_t inbytesleft, size_t *no_read)
58 struct decoder_data *data = (struct decoder_data *) d->data;
60 if (data->comb_offset < data->comb_size)
62 *no_read = data->comb_no_read[data->comb_offset];
63 x = data->comb_x[data->comb_offset];
65 /* special case for double-diacritic combining characters,
66 INVERTED BREVE and DOUBLE TILDE.
67 We'll increment the no_read counter by 1, since we want to skip over
68 the processing of the closing ligature character
70 /* this code is no longer necessary.. our handlers code in
71 yaz_marc8_?_conv (generated by charconv.tcl) now returns
72 0 and no_read=1 when a sequence does not match the input.
73 The SECOND HALFs in codetables.xml produces a non-existant
74 entry in the conversion trie.. Hence when met, the input byte is
75 skipped as it should (in yaz_iconv)
78 if (x == 0x0361 || x == 0x0360)
85 data->comb_offset = 0;
86 for (data->comb_size = 0; data->comb_size < 8; data->comb_size++)
90 if (inbytesleft == 0 && data->comb_size)
92 yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
97 x = yaz_read_marc8_comb(cd, data, inp, inbytesleft, no_read, &comb);
100 data->comb_x[data->comb_size] = x;
101 data->comb_no_read[data->comb_size] = *no_read;
103 inbytesleft = inbytesleft - *no_read;
/* Read handler installed by yaz_marc8_decoder for "MARC8s".
 *
 * Delegates to read_marc8. When that returns a non-zero value while
 * exactly one entry is buffered (comb_size == 1), the pair
 * (x, comb_x[0]) is offered to yaz_iso_8859_1_lookup_x12(); on a non-zero
 * result x has been replaced through the output argument and *no_read is
 * increased by comb_no_read[0], so the bytes of the buffered entry are
 * counted as consumed as well.
 *
 * NOTE(review): presumably this folds a character plus one combining mark
 * into a single precomposed ISO-8859-1 character - confirm against
 * yaz_iso_8859_1_lookup_x12.
 */
108 static unsigned long read_marc8s(yaz_iconv_t cd, yaz_iconv_decoder_t d,
110 size_t inbytesleft, size_t *no_read)
112 struct decoder_data *data = (struct decoder_data *) d->data;
113 unsigned long x = read_marc8(cd, d, inp, inbytesleft, no_read);
114 if (x && data->comb_size == 1) /* exactly one value is waiting in the buffer */
116 if (yaz_iso_8859_1_lookup_x12(x, data->comb_x[0], &x))
118 *no_read += data->comb_no_read[0]; /* also account for the buffered entry's bytes */
/* Decode one unit of MARC-8 input.
 *
 * Step 1: consume every escape sequence (leading byte 27, ESC) found at
 * the current position. An optional '$' marks a multi-byte set; '(' or ','
 * designates G0 and ')' or '-' designates G1; an optional '!' precedes the
 * ANSEL final byte. The final byte is stored in data->g0_mode or
 * data->g1_mode, and the length of each sequence is added to *no_read.
 *
 * Step 2: pick the active mode from the next byte (below 128 uses
 * g0_mode, otherwise g1_mode) and call the matching yaz_marc8_XX_conv
 * routine; the number of bytes it reports is added to *no_read.
 *
 * Errors set here: YAZ_ICONV_EILSEQ (placed after the known mode cases,
 * so presumably for an unknown mode) and YAZ_ICONV_EINVAL (presumably the
 * shared exit for the inbytesleft == 0 checks - verify).
 *
 * NOTE(review): the trailing converter arguments (127, then 0 or 128 for
 * ANSEL) are passed through unchanged; presumably a 7-bit mask and a base
 * offset - confirm against the yaz_conv_func_t definition.
 */
125 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
126 struct decoder_data *data,
128 size_t inbytesleft, size_t *no_read,
132 while (inbytesleft > 0 && *inp == 27) /* 27 = ESC: an escape sequence follows */
134 int *modep = &data->g0_mode; /* G0 unless the sequence designates G1 below */
135 size_t inbytesleft0 = inbytesleft; /* remembered to compute the sequence length */
139 if (inbytesleft == 0)
141 if (*inp == '$') /* set with multiple bytes */
146 if (inbytesleft == 0)
148 if (*inp == '(' || *inp == ',') /* G0 */
153 else if (*inp == ')' || *inp == '-') /* G1 */
157 modep = &data->g1_mode;
159 if (inbytesleft == 0)
161 if (*inp == '!') /* ANSEL is a special case */
166 if (inbytesleft == 0)
168 *modep = *inp++; /* Final character */
171 (*no_read) += inbytesleft0 - inbytesleft; /* bytes consumed by this escape sequence */
173 if (inbytesleft == 0)
175 else if (*inp <= ' ') /* bytes up to and including space bypass the table lookup */
183 size_t no_read_sub = 0;
184 int mode = *inp < 128 ? data->g0_mode : data->g1_mode; /* low half uses G0, high half G1 */
189 case 'B': /* Basic ASCII */
190 case 's': /* ASCII */
191 x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
193 case 'E': /* ANSEL */
194 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128);
196 case 'g': /* Greek symbols */
197 x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
199 case 'b': /* Subscripts */
200 x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
202 case 'p': /* Superscripts */
203 x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
205 case '2': /* Basic Hebrew */
206 x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
208 case 'N': /* Basic Cyrillic */
209 x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
211 case 'Q': /* Extended Cyrillic */
212 x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
214 case '3': /* Basic Arabic */
215 x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
217 case '4': /* Extended Arabic */
218 x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
220 case 'S': /* Basic Greek */
221 x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
223 case '1': /* Chinese, Japanese, Korean (EACC) */
224 x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
228 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
231 *no_read += no_read_sub; /* add the bytes the converter consumed */
236 yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
/* Init handler installed by yaz_marc8_decoder.
 * Empties the buffer used by read_marc8 by setting comb_offset and
 * comb_size to 0.
 * NOTE(review): presumably the G0/G1 modes are reset here as well, and the
 * meaning of the size_t result is defined by the yaz_iconv decoder
 * interface - neither is evident from the statements below; confirm.
 */
241 static size_t init_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
243 size_t inbytesleft, size_t *no_read)
245 struct decoder_data *data = (struct decoder_data *) d->data;
248 data->comb_offset = data->comb_size = 0;
/* Destroy handler installed by yaz_marc8_decoder; operates on the
 * decoder_data reached through d->data.
 * NOTE(review): presumably releases the block obtained with xmalloc in
 * yaz_marc8_decoder - confirm. Unlike the other handlers in this file it
 * is not declared static.
 */
252 void destroy_marc8(yaz_iconv_decoder_t d)
254 struct decoder_data *data = (struct decoder_data *) d->data;
/* Set up decoder d for the source encoding named by fromcode.
 *
 * "MARC8" and "ANSEL" select read_marc8 as the read handler, "MARC8s"
 * selects read_marc8s (yaz_matchstr is treated as returning 0 on a match,
 * hence the negations). A decoder_data block is obtained with xmalloc,
 * and init_marc8 / destroy_marc8 are installed as init and destroy
 * handlers.
 *
 * NOTE(review): the outcome for any other fromcode, and the statement
 * attaching the allocated block to d->data, are not evident from the
 * statements below - confirm.
 */
258 yaz_iconv_decoder_t yaz_marc8_decoder(const char *fromcode,
259 yaz_iconv_decoder_t d)
261 if (!yaz_matchstr(fromcode, "MARC8") || !yaz_matchstr(fromcode, "ANSEL"))
262 d->read_handle = read_marc8;
263 else if (!yaz_matchstr(fromcode, "MARC8s"))
264 d->read_handle = read_marc8s;
268 struct decoder_data *data = (struct decoder_data *)
269 xmalloc(sizeof(*data));
271 d->init_handle = init_marc8;
272 d->destroy_handle = destroy_marc8;
281 * c-file-style: "Stroustrup"
282 * indent-tabs-mode: nil
284 * vim: shiftwidth=4 tabstop=8 expandtab