2 * Copyright (C) 1995-2007, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.50 2008-03-12 08:53:28 adam Exp $
9 * \brief Implements simple ICONV
11 * This implements an interface similar to that of iconv and
12 * is used by YAZ to interface with iconv (if present).
13 * For systems where iconv is not present, this layer
14 * provides a few important conversions: UTF-8, MARC-8, Latin-1.
17 * http://www.loc.gov/marc/specifications/speccharmarc8.html
37 #include <yaz/yaz-util.h>
39 unsigned long yaz_marc8_42_conv(unsigned char *inp, size_t inbytesleft,
40 size_t *no_read, int *combining);
41 unsigned long yaz_marc8_45_conv(unsigned char *inp, size_t inbytesleft,
42 size_t *no_read, int *combining);
43 unsigned long yaz_marc8_67_conv(unsigned char *inp, size_t inbytesleft,
44 size_t *no_read, int *combining);
45 unsigned long yaz_marc8_62_conv(unsigned char *inp, size_t inbytesleft,
46 size_t *no_read, int *combining);
47 unsigned long yaz_marc8_70_conv(unsigned char *inp, size_t inbytesleft,
48 size_t *no_read, int *combining);
49 unsigned long yaz_marc8_32_conv(unsigned char *inp, size_t inbytesleft,
50 size_t *no_read, int *combining);
51 unsigned long yaz_marc8_4E_conv(unsigned char *inp, size_t inbytesleft,
52 size_t *no_read, int *combining);
53 unsigned long yaz_marc8_51_conv(unsigned char *inp, size_t inbytesleft,
54 size_t *no_read, int *combining);
55 unsigned long yaz_marc8_33_conv(unsigned char *inp, size_t inbytesleft,
56 size_t *no_read, int *combining);
57 unsigned long yaz_marc8_34_conv(unsigned char *inp, size_t inbytesleft,
58 size_t *no_read, int *combining);
59 unsigned long yaz_marc8_53_conv(unsigned char *inp, size_t inbytesleft,
60 size_t *no_read, int *combining);
61 unsigned long yaz_marc8_31_conv(unsigned char *inp, size_t inbytesleft,
62 size_t *no_read, int *combining);
65 unsigned long yaz_marc8r_42_conv(unsigned char *inp, size_t inbytesleft,
66 size_t *no_read, int *combining);
67 unsigned long yaz_marc8r_45_conv(unsigned char *inp, size_t inbytesleft,
68 size_t *no_read, int *combining);
69 unsigned long yaz_marc8r_67_conv(unsigned char *inp, size_t inbytesleft,
70 size_t *no_read, int *combining);
71 unsigned long yaz_marc8r_62_conv(unsigned char *inp, size_t inbytesleft,
72 size_t *no_read, int *combining);
73 unsigned long yaz_marc8r_70_conv(unsigned char *inp, size_t inbytesleft,
74 size_t *no_read, int *combining);
75 unsigned long yaz_marc8r_32_conv(unsigned char *inp, size_t inbytesleft,
76 size_t *no_read, int *combining);
77 unsigned long yaz_marc8r_4E_conv(unsigned char *inp, size_t inbytesleft,
78 size_t *no_read, int *combining);
79 unsigned long yaz_marc8r_51_conv(unsigned char *inp, size_t inbytesleft,
80 size_t *no_read, int *combining);
81 unsigned long yaz_marc8r_33_conv(unsigned char *inp, size_t inbytesleft,
82 size_t *no_read, int *combining);
83 unsigned long yaz_marc8r_34_conv(unsigned char *inp, size_t inbytesleft,
84 size_t *no_read, int *combining);
85 unsigned long yaz_marc8r_53_conv(unsigned char *inp, size_t inbytesleft,
86 size_t *no_read, int *combining);
87 unsigned long yaz_marc8r_31_conv(unsigned char *inp, size_t inbytesleft,
88 size_t *no_read, int *combining);
92 struct yaz_iconv_struct {
95 size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
96 size_t inbytesleft, size_t *no_read);
97 unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
98 size_t inbytesleft, size_t *no_read);
99 size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
100 char **outbuf, size_t *outbytesleft);
101 size_t (*flush_handle)(yaz_iconv_t cd,
102 char **outbuf, size_t *outbytesleft);
108 unsigned long comb_x[8];
109 size_t comb_no_read[8];
111 unsigned long unget_x;
115 unsigned long compose_char;
117 unsigned write_marc8_second_half_char;
118 unsigned long write_marc8_last;
119 const char *write_marc8_lpage;
120 const char *write_marc8_g0;
121 const char *write_marc8_g1;
125 unsigned long x1, x2;
128 { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
129 { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
130 { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
131 { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
132 { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
133 { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
134 /* no need for 0xc6 LATIN CAPITAL LETTER AE */
135 { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
136 { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
137 { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
138 { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
139 { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
140 { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
141 { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
142 { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
143 { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
144 { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
145 { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
146 { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
147 { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
148 { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
149 { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
150 /* omitted: 0xd7 MULTIPLICATION SIGN */
151 /* omitted: 0xd8 LATIN CAPITAL LETTER O WITH STROKE */
152 { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
153 { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
154 { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
155 { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
156 { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
157 /* omitted: 0xde LATIN CAPITAL LETTER THORN */
158 /* omitted: 0xdf LATIN SMALL LETTER SHARP S */
159 { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
160 { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
161 { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
162 { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
163 { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
164 { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
165 /* omitted: 0xe6 LATIN SMALL LETTER AE */
166 { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
167 { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
168 { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
169 { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
170 { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
171 { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
172 { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
173 { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
174 { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
175 /* omitted: 0xf0 LATIN SMALL LETTER ETH */
176 { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
177 { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
178 { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
179 { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
180 { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
181 { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
182 /* omitted: 0xf7 DIVISION SIGN */
183 /* omitted: 0xf8 LATIN SMALL LETTER O WITH STROKE */
184 { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
185 { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
186 { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
187 { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
188 { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
189 /* omitted: 0xfe LATIN SMALL LETTER THORN */
190 { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
195 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
196 char **outbuf, size_t *outbytesleft,
197 const char *page_chr);
199 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
200 size_t inbytesleft, size_t *no_read)
202 unsigned long x = inp[0];
208 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
209 size_t inbytesleft, size_t *no_read)
218 cd->my_errno = YAZ_ICONV_EINVAL;
221 if (inp[1] != 0xbb && inp[2] == 0xbf)
228 unsigned long yaz_read_UTF8_char(unsigned char *inp,
229 size_t inbytesleft, size_t *no_read,
234 *no_read = 0; /* by default */
240 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
242 *error = YAZ_ICONV_EILSEQ;
244 else if (inp[0] <= 0xdf && inbytesleft >= 2)
246 if ((inp[1] & 0xc0) == 0x80)
248 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
252 *error = YAZ_ICONV_EILSEQ;
255 *error = YAZ_ICONV_EILSEQ;
257 else if (inp[0] <= 0xef && inbytesleft >= 3)
259 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
261 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
266 *error = YAZ_ICONV_EILSEQ;
269 *error = YAZ_ICONV_EILSEQ;
271 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
273 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
274 && (inp[3] & 0xc0) == 0x80)
276 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
277 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
281 *error = YAZ_ICONV_EILSEQ;
284 *error = YAZ_ICONV_EILSEQ;
286 else if (inp[0] <= 0xfb && inbytesleft >= 5)
288 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
289 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
291 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
292 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
297 *error = YAZ_ICONV_EILSEQ;
300 *error = YAZ_ICONV_EILSEQ;
302 else if (inp[0] <= 0xfd && inbytesleft >= 6)
304 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
305 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
306 && (inp[5] & 0xc0) == 0x80)
308 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
309 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
310 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
314 *error = YAZ_ICONV_EILSEQ;
317 *error = YAZ_ICONV_EILSEQ;
320 *error = YAZ_ICONV_EINVAL; /* incomplete sentence */
325 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
326 size_t inbytesleft, size_t *no_read)
328 return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
331 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
332 size_t inbytesleft, size_t *no_read)
338 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
343 x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
349 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
350 size_t inbytesleft, size_t *no_read)
356 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
361 x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
368 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
369 size_t inbytesleft, size_t *no_read)
373 if (inbytesleft < sizeof(wchar_t))
375 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
381 memcpy (&wch, inp, sizeof(wch));
383 *no_read = sizeof(wch);
389 static unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
390 size_t inbytesleft, size_t *no_read)
397 while (inbytesleft > 0)
403 else if (*inp == 0xa3)
413 if (inbytesleft == 0)
415 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
420 case 0xe1: /* alpha small */
426 case 0xc1: /* alpha capital */
433 case 0xe2: /* Beta small */
436 case 0xc2: /* Beta capital */
440 case 0xe4: /* Gamma small */
443 case 0xc4: /* Gamma capital */
447 case 0xe5: /* Delta small */
450 case 0xc5: /* Delta capital */
453 case 0xe6: /* epsilon small */
459 case 0xc6: /* epsilon capital */
465 case 0xe9: /* Zeta small */
468 case 0xc9: /* Zeta capital */
471 case 0xea: /* Eta small */
477 case 0xca: /* Eta capital */
483 case 0xeb: /* Theta small */
486 case 0xcb: /* Theta capital */
489 case 0xec: /* Iota small */
501 case 0xcc: /* Iota capital */
510 case 0xed: /* Kappa small */
513 case 0xcd: /* Kappa capital */
516 case 0xee: /* Lambda small */
519 case 0xce: /* Lambda capital */
522 case 0xef: /* Mu small */
525 case 0xcf: /* Mu capital */
528 case 0xf0: /* Nu small */
531 case 0xd0: /* Nu capital */
534 case 0xf1: /* Xi small */
537 case 0xd1: /* Xi capital */
540 case 0xf2: /* Omicron small */
546 case 0xd2: /* Omicron capital */
552 case 0xf3: /* Pi small */
555 case 0xd3: /* Pi capital */
558 case 0xf5: /* Rho small */
561 case 0xd5: /* Rho capital */
564 case 0xf7: /* Sigma small (end of words) */
567 case 0xf6: /* Sigma small */
570 case 0xd6: /* Sigma capital */
573 case 0xf8: /* Tau small */
576 case 0xd8: /* Tau capital */
579 case 0xf9: /* Upsilon small */
591 case 0xd9: /* Upsilon capital */
600 case 0xfa: /* Phi small */
603 case 0xda: /* Phi capital */
606 case 0xfb: /* Chi small */
609 case 0xdb: /* Chi capital */
612 case 0xfc: /* Psi small */
615 case 0xdc: /* Psi capital */
618 case 0xfd: /* Omega small */
624 case 0xdd: /* Omega capital */
639 static size_t yaz_write_iso5428_1984(yaz_iconv_t cd, unsigned long x,
640 char **outbuf, size_t *outbytesleft)
643 unsigned char *out = (unsigned char*) *outbuf;
644 if (*outbytesleft < 3)
646 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
651 case 0x03ac : out[k++]=0xa2; out[k++]=0xe1; break;
652 case 0x03b1 : out[k++]=0xe1; break;
653 case 0x0386 : out[k++]=0xa2; out[k++]=0xc1; break;
654 case 0x0391 : out[k++]=0xc1; break;
655 case 0x03b2 : out[k++]=0xe2; break;
656 case 0x0392 : out[k++]=0xc2; break;
657 case 0x03b3 : out[k++]=0xe4; break;
658 case 0x0393 : out[k++]=0xc4; break;
659 case 0x03b4 : out[k++]=0xe5; break;
660 case 0x0394 : out[k++]=0xc5; break;
661 case 0x03ad : out[k++]=0xa2; out[k++]=0xe6; break;
662 case 0x03b5 : out[k++]=0xe6; break;
663 case 0x0388 : out[k++]=0xa2; out[k++]=0xc6; break;
664 case 0x0395 : out[k++]=0xc6; break;
665 case 0x03b6 : out[k++]=0xe9; break;
666 case 0x0396 : out[k++]=0xc9; break;
667 case 0x03ae : out[k++]=0xa2; out[k++]=0xea; break;
668 case 0x03b7 : out[k++]=0xea; break;
669 case 0x0389 : out[k++]=0xa2; out[k++]=0xca; break;
670 case 0x0397 : out[k++]=0xca; break;
671 case 0x03b8 : out[k++]=0xeb; break;
672 case 0x0398 : out[k++]=0xcb; break;
673 case 0x0390 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xec; break;
674 case 0x03af : out[k++]=0xa2; out[k++]=0xec; break;
675 case 0x03ca : out[k++]=0xa3; out[k++]=0xec; break;
676 case 0x03b9 : out[k++]=0xec; break;
677 case 0x038a : out[k++]=0xa2; out[k++]=0xcc; break;
678 case 0x03aa : out[k++]=0xa3; out[k++]=0xcc; break;
679 case 0x0399 : out[k++]=0xcc; break;
680 case 0x03ba : out[k++]=0xed; break;
681 case 0x039a : out[k++]=0xcd; break;
682 case 0x03bb : out[k++]=0xee; break;
683 case 0x039b : out[k++]=0xce; break;
684 case 0x03bc : out[k++]=0xef; break;
685 case 0x039c : out[k++]=0xcf; break;
686 case 0x03bd : out[k++]=0xf0; break;
687 case 0x039d : out[k++]=0xd0; break;
688 case 0x03be : out[k++]=0xf1; break;
689 case 0x039e : out[k++]=0xd1; break;
690 case 0x03cc : out[k++]=0xa2; out[k++]=0xf2; break;
691 case 0x03bf : out[k++]=0xf2; break;
692 case 0x038c : out[k++]=0xa2; out[k++]=0xd2; break;
693 case 0x039f : out[k++]=0xd2; break;
694 case 0x03c0 : out[k++]=0xf3; break;
695 case 0x03a0 : out[k++]=0xd3; break;
696 case 0x03c1 : out[k++]=0xf5; break;
697 case 0x03a1 : out[k++]=0xd5; break;
698 case 0x03c2 : out[k++]=0xf7; break;
699 case 0x03c3 : out[k++]=0xf6; break;
700 case 0x03a3 : out[k++]=0xd6; break;
701 case 0x03c4 : out[k++]=0xf8; break;
702 case 0x03a4 : out[k++]=0xd8; break;
703 case 0x03b0 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xf9; break;
704 case 0x03cd : out[k++]=0xa2; out[k++]=0xf9; break;
705 case 0x03cb : out[k++]=0xa3; out[k++]=0xf9; break;
706 case 0x03c5 : out[k++]=0xf9; break;
707 case 0x038e : out[k++]=0xa2; out[k++]=0xd9; break;
708 case 0x03ab : out[k++]=0xa3; out[k++]=0xd9; break;
709 case 0x03a5 : out[k++]=0xd9; break;
710 case 0x03c6 : out[k++]=0xfa; break;
711 case 0x03a6 : out[k++]=0xda; break;
712 case 0x03c7 : out[k++]=0xfb; break;
713 case 0x03a7 : out[k++]=0xdb; break;
714 case 0x03c8 : out[k++]=0xfc; break;
715 case 0x03a8 : out[k++]=0xdc; break;
716 case 0x03ce : out[k++]=0xa2; out[k++]=0xfd; break;
717 case 0x03c9 : out[k++]=0xfd; break;
718 case 0x038f : out[k++]=0xa2; out[k++]=0xdd; break;
719 case 0x03a9 : out[k++]=0xdd; break;
723 cd->my_errno = YAZ_ICONV_EILSEQ;
734 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
735 size_t inbytesleft, size_t *no_read)
743 while (inbytesleft > 0)
749 else if (*inp == 0x9e)
753 else if (*inp == 0x9f)
763 if (inbytesleft == 0)
765 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
993 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
994 char **outbuf, size_t *outbytesleft)
997 unsigned char *out = (unsigned char*) *outbuf;
998 if (*outbytesleft < 3)
1000 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
1001 return (size_t)(-1);
1005 case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
1006 case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
1007 case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
1008 case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
1009 case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
1010 case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
1011 case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
1012 case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
1013 case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
1014 case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
1015 case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
1016 case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
1017 case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
1018 case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
1019 case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
1020 case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
1021 case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
1022 case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
1023 case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
1024 case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
1025 case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
1026 case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
1027 case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
1028 case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
1029 case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
1030 case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
1031 case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
1032 case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
1033 case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
1034 case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
1035 case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
1036 case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
1037 case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
1038 case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
1039 case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
1040 case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
1041 case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
1042 case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
1043 case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
1044 case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
1045 case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
1046 case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
1047 case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
1048 case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
1049 case 0x03b1 : out[k++]=0x81; break;
1050 case 0x03b2 : out[k++]=0x82; break;
1051 case 0x03b3 : out[k++]=0x83; break;
1052 case 0x03b4 : out[k++]=0x84; break;
1053 case 0x03b5 : out[k++]=0x85; break;
1054 case 0x03b6 : out[k++]=0x86; break;
1055 case 0x03b7 : out[k++]=0x87; break;
1056 case 0x03b8 : out[k++]=0x88; break;
1057 case 0x03b9 : out[k++]=0x89; break;
1058 case 0x03ba : out[k++]=0x8a; break;
1059 case 0x03bb : out[k++]=0x8b; break;
1060 case 0x03bc : out[k++]=0x8c; break;
1061 case 0x03bd : out[k++]=0x8d; break;
1062 case 0x03be : out[k++]=0x8e; break;
1063 case 0x03bf : out[k++]=0x8f; break;
1064 case 0x03c0 : out[k++]=0x90; break;
1065 case 0x03c1 : out[k++]=0x91; break;
1066 case 0x03c2 : out[k++]=0x92; break;
1067 case 0x03c3 : out[k++]=0x93; break;
1068 case 0x03c4 : out[k++]=0x94; break;
1069 case 0x03c5 : out[k++]=0x95; break;
1070 case 0x03c6 : out[k++]=0x96; break;
1071 case 0x03c7 : out[k++]=0x96; break;
1072 case 0x03c8 : out[k++]=0x98; break;
1073 case 0x03c9 : out[k++]=0x99; break;
1077 cd->my_errno = YAZ_ICONV_EILSEQ;
1089 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
1090 size_t inbytesleft, size_t *no_read,
1093 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
1094 size_t inbytesleft, size_t *no_read)
1097 if (cd->comb_offset < cd->comb_size)
1099 *no_read = cd->comb_no_read[cd->comb_offset];
1100 x = cd->comb_x[cd->comb_offset];
1102 /* special case for double-diacritic combining characters,
1103 INVERTED BREVE and DOUBLE TILDE.
1104 We'll increment the no_read counter by 1, since we want to skip over
1105 the processing of the closing ligature character
1107 /* this code is no longer necessary.. our handlers code in
1108 yaz_marc8_?_conv (generated by charconv.tcl) now returns
1109 0 and no_read=1 when a sequence does not match the input.
The SECOND HALFs in codetables.xml produce a non-existent
1111 entry in the conversion trie.. Hence when met, the input byte is
1112 skipped as it should (in yaz_iconv)
1115 if (x == 0x0361 || x == 0x0360)
1122 cd->comb_offset = 0;
1123 for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
1127 if (inbytesleft == 0 && cd->comb_size)
1129 cd->my_errno = YAZ_ICONV_EINVAL;
1134 x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
1137 cd->comb_x[cd->comb_size] = x;
1138 cd->comb_no_read[cd->comb_size] = *no_read;
1140 inbytesleft = inbytesleft - *no_read;
1145 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
1146 size_t inbytesleft, size_t *no_read)
1148 unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
1149 if (x && cd->comb_size == 1)
1151 /* For MARC8s we try to get a Latin-1 page code out of it */
1153 for (i = 0; latin1_comb[i].x1; i++)
1154 if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
1156 *no_read += cd->comb_no_read[0];
1158 x = latin1_comb[i].y;
1165 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
1166 size_t inbytesleft, size_t *no_read,
1170 while(inbytesleft >= 1 && inp[0] == 27)
1173 size_t inbytesleft0 = inbytesleft;
1176 if (inbytesleft > 0 && *inp == '$')
1181 if (inbytesleft <= 1)
1184 cd->my_errno = YAZ_ICONV_EINVAL;
1189 if (inbytesleft > 0 && (ch == '(' || ch == ','))
1192 cd->g0_mode = *inp++;
1194 else if (inbytesleft > 0 && (ch == ')' || ch == '-'))
1197 cd->g1_mode = *inp++;
1202 (*no_read) += inbytesleft0 - inbytesleft;
1204 if (inbytesleft <= 0)
1206 else if (*inp == ' ')
1214 size_t no_read_sub = 0;
1215 int mode = *inp < 128 ? cd->g0_mode : cd->g1_mode;
1220 case 'B': /* Basic ASCII */
1221 case 's': /* ASCII */
1222 case 'E': /* ANSEL */
1223 x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb);
1227 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb);
1230 case 'g': /* Greek */
1231 x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb);
1233 case 'b': /* Subscripts */
1234 x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb);
1236 case 'p': /* Superscripts */
1237 x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb);
1239 case '2': /* Basic Hebrew */
1240 x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb);
1242 case 'N': /* Basic Cyrillic */
1243 x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1245 case 'Q': /* Extended Cyrillic */
1246 x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb);
1248 case '3': /* Basic Arabic */
1249 x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb);
1251 case '4': /* Extended Arabic */
1252 x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb);
1254 case 'S': /* Greek */
1255 x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb);
1257 case '1': /* Chinese, Japanese, Korean (EACC) */
1258 x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb);
1262 cd->my_errno = YAZ_ICONV_EILSEQ;
1265 *no_read += no_read_sub;
1270 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
1271 char **outbuf, size_t *outbytesleft)
1273 return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
1276 size_t yaz_write_UTF8_char(unsigned long x,
1277 char **outbuf, size_t *outbytesleft,
1280 unsigned char *outp = (unsigned char *) *outbuf;
1282 if (x <= 0x7f && *outbytesleft >= 1)
1284 *outp++ = (unsigned char) x;
1287 else if (x <= 0x7ff && *outbytesleft >= 2)
1289 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
1290 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1291 (*outbytesleft) -= 2;
1293 else if (x <= 0xffff && *outbytesleft >= 3)
1295 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
1296 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1297 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1298 (*outbytesleft) -= 3;
1300 else if (x <= 0x1fffff && *outbytesleft >= 4)
1302 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
1303 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1304 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1305 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1306 (*outbytesleft) -= 4;
1308 else if (x <= 0x3ffffff && *outbytesleft >= 5)
1310 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
1311 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1312 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1313 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1314 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1315 (*outbytesleft) -= 5;
1317 else if (*outbytesleft >= 6)
1319 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
1320 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
1321 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1322 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1323 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1324 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1325 (*outbytesleft) -= 6;
1329 *error = YAZ_ICONV_E2BIG; /* not room for output */
1330 return (size_t)(-1);
1332 *outbuf = (char *) outp;
1336 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
1337 char **outbuf, size_t *outbytesleft)
1339 /* list of two char unicode sequence that, when combined, are
1340 equivalent to single unicode chars that can be represented in
1342 Regular iconv on Linux at least does not seem to convert these,
1343 but since MARC-8 to UTF-8 generates these composed sequence
1344 we get a better chance of a successful MARC-8 -> ISO-8859-1
1346 unsigned char *outp = (unsigned char *) *outbuf;
1348 if (cd->compose_char)
1351 for (i = 0; latin1_comb[i].x1; i++)
1352 if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
1354 x = latin1_comb[i].y;
1357 if (*outbytesleft < 1)
1358 { /* no room. Retain compose_char and bail out */
1359 cd->my_errno = YAZ_ICONV_E2BIG;
1360 return (size_t)(-1);
1362 if (!latin1_comb[i].x1)
1363 { /* not found. Just write compose_char */
1364 *outp++ = (unsigned char) cd->compose_char;
1366 *outbuf = (char *) outp;
1368 /* compose_char used so reset it. x now holds current char */
1369 cd->compose_char = 0;
1372 if (x > 32 && x < 127 && cd->compose_char == 0)
1374 cd->compose_char = x;
1377 else if (x > 255 || x < 1)
1379 cd->my_errno = YAZ_ICONV_EILSEQ;
1382 else if (*outbytesleft < 1)
1384 cd->my_errno = YAZ_ICONV_E2BIG;
1385 return (size_t)(-1);
1387 *outp++ = (unsigned char) x;
1389 *outbuf = (char *) outp;
1393 static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
1394 char **outbuf, size_t *outbytesleft)
1396 if (cd->compose_char)
1398 unsigned char *outp = (unsigned char *) *outbuf;
1399 if (*outbytesleft < 1)
1401 cd->my_errno = YAZ_ICONV_E2BIG;
1402 return (size_t)(-1);
1404 *outp++ = (unsigned char) cd->compose_char;
1406 *outbuf = (char *) outp;
1407 cd->compose_char = 0;
1412 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
1413 char **outbuf, size_t *outbytesleft)
1415 unsigned char *outp = (unsigned char *) *outbuf;
1416 if (*outbytesleft >= 4)
1418 *outp++ = (unsigned char) (x>>24);
1419 *outp++ = (unsigned char) (x>>16);
1420 *outp++ = (unsigned char) (x>>8);
1421 *outp++ = (unsigned char) x;
1422 (*outbytesleft) -= 4;
1426 cd->my_errno = YAZ_ICONV_E2BIG;
1427 return (size_t)(-1);
1429 *outbuf = (char *) outp;
1433 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1434 char **outbuf, size_t *outbytesleft)
1436 unsigned char *outp = (unsigned char *) *outbuf;
1437 if (*outbytesleft >= 4)
1439 *outp++ = (unsigned char) x;
1440 *outp++ = (unsigned char) (x>>8);
1441 *outp++ = (unsigned char) (x>>16);
1442 *outp++ = (unsigned char) (x>>24);
1443 (*outbytesleft) -= 4;
1447 cd->my_errno = YAZ_ICONV_E2BIG;
1448 return (size_t)(-1);
1450 *outbuf = (char *) outp;
1454 static unsigned long lookup_marc8(yaz_iconv_t cd,
1455 unsigned long x, int *comb,
1456 const char **page_chr)
1459 char *utf8_outbuf = utf8_buf;
1460 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1462 r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
1463 if (r == (size_t)(-1))
1465 cd->my_errno = YAZ_ICONV_EILSEQ;
1471 size_t inbytesleft, no_read_sub = 0;
1474 *utf8_outbuf = '\0';
1475 inp = (unsigned char *) utf8_buf;
1476 inbytesleft = strlen(utf8_buf);
1478 x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb);
1481 *page_chr = ESC "(B";
1484 x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb);
1487 *page_chr = ESC "(B";
1490 x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb);
1493 *page_chr = ESC "b";
1496 x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb);
1499 *page_chr = ESC "p";
1502 x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb);
1505 *page_chr = ESC "(2";
1508 x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1511 *page_chr = ESC "(N";
1514 x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb);
1517 *page_chr = ESC "(Q";
1520 x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb);
1523 *page_chr = ESC "(3";
1526 x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb);
1529 *page_chr = ESC "(4";
1532 x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb);
1535 *page_chr = ESC "(S";
1538 x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb);
1541 *page_chr = ESC "$1";
1544 cd->my_errno = YAZ_ICONV_EILSEQ;
1549 static size_t flush_combos(yaz_iconv_t cd,
1550 char **outbuf, size_t *outbytesleft)
1552 unsigned long y = cd->write_marc8_last;
1560 assert(cd->write_marc8_lpage);
1561 if (cd->write_marc8_lpage)
1563 size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
1564 cd->write_marc8_lpage);
1569 byte = (unsigned char )((y>>16) & 0xff);
1571 out_buf[out_no++] = byte;
1572 byte = (unsigned char)((y>>8) & 0xff);
1574 out_buf[out_no++] = byte;
1575 byte = (unsigned char )(y & 0xff);
1577 out_buf[out_no++] = byte;
1579 if (out_no + 2 >= *outbytesleft)
1581 cd->my_errno = YAZ_ICONV_E2BIG;
1582 return (size_t) (-1);
1585 memcpy(*outbuf, out_buf, out_no);
1587 (*outbytesleft) -= out_no;
1588 if (cd->write_marc8_second_half_char)
1590 *(*outbuf)++ = cd->write_marc8_second_half_char;
1594 cd->write_marc8_last = 0;
1595 cd->write_marc8_lpage = 0;
1596 cd->write_marc8_second_half_char = 0;
1600 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
1601 char **outbuf, size_t *outbytesleft,
1602 const char *page_chr)
1604 const char **old_page_chr = &cd->write_marc8_g0;
/* are we going to a G1-set (such as ESC ")!E")? */
1607 if (page_chr && page_chr[1] == ')')
1608 old_page_chr = &cd->write_marc8_g1;
1610 if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
1613 const char *page_out = page_chr;
1615 if (*outbytesleft < 8)
1617 cd->my_errno = YAZ_ICONV_E2BIG;
1619 return (size_t) (-1);
1624 if (!strcmp(*old_page_chr, ESC "p")
1625 || !strcmp(*old_page_chr, ESC "g")
1626 || !strcmp(*old_page_chr, ESC "b"))
1629 /* Technique 1 leave */
1630 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
1632 /* Must leave script + enter new page */
1633 plen = strlen(page_out);
1634 memcpy(*outbuf, page_out, plen);
1636 (*outbytesleft) -= plen;
1637 page_out = ESC "(B";
1641 *old_page_chr = page_chr;
1642 plen = strlen(page_out);
1643 memcpy(*outbuf, page_out, plen);
1645 (*outbytesleft) -= plen;
/** \brief Converts one character to MARC-8 and keeps it as the pending one.
 *
 * x is translated with lookup_marc8(), which also reports a combining
 * flag (comb) and the page of the result (page_chr). The function uses
 * yaz_write_marc8_page_chr() and flush_combos() for output and finally
 * stores the translated value and its page in cd->write_marc8_last and
 * cd->write_marc8_lpage, from where flush_combos() picks them up.
 *
 * \param cd            converter state
 * \param x             character to convert
 * \param outbuf        output cursor
 * \param outbytesleft  free bytes in the output
 * \return (size_t)(-1) on failure; cd->my_errno is YAZ_ICONV_E2BIG when
 *         the output buffer has at most one free byte
 */
1651 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1652 char **outbuf, size_t *outbytesleft)
1655 const char *page_chr = 0;
1656 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1659 return (size_t) (-1);
1665 size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
/* remember the byte that flush_combos() appends after the next character */
1671 cd->write_marc8_second_half_char = 0xEC;
1672 else if (x == 0x0360)
1673 cd->write_marc8_second_half_char = 0xFB;
/* more than one free output byte is required */
1675 if (*outbytesleft <= 1)
1677 cd->my_errno = YAZ_ICONV_E2BIG;
1678 return (size_t) (-1);
1685 size_t r = flush_combos(cd, outbuf, outbytesleft);
/* the translated character and its page become the new pending state */
1689 cd->write_marc8_last = y;
1690 cd->write_marc8_lpage = page_chr;
/** \brief Flush handler for MARC-8 output (installed by yaz_iconv_open()).
 *
 * Writes the pending character with flush_combos(), forgets the tracked
 * G1 page and switches back to the ASCII page ESC "(B".
 *
 * \param cd            converter state
 * \param outbuf        output cursor
 * \param outbytesleft  free bytes in the output
 * \return result of yaz_write_marc8_page_chr() for the final page switch
 */
1695 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1696 char **outbuf, size_t *outbytesleft)
1698 size_t r = flush_combos(cd, outbuf, outbytesleft);
/* no G1 page is tracked after a flush */
1701 cd->write_marc8_g1 = 0;
1702 return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, ESC "(B");
/** \brief Write handler for MARC-8 output (installed by yaz_iconv_open()).
 *
 * Scans the latin1_comb table, which ends at the first entry whose x1 is
 * zero. When x equals an entry's y, the entry's x1 and x2 are written in
 * its place with yaz_write_marc8_2(); if that runs out of output space
 * (YAZ_ICONV_E2BIG) the state saved beforehand is put back. A character
 * without a table entry is handed to yaz_write_marc8_2() unchanged.
 *
 * \param cd            converter state
 * \param x             character to write
 * \param outbuf        output cursor
 * \param outbytesleft  free bytes in the output
 * \return result of the yaz_write_marc8_2() call(s)
 */
1705 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1706 char **outbuf, size_t *outbytesleft)
1709 for (i = 0; latin1_comb[i].x1; i++)
1711 if (x == latin1_comb[i].y)
1714 /* save the output pointers .. */
1715 char *outbuf0 = *outbuf;
1716 size_t outbytesleft0 = *outbytesleft;
/* NOTE(review): write_marc8_last is handled as unsigned long elsewhere
   in this file but is saved in an int here - confirm nothing is truncated */
1717 int last_ch = cd->write_marc8_last;
1718 const char *lpage = cd->write_marc8_lpage;
/* write the two replacement characters x1 and x2 */
1720 r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1721 outbuf, outbytesleft);
1724 r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1725 outbuf, outbytesleft);
/* NOTE(review): write_marc8_second_half_char and the tracked g0/g1 pages
   are not among the values saved above - verify the rollback is complete */
1726 if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1728 /* not enough room. reset output to original values */
1730 *outbytesleft = outbytesleft0;
1731 cd->write_marc8_last = last_ch;
1732 cd->write_marc8_lpage = lpage;
/* no table entry matched: write x itself */
1737 return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
/** \brief Write handler for WCHAR_T output (installed by yaz_iconv_open()).
 *
 * Copies one wchar_t object (wch, presumably holding x - confirm) to the
 * output in the host's native representation and advances the output
 * cursor by sizeof(wchar_t).
 *
 * \param cd            converter state
 * \param x             character to write
 * \param outbuf        output cursor; updated to point past the written bytes
 * \param outbytesleft  free bytes in the output; reduced by sizeof(wchar_t)
 * \return (size_t)(-1) with cd->my_errno set to YAZ_ICONV_E2BIG when fewer
 *         than sizeof(wchar_t) bytes are free
 */
1742 static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
1743 char **outbuf, size_t *outbytesleft)
1745 unsigned char *outp = (unsigned char *) *outbuf;
/* room for one whole wchar_t? */
1747 if (*outbytesleft >= sizeof(wchar_t))
/* memcpy avoids any alignment requirement on the output buffer */
1750 memcpy(outp, &wch, sizeof(wch));
1751 outp += sizeof(wch);
1752 (*outbytesleft) -= sizeof(wch);
1756 cd->my_errno = YAZ_ICONV_E2BIG;
1757 return (size_t)(-1);
/* publish the advanced cursor to the caller */
1759 *outbuf = (char *) outp;
/** \brief Tells whether a descriptor is served by YAZ's own conversions.
 *
 * \param cd  conversion descriptor
 * \return non-zero when both a read handler and a write handler are set
 *         on cd, zero otherwise
 */
1764 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1766 return cd->read_handle && cd->write_handle;
/** \brief Creates a conversion descriptor from fromcode to tocode.
 *
 * The descriptor is allocated with xmalloc() and starts with all handlers
 * cleared and my_errno set to YAZ_ICONV_UNKNOWN. Both encoding names are
 * compared with yaz_matchstr() against the encodings YAZ implements
 * itself; a match installs the corresponding read handler (plus an init
 * handler for UTF8) or write handler (plus a flush handler for ISO88591,
 * MARC8 and MARC8s). When either side has no built-in handler the system
 * iconv_open() is used for the pair.
 *
 * \param tocode    name of the target encoding
 * \param fromcode  name of the source encoding; a leading '@' is treated
 *                  specially (see the comment in the body)
 * \return the new conversion descriptor
 */
1769 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1771 yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
/* start without any handler */
1773 cd->write_handle = 0;
1774 cd->read_handle = 0;
1775 cd->init_handle = 0;
1776 cd->flush_handle = 0;
1777 cd->my_errno = YAZ_ICONV_UNKNOWN;
1779 /* a useful hack: if fromcode has a leading @,
1780 the library does not use YAZ's own conversions .. */
1781 if (fromcode[0] == '@')
/* built-in decoders, selected by the name of the source encoding */
1785 if (!yaz_matchstr(fromcode, "UTF8"))
1787 cd->read_handle = yaz_read_UTF8;
1788 cd->init_handle = yaz_init_UTF8;
1790 else if (!yaz_matchstr(fromcode, "ISO88591"))
1791 cd->read_handle = yaz_read_ISO8859_1;
1792 else if (!yaz_matchstr(fromcode, "UCS4"))
1793 cd->read_handle = yaz_read_UCS4;
1794 else if (!yaz_matchstr(fromcode, "UCS4LE"))
1795 cd->read_handle = yaz_read_UCS4LE;
1796 else if (!yaz_matchstr(fromcode, "MARC8"))
1797 cd->read_handle = yaz_read_marc8;
1798 else if (!yaz_matchstr(fromcode, "MARC8s"))
1799 cd->read_handle = yaz_read_marc8s;
1800 else if (!yaz_matchstr(fromcode, "advancegreek"))
1801 cd->read_handle = yaz_read_advancegreek;
1802 else if (!yaz_matchstr(fromcode, "iso54281984"))
1803 cd->read_handle = yaz_read_iso5428_1984;
1804 else if (!yaz_matchstr(fromcode, "iso5428:1984"))
1805 cd->read_handle = yaz_read_iso5428_1984;
1807 else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1808 cd->read_handle = yaz_read_wchar_t;
/* built-in encoders, selected by the name of the target encoding */
1811 if (!yaz_matchstr(tocode, "UTF8"))
1812 cd->write_handle = yaz_write_UTF8;
1813 else if (!yaz_matchstr(tocode, "ISO88591"))
1815 cd->write_handle = yaz_write_ISO8859_1;
1816 cd->flush_handle = yaz_flush_ISO8859_1;
1818 else if (!yaz_matchstr (tocode, "UCS4"))
1819 cd->write_handle = yaz_write_UCS4;
1820 else if (!yaz_matchstr(tocode, "UCS4LE"))
1821 cd->write_handle = yaz_write_UCS4LE;
/* MARC8 and MARC8s share the same write and flush handlers */
1822 else if (!yaz_matchstr(tocode, "MARC8"))
1824 cd->write_handle = yaz_write_marc8;
1825 cd->flush_handle = yaz_flush_marc8;
1827 else if (!yaz_matchstr(tocode, "MARC8s"))
1829 cd->write_handle = yaz_write_marc8;
1830 cd->flush_handle = yaz_flush_marc8;
1832 else if (!yaz_matchstr(tocode, "advancegreek"))
1834 cd->write_handle = yaz_write_advancegreek;
1836 else if (!yaz_matchstr(tocode, "iso54281984"))
1838 cd->write_handle = yaz_write_iso5428_1984;
1840 else if (!yaz_matchstr(tocode, "iso5428:1984"))
1842 cd->write_handle = yaz_write_iso5428_1984;
1845 else if (!yaz_matchstr(tocode, "WCHAR_T"))
1846 cd->write_handle = yaz_write_wchar_t;
/* fall back to the system iconv when a built-in handler is missing on
   either side; iconv_open() reports failure as (iconv_t) (-1) */
1851 if (!cd->read_handle || !cd->write_handle)
1853 cd->iconv_cd = iconv_open (tocode, fromcode);
1854 if (cd->iconv_cd == (iconv_t) (-1))
1861 if (!cd->read_handle || !cd->write_handle)
/** \brief Converts characters from an input buffer to an output buffer.
 *
 * The parameters mirror those of iconv(). Two paths are visible:
 * - system iconv: the call is forwarded to iconv() on cd->iconv_cd and a
 *   failure is translated from yaz_errno() into one of YAZ_ICONV_E2BIG,
 *   YAZ_ICONV_EINVAL, YAZ_ICONV_EILSEQ or YAZ_ICONV_UNKNOWN;
 * - built-in handlers: characters are obtained with cd->read_handle and
 *   emitted with cd->write_handle. A character that could not be written
 *   because the output is full (YAZ_ICONV_E2BIG) is remembered together
 *   with its input length (cd->no_read_x) so a later call can retry it.
 *
 * A call with a null inbuf or null *inbuf and a non-null output buffer
 * may write the remembered character (cd->unget_x) and runs the flush
 * handler when one is installed.
 *
 * \param cd            conversion descriptor
 * \param inbuf         input cursor; advanced past the consumed bytes
 * \param inbytesleft   bytes left in the input; reduced accordingly
 * \param outbuf        output cursor
 * \param outbytesleft  free bytes in the output
 * \return (size_t)(-1) on error, with the cause available from
 *         yaz_iconv_error()
 */
1871 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1872 char **outbuf, size_t *outbytesleft)
/* system iconv path: forward the call and map its error code */
1881 iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1882 if (r == (size_t)(-1))
1884 switch (yaz_errno())
1887 cd->my_errno = YAZ_ICONV_E2BIG;
1890 cd->my_errno = YAZ_ICONV_EINVAL;
1893 cd->my_errno = YAZ_ICONV_EILSEQ;
1896 cd->my_errno = YAZ_ICONV_UNKNOWN;
1908 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* put the converter into its initial state; G0 starts on the ASCII
   page ESC "(B" and no G1 page is selected */
1912 cd->comb_offset = cd->comb_size = 0;
1913 cd->compose_char = 0;
1915 cd->write_marc8_second_half_char = 0;
1916 cd->write_marc8_last = 0;
1917 cd->write_marc8_lpage = 0;
1918 cd->write_marc8_g0 = ESC "(B";
1919 cd->write_marc8_g1 = 0;
/* let the decoder inspect the start of the input; the bytes it
   consumes are removed from the input count */
1927 if (cd->init_handle && inbuf && *inbuf)
1930 size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1931 *inbytesleft, &no_read);
1934 if (cd->my_errno == YAZ_ICONV_EINVAL)
1939 *inbytesleft -= no_read;
/* no input given: write what is still pending and flush */
1945 if (!inbuf || !*inbuf)
1947 if (outbuf && *outbuf)
1950 r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
1951 if (cd->flush_handle)
1952 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
1967 no_read = cd->no_read_x;
1971 if (*inbytesleft == 0)
1973 r = *inbuf - inbuf0;
/* decode one character, then encode it */
1976 x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1986 r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
1989 /* unable to write it. save it because read_handle cannot
1991 if (cd->my_errno == YAZ_ICONV_E2BIG)
1994 cd->no_read_x = no_read;
2000 *inbytesleft -= no_read;
2001 (*inbuf) += no_read;
/** \brief Returns the error code recorded on a conversion descriptor.
 *
 * \param cd  conversion descriptor
 * \return cd->my_errno, one of the YAZ_ICONV_* codes
 */
2006 int yaz_iconv_error (yaz_iconv_t cd)
2008 return cd->my_errno;
/** \brief Releases a conversion descriptor.
 *
 * Closes the system iconv descriptor kept in cd->iconv_cd with
 * iconv_close().
 * NOTE(review): presumably the descriptor itself is released here as
 * well and a status is returned - confirm against the full function.
 *
 * \param cd  conversion descriptor
 */
2011 int yaz_iconv_close (yaz_iconv_t cd)
2015 iconv_close (cd->iconv_cd);
2024 * indent-tabs-mode: nil
2026 * vim: shiftwidth=4 tabstop=8 expandtab