2 * Copyright (C) 1995-2008, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.50 2008-03-12 08:53:28 adam Exp $
9 * \brief Implements simple ICONV
11 * This implements an interface similar to that of iconv and
12 * is used by YAZ to interface with iconv (if present).
13 * For systems where iconv is not present, this layer
14 * provides a few important conversions: UTF-8, MARC-8, Latin-1.
17 * http://www.loc.gov/marc/specifications/speccharmarc8.html
37 #include <yaz/yaz-util.h>
39 unsigned long yaz_marc8_42_conv(unsigned char *inp, size_t inbytesleft,
40 size_t *no_read, int *combining);
41 unsigned long yaz_marc8_45_conv(unsigned char *inp, size_t inbytesleft,
42 size_t *no_read, int *combining);
43 unsigned long yaz_marc8_67_conv(unsigned char *inp, size_t inbytesleft,
44 size_t *no_read, int *combining);
45 unsigned long yaz_marc8_62_conv(unsigned char *inp, size_t inbytesleft,
46 size_t *no_read, int *combining);
47 unsigned long yaz_marc8_70_conv(unsigned char *inp, size_t inbytesleft,
48 size_t *no_read, int *combining);
49 unsigned long yaz_marc8_32_conv(unsigned char *inp, size_t inbytesleft,
50 size_t *no_read, int *combining);
51 unsigned long yaz_marc8_4E_conv(unsigned char *inp, size_t inbytesleft,
52 size_t *no_read, int *combining);
53 unsigned long yaz_marc8_51_conv(unsigned char *inp, size_t inbytesleft,
54 size_t *no_read, int *combining);
55 unsigned long yaz_marc8_33_conv(unsigned char *inp, size_t inbytesleft,
56 size_t *no_read, int *combining);
57 unsigned long yaz_marc8_34_conv(unsigned char *inp, size_t inbytesleft,
58 size_t *no_read, int *combining);
59 unsigned long yaz_marc8_53_conv(unsigned char *inp, size_t inbytesleft,
60 size_t *no_read, int *combining);
61 unsigned long yaz_marc8_31_conv(unsigned char *inp, size_t inbytesleft,
62 size_t *no_read, int *combining);
65 unsigned long yaz_marc8r_42_conv(unsigned char *inp, size_t inbytesleft,
66 size_t *no_read, int *combining);
67 unsigned long yaz_marc8r_45_conv(unsigned char *inp, size_t inbytesleft,
68 size_t *no_read, int *combining);
69 unsigned long yaz_marc8r_67_conv(unsigned char *inp, size_t inbytesleft,
70 size_t *no_read, int *combining);
71 unsigned long yaz_marc8r_62_conv(unsigned char *inp, size_t inbytesleft,
72 size_t *no_read, int *combining);
73 unsigned long yaz_marc8r_70_conv(unsigned char *inp, size_t inbytesleft,
74 size_t *no_read, int *combining);
75 unsigned long yaz_marc8r_32_conv(unsigned char *inp, size_t inbytesleft,
76 size_t *no_read, int *combining);
77 unsigned long yaz_marc8r_4E_conv(unsigned char *inp, size_t inbytesleft,
78 size_t *no_read, int *combining);
79 unsigned long yaz_marc8r_51_conv(unsigned char *inp, size_t inbytesleft,
80 size_t *no_read, int *combining);
81 unsigned long yaz_marc8r_33_conv(unsigned char *inp, size_t inbytesleft,
82 size_t *no_read, int *combining);
83 unsigned long yaz_marc8r_34_conv(unsigned char *inp, size_t inbytesleft,
84 size_t *no_read, int *combining);
85 unsigned long yaz_marc8r_53_conv(unsigned char *inp, size_t inbytesleft,
86 size_t *no_read, int *combining);
87 unsigned long yaz_marc8r_31_conv(unsigned char *inp, size_t inbytesleft,
88 size_t *no_read, int *combining);
92 struct yaz_iconv_struct {
95 size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
96 size_t inbytesleft, size_t *no_read);
97 unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
98 size_t inbytesleft, size_t *no_read);
99 size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
100 char **outbuf, size_t *outbytesleft);
101 size_t (*flush_handle)(yaz_iconv_t cd,
102 char **outbuf, size_t *outbytesleft);
108 unsigned long comb_x[8];
109 size_t comb_no_read[8];
111 unsigned long unget_x;
115 unsigned long compose_char;
117 unsigned write_marc8_second_half_char;
118 unsigned long write_marc8_last;
119 const char *write_marc8_lpage;
120 const char *write_marc8_g0;
121 const char *write_marc8_g1;
125 unsigned long x1, x2;
128 { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
129 { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
130 { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
131 { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
132 { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
133 { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
134 /* no need for 0xc6 LATIN CAPITAL LETTER AE */
135 { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
136 { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
137 { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
138 { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
139 { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
140 { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
141 { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
142 { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
143 { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
144 { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
145 { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
146 { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
147 { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
148 { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
149 { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
150 /* omitted: 0xd7 MULTIPLICATION SIGN */
151 /* omitted: 0xd8 LATIN CAPITAL LETTER O WITH STROKE */
152 { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
153 { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
154 { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
155 { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
156 { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
157 /* omitted: 0xde LATIN CAPITAL LETTER THORN */
158 /* omitted: 0xdf LATIN SMALL LETTER SHARP S */
159 { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
160 { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
161 { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
162 { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
163 { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
164 { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
165 /* omitted: 0xe6 LATIN SMALL LETTER AE */
166 { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
167 { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
168 { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
169 { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
170 { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
171 { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
172 { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
173 { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
174 { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
175 /* omitted: 0xf0 LATIN SMALL LETTER ETH */
176 { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
177 { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
178 { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
179 { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
180 { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
181 { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
182 /* omitted: 0xf7 DIVISION SIGN */
183 /* omitted: 0xf8 LATIN SMALL LETTER O WITH STROKE */
184 { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
185 { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
186 { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
187 { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
188 { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
189 /* omitted: 0xfe LATIN SMALL LETTER THORN */
190 { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
195 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
196 char **outbuf, size_t *outbytesleft,
197 const char *page_chr);
199 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
200 size_t inbytesleft, size_t *no_read)
202 unsigned long x = inp[0];
208 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
209 size_t inbytesleft, size_t *no_read)
218 cd->my_errno = YAZ_ICONV_EINVAL;
221 if (inp[1] == 0xbb && inp[2] == 0xbf) /* UTF-8 BOM is EF BB BF */
228 unsigned long yaz_read_UTF8_char(unsigned char *inp,
229 size_t inbytesleft, size_t *no_read,
234 *no_read = 0; /* by default */
240 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
242 *error = YAZ_ICONV_EILSEQ;
244 else if (inp[0] <= 0xdf && inbytesleft >= 2)
246 if ((inp[1] & 0xc0) == 0x80)
248 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
252 *error = YAZ_ICONV_EILSEQ;
255 *error = YAZ_ICONV_EILSEQ;
257 else if (inp[0] <= 0xef && inbytesleft >= 3)
259 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
261 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
266 *error = YAZ_ICONV_EILSEQ;
269 *error = YAZ_ICONV_EILSEQ;
271 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
273 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
274 && (inp[3] & 0xc0) == 0x80)
276 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
277 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
281 *error = YAZ_ICONV_EILSEQ;
284 *error = YAZ_ICONV_EILSEQ;
286 else if (inp[0] <= 0xfb && inbytesleft >= 5)
288 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
289 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
291 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
292 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
297 *error = YAZ_ICONV_EILSEQ;
300 *error = YAZ_ICONV_EILSEQ;
302 else if (inp[0] <= 0xfd && inbytesleft >= 6)
304 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
305 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
306 && (inp[5] & 0xc0) == 0x80)
308 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
309 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
310 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
314 *error = YAZ_ICONV_EILSEQ;
317 *error = YAZ_ICONV_EILSEQ;
320 *error = YAZ_ICONV_EINVAL; /* incomplete input sequence */
325 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
326 size_t inbytesleft, size_t *no_read)
328 return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
331 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
332 size_t inbytesleft, size_t *no_read)
338 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* widen first: inp[0] promotes to int, and <<24 of a byte >= 0x80 overflows it */
343 x = ((unsigned long) inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
349 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
350 size_t inbytesleft, size_t *no_read)
356 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* widen first: inp[3] promotes to int, and <<24 of a byte >= 0x80 overflows it */
361 x = ((unsigned long) inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
368 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
369 size_t inbytesleft, size_t *no_read)
373 if (inbytesleft < sizeof(wchar_t))
375 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
381 memcpy (&wch, inp, sizeof(wch));
383 *no_read = sizeof(wch);
389 static unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
390 size_t inbytesleft, size_t *no_read)
397 while (inbytesleft > 0)
403 else if (*inp == 0xa3)
413 if (inbytesleft == 0)
415 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
420 case 0xe1: /* alpha small */
426 case 0xc1: /* alpha capital */
433 case 0xe2: /* Beta small */
436 case 0xc2: /* Beta capital */
440 case 0xe4: /* Gamma small */
443 case 0xc4: /* Gamma capital */
447 case 0xe5: /* Delta small */
450 case 0xc5: /* Delta capital */
453 case 0xe6: /* epsilon small */
459 case 0xc6: /* epsilon capital */
465 case 0xe9: /* Zeta small */
468 case 0xc9: /* Zeta capital */
471 case 0xea: /* Eta small */
477 case 0xca: /* Eta capital */
483 case 0xeb: /* Theta small */
486 case 0xcb: /* Theta capital */
489 case 0xec: /* Iota small */
501 case 0xcc: /* Iota capital */
510 case 0xed: /* Kappa small */
513 case 0xcd: /* Kappa capital */
516 case 0xee: /* Lambda small */
519 case 0xce: /* Lambda capital */
522 case 0xef: /* Mu small */
525 case 0xcf: /* Mu capital */
528 case 0xf0: /* Nu small */
531 case 0xd0: /* Nu capital */
534 case 0xf1: /* Xi small */
537 case 0xd1: /* Xi capital */
540 case 0xf2: /* Omicron small */
546 case 0xd2: /* Omicron capital */
552 case 0xf3: /* Pi small */
555 case 0xd3: /* Pi capital */
558 case 0xf5: /* Rho small */
561 case 0xd5: /* Rho capital */
564 case 0xf7: /* Sigma small (end of words) */
567 case 0xf6: /* Sigma small */
570 case 0xd6: /* Sigma capital */
573 case 0xf8: /* Tau small */
576 case 0xd8: /* Tau capital */
579 case 0xf9: /* Upsilon small */
591 case 0xd9: /* Upsilon capital */
600 case 0xfa: /* Phi small */
603 case 0xda: /* Phi capital */
606 case 0xfb: /* Chi small */
609 case 0xdb: /* Chi capital */
612 case 0xfc: /* Psi small */
615 case 0xdc: /* Psi capital */
618 case 0xfd: /* Omega small */
624 case 0xdd: /* Omega capital */
639 static size_t yaz_write_iso5428_1984(yaz_iconv_t cd, unsigned long x,
640 char **outbuf, size_t *outbytesleft)
643 unsigned char *out = (unsigned char*) *outbuf;
644 if (*outbytesleft < 3)
646 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
651 case 0x03ac : out[k++]=0xa2; out[k++]=0xe1; break;
652 case 0x03b1 : out[k++]=0xe1; break;
653 case 0x0386 : out[k++]=0xa2; out[k++]=0xc1; break;
654 case 0x0391 : out[k++]=0xc1; break;
655 case 0x03b2 : out[k++]=0xe2; break;
656 case 0x0392 : out[k++]=0xc2; break;
657 case 0x03b3 : out[k++]=0xe4; break;
658 case 0x0393 : out[k++]=0xc4; break;
659 case 0x03b4 : out[k++]=0xe5; break;
660 case 0x0394 : out[k++]=0xc5; break;
661 case 0x03ad : out[k++]=0xa2; out[k++]=0xe6; break;
662 case 0x03b5 : out[k++]=0xe6; break;
663 case 0x0388 : out[k++]=0xa2; out[k++]=0xc6; break;
664 case 0x0395 : out[k++]=0xc6; break;
665 case 0x03b6 : out[k++]=0xe9; break;
666 case 0x0396 : out[k++]=0xc9; break;
667 case 0x03ae : out[k++]=0xa2; out[k++]=0xea; break;
668 case 0x03b7 : out[k++]=0xea; break;
669 case 0x0389 : out[k++]=0xa2; out[k++]=0xca; break;
670 case 0x0397 : out[k++]=0xca; break;
671 case 0x03b8 : out[k++]=0xeb; break;
672 case 0x0398 : out[k++]=0xcb; break;
673 case 0x0390 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xec; break;
674 case 0x03af : out[k++]=0xa2; out[k++]=0xec; break;
675 case 0x03ca : out[k++]=0xa3; out[k++]=0xec; break;
676 case 0x03b9 : out[k++]=0xec; break;
677 case 0x038a : out[k++]=0xa2; out[k++]=0xcc; break;
678 case 0x03aa : out[k++]=0xa3; out[k++]=0xcc; break;
679 case 0x0399 : out[k++]=0xcc; break;
680 case 0x03ba : out[k++]=0xed; break;
681 case 0x039a : out[k++]=0xcd; break;
682 case 0x03bb : out[k++]=0xee; break;
683 case 0x039b : out[k++]=0xce; break;
684 case 0x03bc : out[k++]=0xef; break;
685 case 0x039c : out[k++]=0xcf; break;
686 case 0x03bd : out[k++]=0xf0; break;
687 case 0x039d : out[k++]=0xd0; break;
688 case 0x03be : out[k++]=0xf1; break;
689 case 0x039e : out[k++]=0xd1; break;
690 case 0x03cc : out[k++]=0xa2; out[k++]=0xf2; break;
691 case 0x03bf : out[k++]=0xf2; break;
692 case 0x038c : out[k++]=0xa2; out[k++]=0xd2; break;
693 case 0x039f : out[k++]=0xd2; break;
694 case 0x03c0 : out[k++]=0xf3; break;
695 case 0x03a0 : out[k++]=0xd3; break;
696 case 0x03c1 : out[k++]=0xf5; break;
697 case 0x03a1 : out[k++]=0xd5; break;
698 case 0x03c2 : out[k++]=0xf7; break;
699 case 0x03c3 : out[k++]=0xf6; break;
700 case 0x03a3 : out[k++]=0xd6; break;
701 case 0x03c4 : out[k++]=0xf8; break;
702 case 0x03a4 : out[k++]=0xd8; break;
703 case 0x03b0 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xf9; break;
704 case 0x03cd : out[k++]=0xa2; out[k++]=0xf9; break;
705 case 0x03cb : out[k++]=0xa3; out[k++]=0xf9; break;
706 case 0x03c5 : out[k++]=0xf9; break;
707 case 0x038e : out[k++]=0xa2; out[k++]=0xd9; break;
708 case 0x03ab : out[k++]=0xa3; out[k++]=0xd9; break;
709 case 0x03a5 : out[k++]=0xd9; break;
710 case 0x03c6 : out[k++]=0xfa; break;
711 case 0x03a6 : out[k++]=0xda; break;
712 case 0x03c7 : out[k++]=0xfb; break;
713 case 0x03a7 : out[k++]=0xdb; break;
714 case 0x03c8 : out[k++]=0xfc; break;
715 case 0x03a8 : out[k++]=0xdc; break;
716 case 0x03ce : out[k++]=0xa2; out[k++]=0xfd; break;
717 case 0x03c9 : out[k++]=0xfd; break;
718 case 0x038f : out[k++]=0xa2; out[k++]=0xdd; break;
719 case 0x03a9 : out[k++]=0xdd; break;
723 cd->my_errno = YAZ_ICONV_EILSEQ;
734 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
735 size_t inbytesleft, size_t *no_read)
743 while (inbytesleft > 0)
749 else if (*inp == 0x9e)
753 else if (*inp == 0x9f)
763 if (inbytesleft == 0)
765 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
993 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
994 char **outbuf, size_t *outbytesleft)
997 unsigned char *out = (unsigned char*) *outbuf;
998 if (*outbytesleft < 3)
1000 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
1001 return (size_t)(-1);
1005 case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
1006 case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
1007 case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
1008 case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
1009 case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
1010 case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
1011 case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
1012 case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
1013 case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
1014 case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
1015 case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
1016 case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
1017 case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
1018 case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
1019 case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
1020 case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
1021 case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
1022 case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
1023 case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
1024 case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
1025 case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
1026 case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
1027 case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
1028 case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
1029 case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
1030 case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
1031 case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
1032 case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
1033 case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
1034 case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
1035 case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
1036 case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
1037 case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
1038 case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
1039 case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
1040 case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
1041 case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
1042 case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
1043 case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
1044 case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
1045 case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
1046 case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
1047 case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
1048 case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
1049 case 0x03b1 : out[k++]=0x81; break;
1050 case 0x03b2 : out[k++]=0x82; break;
1051 case 0x03b3 : out[k++]=0x83; break;
1052 case 0x03b4 : out[k++]=0x84; break;
1053 case 0x03b5 : out[k++]=0x85; break;
1054 case 0x03b6 : out[k++]=0x86; break;
1055 case 0x03b7 : out[k++]=0x87; break;
1056 case 0x03b8 : out[k++]=0x88; break;
1057 case 0x03b9 : out[k++]=0x89; break;
1058 case 0x03ba : out[k++]=0x8a; break;
1059 case 0x03bb : out[k++]=0x8b; break;
1060 case 0x03bc : out[k++]=0x8c; break;
1061 case 0x03bd : out[k++]=0x8d; break;
1062 case 0x03be : out[k++]=0x8e; break;
1063 case 0x03bf : out[k++]=0x8f; break;
1064 case 0x03c0 : out[k++]=0x90; break;
1065 case 0x03c1 : out[k++]=0x91; break;
1066 case 0x03c2 : out[k++]=0x92; break;
1067 case 0x03c3 : out[k++]=0x93; break;
1068 case 0x03c4 : out[k++]=0x94; break;
1069 case 0x03c5 : out[k++]=0x95; break;
1070 case 0x03c6 : out[k++]=0x96; break;
1071 case 0x03c7 : out[k++]=0x97; break; /* chi; 0x96 is phi (0x03c6) */
1072 case 0x03c8 : out[k++]=0x98; break;
1073 case 0x03c9 : out[k++]=0x99; break;
1077 cd->my_errno = YAZ_ICONV_EILSEQ;
1089 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
1090 size_t inbytesleft, size_t *no_read,
1093 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
1094 size_t inbytesleft, size_t *no_read)
1097 if (cd->comb_offset < cd->comb_size)
1099 *no_read = cd->comb_no_read[cd->comb_offset];
1100 x = cd->comb_x[cd->comb_offset];
1102 /* special case for double-diacritic combining characters,
1103 INVERTED BREVE and DOUBLE TILDE.
1104 We'll increment the no_read counter by 1, since we want to skip over
1105 the processing of the closing ligature character
1107 /* This code is no longer necessary. The handler code in
1108 yaz_marc8_?_conv (generated by charconv.tcl) now returns
1109 0 and no_read=1 when a sequence does not match the input.
1110 The SECOND HALFs in codetables.xml produce a non-existent
1111 entry in the conversion trie. Hence, when met, the input byte is
1112 skipped as it should be (in yaz_iconv)
1115 if (x == 0x0361 || x == 0x0360)
1122 cd->comb_offset = 0;
1123 for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
1127 if (inbytesleft == 0 && cd->comb_size)
1129 cd->my_errno = YAZ_ICONV_EINVAL;
1134 x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
1137 cd->comb_x[cd->comb_size] = x;
1138 cd->comb_no_read[cd->comb_size] = *no_read;
1140 inbytesleft = inbytesleft - *no_read;
1145 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
1146 size_t inbytesleft, size_t *no_read)
1148 unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
1149 if (x && cd->comb_size == 1)
1151 /* For MARC8s we try to get a Latin-1 page code out of it */
1153 for (i = 0; latin1_comb[i].x1; i++)
1154 if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
1156 *no_read += cd->comb_no_read[0];
1158 x = latin1_comb[i].y;
1165 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
1166 size_t inbytesleft, size_t *no_read,
1170 while (inbytesleft > 0 && *inp == 27)
1172 int *modep = &cd->g0_mode;
1173 size_t inbytesleft0 = inbytesleft;
1177 if (inbytesleft == 0)
1179 if (*inp == '$') /* set with multiple bytes */
1184 if (inbytesleft == 0)
1186 if (*inp == '(' || *inp == ',') /* G0 */
1191 else if (*inp == ')' || *inp == '-') /* G1 */
1195 modep = &cd->g1_mode;
1197 if (inbytesleft == 0)
1199 if (*inp == '!') /* ANSEL is a special case */
1204 if (inbytesleft == 0)
1206 *modep = *inp++; /* Final character */
1209 (*no_read) += inbytesleft0 - inbytesleft;
1211 if (inbytesleft == 0)
1213 else if (*inp == ' ')
1221 size_t no_read_sub = 0;
1222 int mode = *inp < 128 ? cd->g0_mode : cd->g1_mode;
1227 case 'B': /* Basic ASCII */
1228 case 's': /* ASCII */
1229 case 'E': /* ANSEL */
1230 x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb);
1234 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb);
1237 case 'g': /* Greek */
1238 x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb);
1240 case 'b': /* Subscripts */
1241 x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb);
1243 case 'p': /* Superscripts */
1244 x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb);
1246 case '2': /* Basic Hebrew */
1247 x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb);
1249 case 'N': /* Basic Cyrillic */
1250 x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1252 case 'Q': /* Extended Cyrillic */
1253 x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb);
1255 case '3': /* Basic Arabic */
1256 x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb);
1258 case '4': /* Extended Arabic */
1259 x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb);
1261 case 'S': /* Greek */
1262 x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb);
1264 case '1': /* Chinese, Japanese, Korean (EACC) */
1265 x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb);
1269 cd->my_errno = YAZ_ICONV_EILSEQ;
1272 *no_read += no_read_sub;
1277 cd->my_errno = YAZ_ICONV_EINVAL;
1281 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
1282 char **outbuf, size_t *outbytesleft)
1284 return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
1287 size_t yaz_write_UTF8_char(unsigned long x,
1288 char **outbuf, size_t *outbytesleft,
1291 unsigned char *outp = (unsigned char *) *outbuf;
1293 if (x <= 0x7f && *outbytesleft >= 1)
1295 *outp++ = (unsigned char) x;
1298 else if (x <= 0x7ff && *outbytesleft >= 2)
1300 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
1301 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1302 (*outbytesleft) -= 2;
1304 else if (x <= 0xffff && *outbytesleft >= 3)
1306 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
1307 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1308 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1309 (*outbytesleft) -= 3;
1311 else if (x <= 0x1fffff && *outbytesleft >= 4)
1313 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
1314 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1315 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1316 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1317 (*outbytesleft) -= 4;
1319 else if (x <= 0x3ffffff && *outbytesleft >= 5)
1321 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
1322 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1323 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1324 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1325 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1326 (*outbytesleft) -= 5;
1328 else if (*outbytesleft >= 6)
1330 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
1331 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
1332 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1333 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1334 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1335 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1336 (*outbytesleft) -= 6;
1340 *error = YAZ_ICONV_E2BIG; /* not room for output */
1341 return (size_t)(-1);
1343 *outbuf = (char *) outp;
1347 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
1348 char **outbuf, size_t *outbytesleft)
1350 /* list of two char unicode sequence that, when combined, are
1351 equivalent to single unicode chars that can be represented in
1353 Regular iconv on Linux at least does not seem to convert these,
1354 but since MARC-8 to UTF-8 generates these composed sequence
1355 we get a better chance of a successful MARC-8 -> ISO-8859-1
1357 unsigned char *outp = (unsigned char *) *outbuf;
1359 if (cd->compose_char)
1362 for (i = 0; latin1_comb[i].x1; i++)
1363 if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
1365 x = latin1_comb[i].y;
1368 if (*outbytesleft < 1)
1369 { /* no room. Retain compose_char and bail out */
1370 cd->my_errno = YAZ_ICONV_E2BIG;
1371 return (size_t)(-1);
1373 if (!latin1_comb[i].x1)
1374 { /* not found. Just write compose_char */
1375 *outp++ = (unsigned char) cd->compose_char;
1377 *outbuf = (char *) outp;
1379 /* compose_char used so reset it. x now holds current char */
1380 cd->compose_char = 0;
1383 if (x > 32 && x < 127 && cd->compose_char == 0)
1385 cd->compose_char = x;
1388 else if (x > 255 || x < 1)
1390 cd->my_errno = YAZ_ICONV_EILSEQ;
1393 else if (*outbytesleft < 1)
1395 cd->my_errno = YAZ_ICONV_E2BIG;
1396 return (size_t)(-1);
1398 *outp++ = (unsigned char) x;
1400 *outbuf = (char *) outp;
1404 static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
1405 char **outbuf, size_t *outbytesleft)
1407 if (cd->compose_char)
1409 unsigned char *outp = (unsigned char *) *outbuf;
1410 if (*outbytesleft < 1)
1412 cd->my_errno = YAZ_ICONV_E2BIG;
1413 return (size_t)(-1);
1415 *outp++ = (unsigned char) cd->compose_char;
1417 *outbuf = (char *) outp;
1418 cd->compose_char = 0;
1423 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
1424 char **outbuf, size_t *outbytesleft)
1426 unsigned char *outp = (unsigned char *) *outbuf;
1427 if (*outbytesleft >= 4)
1429 *outp++ = (unsigned char) (x>>24);
1430 *outp++ = (unsigned char) (x>>16);
1431 *outp++ = (unsigned char) (x>>8);
1432 *outp++ = (unsigned char) x;
1433 (*outbytesleft) -= 4;
1437 cd->my_errno = YAZ_ICONV_E2BIG;
1438 return (size_t)(-1);
1440 *outbuf = (char *) outp;
1444 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1445 char **outbuf, size_t *outbytesleft)
1447 unsigned char *outp = (unsigned char *) *outbuf;
1448 if (*outbytesleft >= 4)
1450 *outp++ = (unsigned char) x;
1451 *outp++ = (unsigned char) (x>>8);
1452 *outp++ = (unsigned char) (x>>16);
1453 *outp++ = (unsigned char) (x>>24);
1454 (*outbytesleft) -= 4;
1458 cd->my_errno = YAZ_ICONV_E2BIG;
1459 return (size_t)(-1);
1461 *outbuf = (char *) outp;
1465 static unsigned long lookup_marc8(yaz_iconv_t cd,
1466 unsigned long x, int *comb,
1467 const char **page_chr)
1470 char *utf8_outbuf = utf8_buf;
1471 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1473 r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
1474 if (r == (size_t)(-1))
1476 cd->my_errno = YAZ_ICONV_EILSEQ;
1482 size_t inbytesleft, no_read_sub = 0;
1485 *utf8_outbuf = '\0';
1486 inp = (unsigned char *) utf8_buf;
1487 inbytesleft = strlen(utf8_buf);
1489 x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb);
1492 *page_chr = ESC "(B";
1495 x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb);
1498 *page_chr = ESC "(B";
1501 x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb);
1504 *page_chr = ESC "b";
1507 x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb);
1510 *page_chr = ESC "p";
1513 x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb);
1516 *page_chr = ESC "(2";
1519 x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1522 *page_chr = ESC "(N";
1525 x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb);
1528 *page_chr = ESC "(Q";
1531 x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb);
1534 *page_chr = ESC "(3";
1537 x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb);
1540 *page_chr = ESC "(4";
1543 x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb);
1546 *page_chr = ESC "(S";
1549 x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb);
1552 *page_chr = ESC "$1";
1555 cd->my_errno = YAZ_ICONV_EILSEQ;
1560 static size_t flush_combos(yaz_iconv_t cd,
1561 char **outbuf, size_t *outbytesleft)
1563 unsigned long y = cd->write_marc8_last;
1571 assert(cd->write_marc8_lpage);
1572 if (cd->write_marc8_lpage)
1574 size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
1575 cd->write_marc8_lpage);
1580 byte = (unsigned char )((y>>16) & 0xff);
1582 out_buf[out_no++] = byte;
1583 byte = (unsigned char)((y>>8) & 0xff);
1585 out_buf[out_no++] = byte;
1586 byte = (unsigned char )(y & 0xff);
1588 out_buf[out_no++] = byte;
1590 if (out_no + 2 >= *outbytesleft)
1592 cd->my_errno = YAZ_ICONV_E2BIG;
1593 return (size_t) (-1);
1596 memcpy(*outbuf, out_buf, out_no);
1598 (*outbytesleft) -= out_no;
1599 if (cd->write_marc8_second_half_char)
1601 *(*outbuf)++ = cd->write_marc8_second_half_char;
1605 cd->write_marc8_last = 0;
1606 cd->write_marc8_lpage = 0;
1607 cd->write_marc8_second_half_char = 0;
/** \brief Makes page_chr the current MARC-8 output page
 *
 * The converter remembers the current page per set: cd->write_marc8_g0 by
 * default, cd->write_marc8_g1 when the second character of page_chr is ')'.
 * Output is produced only when no page is remembered for that set or when
 * the remembered page differs from page_chr; the remembered page is then
 * updated to page_chr.
 *
 * At least 8 free output bytes are required before anything is written;
 * otherwise cd->my_errno is set to YAZ_ICONV_E2BIG and (size_t)(-1) is
 * returned.
 */
1611 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
1612 char **outbuf, size_t *outbytesleft,
1613 const char *page_chr)
1615 const char **old_page_chr = &cd->write_marc8_g0;
1617 /* are we going to a G1-set (such as ESC ")!E") */
1618 if (page_chr && page_chr[1] == ')')
1619 old_page_chr = &cd->write_marc8_g1;
/* only act when the requested page is not already the current one */
1621 if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
1624 const char *page_out = page_chr;
/* fixed 8-byte requirement, checked once for all sequences written below */
1626 if (*outbytesleft < 8)
1628 cd->my_errno = YAZ_ICONV_E2BIG;
1630 return (size_t) (-1);
/* the page being left is one of ESC "p", ESC "g" or ESC "b" */
1635 if (!strcmp(*old_page_chr, ESC "p")
1636 || !strcmp(*old_page_chr, ESC "g")
1637 || !strcmp(*old_page_chr, ESC "b"))
1640 /* Technique 1 leave */
1641 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
1643 /* Must leave script + enter new page */
/* two sequences in this case: the current page_out now, and
   ESC "(B" by the common write at the end */
1644 plen = strlen(page_out);
1645 memcpy(*outbuf, page_out, plen);
1647 (*outbytesleft) -= plen;
1648 page_out = ESC "(B";
/* remember the new page, then write the final sequence */
1652 *old_page_chr = page_chr;
1653 plen = strlen(page_out);
1654 memcpy(*outbuf, page_out, plen);
1656 (*outbytesleft) -= plen;
/** \brief Looks up x for MARC-8 output and records it as the pending character
 *
 * x is translated with lookup_marc8, which yields the value y, a combining
 * indication (comb) and the page for y (page_chr). The previously pending
 * character is written out through flush_combos, after which y and
 * page_chr are stored in cd->write_marc8_last and cd->write_marc8_lpage.
 * For particular inputs cd->write_marc8_second_half_char is set (0xEC, or
 * 0xFB when x is 0x0360).
 *
 * Returns (size_t)(-1) on failure; when *outbytesleft is 1 or less
 * cd->my_errno is set to YAZ_ICONV_E2BIG.
 */
1662 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1663 char **outbuf, size_t *outbytesleft)
1666 const char *page_chr = 0;
1667 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1670 return (size_t) (-1);
1676 size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
1682 cd->write_marc8_second_half_char = 0xEC;
1683 else if (x == 0x0360)
1684 cd->write_marc8_second_half_char = 0xFB;
/* at least two free output bytes are needed to continue */
1686 if (*outbytesleft <= 1)
1688 cd->my_errno = YAZ_ICONV_E2BIG;
1689 return (size_t) (-1);
/* write out whatever was pending before replacing it */
1696 size_t r = flush_combos(cd, outbuf, outbytesleft);
/* y becomes the new pending character, together with its page */
1700 cd->write_marc8_last = y;
1701 cd->write_marc8_lpage = page_chr;
/** \brief Flush handler for MARC-8 output
 *
 * Writes the pending character with flush_combos, forgets the remembered
 * G1 page (cd->write_marc8_g1 = 0) and returns the result of switching the
 * output page to ESC "(B" with yaz_write_marc8_page_chr.
 */
1706 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1707 char **outbuf, size_t *outbytesleft)
1709 size_t r = flush_combos(cd, outbuf, outbytesleft);
1712 cd->write_marc8_g1 = 0;
1713 return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, ESC "(B");
/** \brief Write handler for MARC-8 output
 *
 * The latin1_comb table is scanned up to its first entry whose x1 is 0.
 * When x equals an entry's y, the entry's x1 and x2 are written in turn
 * with yaz_write_marc8_2 in place of x. If writing x2 fails with
 * YAZ_ICONV_E2BIG, the output state saved before the pair was started is
 * put back. Any x not found in the table is passed to yaz_write_marc8_2
 * unchanged.
 */
1716 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1717 char **outbuf, size_t *outbytesleft)
1720 for (i = 0; latin1_comb[i].x1; i++)
1722 if (x == latin1_comb[i].y)
1725 /* save the output pointers .. */
1726 char *outbuf0 = *outbuf;
1727 size_t outbytesleft0 = *outbytesleft;
/* .. and the pending-character state that writing x1 will replace */
1728 int last_ch = cd->write_marc8_last;
1729 const char *lpage = cd->write_marc8_lpage;
/* first element of the pair */
1731 r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1732 outbuf, outbytesleft);
/* second element of the pair */
1735 r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1736 outbuf, outbytesleft);
1737 if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1739 /* not enough room. reset output to original values */
1741 *outbytesleft = outbytesleft0;
1742 cd->write_marc8_last = last_ch;
1743 cd->write_marc8_lpage = lpage;
/* x has no entry in latin1_comb: write it as a single character */
1748 return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
/** \brief Write handler producing wchar_t output
 *
 * When at least sizeof(wchar_t) bytes are free, the bytes of wch are copied
 * to the output with memcpy and the output position and remaining count
 * are moved by sizeof(wch). Otherwise cd->my_errno is set to
 * YAZ_ICONV_E2BIG and (size_t)(-1) is returned.
 * NOTE(review): wch presumably holds x converted to wchar_t - confirm.
 */
1753 static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
1754 char **outbuf, size_t *outbytesleft)
1756 unsigned char *outp = (unsigned char *) *outbuf;
1758 if (*outbytesleft >= sizeof(wchar_t))
/* memcpy is used, so no alignment of the output is assumed */
1761 memcpy(outp, &wch, sizeof(wch));
1762 outp += sizeof(wch);
1763 (*outbytesleft) -= sizeof(wch);
1767 cd->my_errno = YAZ_ICONV_E2BIG;
1768 return (size_t)(-1);
/* publish the advanced output position to the caller */
1770 *outbuf = (char *) outp;
/** \brief Tells whether cd converts with YAZ's own handlers
 *
 * Returns non-zero when cd has both a read handler and a write handler
 * set, and 0 otherwise.
 */
1775 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1777 return cd->read_handle && cd->write_handle;
/** \brief Creates a converter from encoding fromcode to encoding tocode
 *
 * The converter is allocated with xmalloc and starts with no handlers and
 * with my_errno set to YAZ_ICONV_UNKNOWN. The encoding names are compared
 * with yaz_matchstr against the names listed below; a zero result selects
 * the corresponding built-in reader or writer. When either the reader or
 * the writer is still unset afterwards, the system iconv_open is tried with
 * the same pair of names.
 */
1780 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1782 yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
/* no built-in handlers chosen yet */
1784 cd->write_handle = 0;
1785 cd->read_handle = 0;
1786 cd->init_handle = 0;
1787 cd->flush_handle = 0;
1788 cd->my_errno = YAZ_ICONV_UNKNOWN;
1790 /* a useful hack: if fromcode has a leading @,
1791 the library will not use YAZ's own conversions .. */
1792 if (fromcode[0] == '@')
/* built-in readers, selected by source encoding name;
   UTF8 is the only one that also installs an init handler */
1796 if (!yaz_matchstr(fromcode, "UTF8"))
1798 cd->read_handle = yaz_read_UTF8;
1799 cd->init_handle = yaz_init_UTF8;
1801 else if (!yaz_matchstr(fromcode, "ISO88591"))
1802 cd->read_handle = yaz_read_ISO8859_1;
1803 else if (!yaz_matchstr(fromcode, "UCS4"))
1804 cd->read_handle = yaz_read_UCS4;
1805 else if (!yaz_matchstr(fromcode, "UCS4LE"))
1806 cd->read_handle = yaz_read_UCS4LE;
1807 else if (!yaz_matchstr(fromcode, "MARC8"))
1808 cd->read_handle = yaz_read_marc8;
1809 else if (!yaz_matchstr(fromcode, "MARC8s"))
1810 cd->read_handle = yaz_read_marc8s;
1811 else if (!yaz_matchstr(fromcode, "advancegreek"))
1812 cd->read_handle = yaz_read_advancegreek;
/* both spellings of the ISO 5428:1984 name select the same reader */
1813 else if (!yaz_matchstr(fromcode, "iso54281984"))
1814 cd->read_handle = yaz_read_iso5428_1984;
1815 else if (!yaz_matchstr(fromcode, "iso5428:1984"))
1816 cd->read_handle = yaz_read_iso5428_1984;
1818 else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1819 cd->read_handle = yaz_read_wchar_t;
/* built-in writers, selected by target encoding name;
   ISO88591 and the MARC-8 names also install a flush handler */
1822 if (!yaz_matchstr(tocode, "UTF8"))
1823 cd->write_handle = yaz_write_UTF8;
1824 else if (!yaz_matchstr(tocode, "ISO88591"))
1826 cd->write_handle = yaz_write_ISO8859_1;
1827 cd->flush_handle = yaz_flush_ISO8859_1;
1829 else if (!yaz_matchstr (tocode, "UCS4"))
1830 cd->write_handle = yaz_write_UCS4;
1831 else if (!yaz_matchstr(tocode, "UCS4LE"))
1832 cd->write_handle = yaz_write_UCS4LE;
/* MARC8 and MARC8s share the same write and flush handlers */
1833 else if (!yaz_matchstr(tocode, "MARC8"))
1835 cd->write_handle = yaz_write_marc8;
1836 cd->flush_handle = yaz_flush_marc8;
1838 else if (!yaz_matchstr(tocode, "MARC8s"))
1840 cd->write_handle = yaz_write_marc8;
1841 cd->flush_handle = yaz_flush_marc8;
1843 else if (!yaz_matchstr(tocode, "advancegreek"))
1845 cd->write_handle = yaz_write_advancegreek;
1847 else if (!yaz_matchstr(tocode, "iso54281984"))
1849 cd->write_handle = yaz_write_iso5428_1984;
1851 else if (!yaz_matchstr(tocode, "iso5428:1984"))
1853 cd->write_handle = yaz_write_iso5428_1984;
1856 else if (!yaz_matchstr(tocode, "WCHAR_T"))
1857 cd->write_handle = yaz_write_wchar_t;
/* a built-in conversion needs both sides; otherwise ask the system iconv */
1862 if (!cd->read_handle || !cd->write_handle)
1864 cd->iconv_cd = iconv_open (tocode, fromcode);
/* the descriptor is checked against iconv_open's (iconv_t)(-1) value */
1865 if (cd->iconv_cd == (iconv_t) (-1))
1872 if (!cd->read_handle || !cd->write_handle)
/** \brief Converts input to output using converter cd
 *
 * The arguments are passed unchanged to the system iconv() on the path
 * that uses cd->iconv_cd; a (size_t)(-1) result from it is translated from
 * yaz_errno() to one of YAZ_ICONV_E2BIG, YAZ_ICONV_EINVAL,
 * YAZ_ICONV_EILSEQ or YAZ_ICONV_UNKNOWN in cd->my_errno.
 *
 * On the built-in path the code resets the combining and MARC-8 write
 * state (G0 page ESC "(B", no G1 page), lets cd->init_handle inspect the
 * start of the input when one is set, and then moves characters from
 * cd->read_handle to cd->write_handle, advancing *inbuf and decreasing
 * *inbytesleft by the number of bytes read for each one. When a write
 * fails with YAZ_ICONV_E2BIG the read count is kept in cd->no_read_x.
 *
 * A null inbuf or *inbuf selects flushing: with a usable outbuf,
 * cd->unget_x is written and cd->flush_handle, when set, is called.
 */
1882 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1883 char **outbuf, size_t *outbytesleft)
/* system iconv path */
1892 iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1893 if (r == (size_t)(-1))
/* translate the system error code into a YAZ_ICONV_* code */
1895 switch (yaz_errno())
1898 cd->my_errno = YAZ_ICONV_E2BIG;
1901 cd->my_errno = YAZ_ICONV_EINVAL;
1904 cd->my_errno = YAZ_ICONV_EILSEQ;
1907 cd->my_errno = YAZ_ICONV_UNKNOWN;
1919 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* reset of the combining-character and MARC-8 writer state */
1923 cd->comb_offset = cd->comb_size = 0;
1924 cd->compose_char = 0;
1926 cd->write_marc8_second_half_char = 0;
1927 cd->write_marc8_last = 0;
1928 cd->write_marc8_lpage = 0;
1929 cd->write_marc8_g0 = ESC "(B";
1930 cd->write_marc8_g1 = 0;
/* the init handler may consume leading input bytes (no_read) */
1938 if (cd->init_handle && inbuf && *inbuf)
1941 size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1942 *inbytesleft, &no_read);
1945 if (cd->my_errno == YAZ_ICONV_EINVAL)
1950 *inbytesleft -= no_read;
/* no input buffer: flush request */
1956 if (!inbuf || !*inbuf)
1958 if (outbuf && *outbuf)
1961 r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
1962 if (cd->flush_handle)
1963 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
1978 no_read = cd->no_read_x;
/* input exhausted: the result is the distance *inbuf moved from inbuf0 */
1982 if (*inbytesleft == 0)
1984 r = *inbuf - inbuf0;
/* read one character, then hand it to the writer */
1987 x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1997 r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
2000 /* unable to write it. save it because read_handle cannot
2002 if (cd->my_errno == YAZ_ICONV_E2BIG)
2005 cd->no_read_x = no_read;
2011 *inbytesleft -= no_read;
2012 (*inbuf) += no_read;
/** \brief Returns the error code last stored in cd (cd->my_errno)
 *
 * The code in this file stores YAZ_ICONV_UNKNOWN, YAZ_ICONV_E2BIG,
 * YAZ_ICONV_EILSEQ or YAZ_ICONV_EINVAL in that field.
 */
2017 int yaz_iconv_error (yaz_iconv_t cd)
2019 return cd->my_errno;
/** \brief Closes converter cd
 *
 * Releases the system iconv descriptor held in cd->iconv_cd with
 * iconv_close.
 */
2022 int yaz_iconv_close (yaz_iconv_t cd)
2026 iconv_close (cd->iconv_cd);
2035 * indent-tabs-mode: nil
2037 * vim: shiftwidth=4 tabstop=8 expandtab