2 * Copyright (C) 1995-2007, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.46 2007-10-05 15:11:36 adam Exp $
9 * \brief Implements simple ICONV
11 * This implements an interface similar to that of iconv and
12 * is used by YAZ to interface with iconv (if present).
13 * For systems where iconv is not present, this layer
14 * provides a few important conversions: UTF-8, MARC-8, Latin-1.
17 * http://www.loc.gov/marc/specifications/speccharmarc8.html
37 #include <yaz/yaz-util.h>
39 unsigned long yaz_marc8_42_conv(unsigned char *inp, size_t inbytesleft,
40 size_t *no_read, int *combining);
41 unsigned long yaz_marc8_45_conv(unsigned char *inp, size_t inbytesleft,
42 size_t *no_read, int *combining);
43 unsigned long yaz_marc8_67_conv(unsigned char *inp, size_t inbytesleft,
44 size_t *no_read, int *combining);
45 unsigned long yaz_marc8_62_conv(unsigned char *inp, size_t inbytesleft,
46 size_t *no_read, int *combining);
47 unsigned long yaz_marc8_70_conv(unsigned char *inp, size_t inbytesleft,
48 size_t *no_read, int *combining);
49 unsigned long yaz_marc8_32_conv(unsigned char *inp, size_t inbytesleft,
50 size_t *no_read, int *combining);
51 unsigned long yaz_marc8_4E_conv(unsigned char *inp, size_t inbytesleft,
52 size_t *no_read, int *combining);
53 unsigned long yaz_marc8_51_conv(unsigned char *inp, size_t inbytesleft,
54 size_t *no_read, int *combining);
55 unsigned long yaz_marc8_33_conv(unsigned char *inp, size_t inbytesleft,
56 size_t *no_read, int *combining);
57 unsigned long yaz_marc8_34_conv(unsigned char *inp, size_t inbytesleft,
58 size_t *no_read, int *combining);
59 unsigned long yaz_marc8_53_conv(unsigned char *inp, size_t inbytesleft,
60 size_t *no_read, int *combining);
61 unsigned long yaz_marc8_31_conv(unsigned char *inp, size_t inbytesleft,
62 size_t *no_read, int *combining);
65 unsigned long yaz_marc8r_42_conv(unsigned char *inp, size_t inbytesleft,
66 size_t *no_read, int *combining);
67 unsigned long yaz_marc8r_45_conv(unsigned char *inp, size_t inbytesleft,
68 size_t *no_read, int *combining);
69 unsigned long yaz_marc8r_67_conv(unsigned char *inp, size_t inbytesleft,
70 size_t *no_read, int *combining);
71 unsigned long yaz_marc8r_62_conv(unsigned char *inp, size_t inbytesleft,
72 size_t *no_read, int *combining);
73 unsigned long yaz_marc8r_70_conv(unsigned char *inp, size_t inbytesleft,
74 size_t *no_read, int *combining);
75 unsigned long yaz_marc8r_32_conv(unsigned char *inp, size_t inbytesleft,
76 size_t *no_read, int *combining);
77 unsigned long yaz_marc8r_4E_conv(unsigned char *inp, size_t inbytesleft,
78 size_t *no_read, int *combining);
79 unsigned long yaz_marc8r_51_conv(unsigned char *inp, size_t inbytesleft,
80 size_t *no_read, int *combining);
81 unsigned long yaz_marc8r_33_conv(unsigned char *inp, size_t inbytesleft,
82 size_t *no_read, int *combining);
83 unsigned long yaz_marc8r_34_conv(unsigned char *inp, size_t inbytesleft,
84 size_t *no_read, int *combining);
85 unsigned long yaz_marc8r_53_conv(unsigned char *inp, size_t inbytesleft,
86 size_t *no_read, int *combining);
87 unsigned long yaz_marc8r_31_conv(unsigned char *inp, size_t inbytesleft,
88 size_t *no_read, int *combining);
90 struct yaz_iconv_struct {
93 size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
94 size_t inbytesleft, size_t *no_read);
95 unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
96 size_t inbytesleft, size_t *no_read);
97 size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
98 char **outbuf, size_t *outbytesleft);
99 size_t (*flush_handle)(yaz_iconv_t cd,
100 char **outbuf, size_t *outbytesleft);
105 unsigned long comb_x[8];
106 size_t comb_no_read[8];
108 unsigned long unget_x;
112 unsigned long compose_char;
114 unsigned long write_marc8_comb_ch[8];
115 size_t write_marc8_comb_no;
116 unsigned write_marc8_second_half_char;
117 unsigned long write_marc8_last;
118 const char *write_marc8_page_chr;
122 unsigned long x1, x2;
125 { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
126 { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
127 { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
128 { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
129 { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
130 { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
131 /* no need for 0xc6 LATIN CAPITAL LETTER AE */
132 { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
133 { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
134 { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
135 { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
136 { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
137 { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
138 { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
139 { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
140 { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
141 { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
142 { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
143 { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
144 { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
145 { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
146 { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
147 /* omitted: 0xd7 MULTIPLICATION SIGN */
148 /* omitted: 0xd8 LATIN CAPITAL LETTER O WITH STROKE */
149 { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
150 { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
151 { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
152 { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
153 { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
154 /* omitted: 0xde LATIN CAPITAL LETTER THORN */
155 /* omitted: 0xdf LATIN SMALL LETTER SHARP S */
156 { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
157 { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
158 { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
159 { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
160 { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
161 { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
162 /* omitted: 0xe6 LATIN SMALL LETTER AE */
163 { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
164 { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
165 { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
166 { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
167 { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
168 { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
169 { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
170 { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
171 { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
172 /* omitted: 0xf0 LATIN SMALL LETTER ETH */
173 { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
174 { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
175 { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
176 { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
177 { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
178 { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
179 /* omitted: 0xf7 DIVISION SIGN */
180 /* omitted: 0xf8 LATIN SMALL LETTER O WITH STROKE */
181 { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
182 { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
183 { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
184 { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
185 { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
186 /* omitted: 0xfe LATIN SMALL LETTER THORN */
187 { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
192 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
193 size_t inbytesleft, size_t *no_read)
195 unsigned long x = inp[0];
/* Start-of-input hook for UTF-8 input (signature matches the init_handle
 * member of struct yaz_iconv_struct).
 *
 * Only part of this function is visible in this listing.  The visible test
 * compares the second and third input bytes with 0xBB and 0xBF, i.e. the
 * tail of the UTF-8 byte order mark EF BB BF.  Both bytes must MATCH for the
 * input to start with a BOM; the previous "inp[1] != 0xbb" test rejected a
 * real BOM and accepted EF xx BF for any xx other than BB.
 *
 * NOTE(review): the inp[0] / inbytesleft checks and the assignments to
 * *no_read sit on lines not shown here - confirm they agree with this test.
 */
201 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
202 size_t inbytesleft, size_t *no_read)
211 cd->my_errno = YAZ_ICONV_EINVAL;
214 if (inp[1] == 0xbb && inp[2] == 0xbf) /* bytes 2-3 of the BOM EF BB BF */
/* Decodes one UTF-8 sequence starting at inp.
 *
 * inp         - input bytes
 * inbytesleft - number of bytes available at inp
 * no_read     - preset to 0 here; NOTE(review): the assignments of the
 *               consumed length are on lines not shown in this listing
 * error       - receives YAZ_ICONV_EILSEQ or YAZ_ICONV_EINVAL on failure
 *
 * Visible logic: the lead byte selects the sequence length (up to 0xDF: 2,
 * 0xEF: 3, 0xF7: 4, 0xFB: 5, 0xFD: 6 bytes).  A branch is taken only when
 * that many bytes are available, and every trailing byte must have the form
 * 10xxxxxx ((b & 0xc0) == 0x80).  When no branch applies, the final visible
 * assignment reports YAZ_ICONV_EINVAL.
 */
221 unsigned long yaz_read_UTF8_char(unsigned char *inp,
222 size_t inbytesleft, size_t *no_read,
227 *no_read = 0; /* by default */
233 else if (inp[0] <= 0xbf || inp[0] >= 0xfe) /* continuation byte as lead, or 0xFE/0xFF */
235 *error = YAZ_ICONV_EILSEQ;
237 else if (inp[0] <= 0xdf && inbytesleft >= 2) /* 2-byte form */
239 if ((inp[1] & 0xc0) == 0x80)
241 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
245 *error = YAZ_ICONV_EILSEQ;
248 *error = YAZ_ICONV_EILSEQ;
250 else if (inp[0] <= 0xef && inbytesleft >= 3) /* 3-byte form */
252 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
254 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
259 *error = YAZ_ICONV_EILSEQ;
262 *error = YAZ_ICONV_EILSEQ;
264 else if (inp[0] <= 0xf7 && inbytesleft >= 4) /* 4-byte form */
266 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
267 && (inp[3] & 0xc0) == 0x80)
269 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
270 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
274 *error = YAZ_ICONV_EILSEQ;
277 *error = YAZ_ICONV_EILSEQ;
279 else if (inp[0] <= 0xfb && inbytesleft >= 5) /* 5-byte form */
281 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
282 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
284 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
285 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
290 *error = YAZ_ICONV_EILSEQ;
293 *error = YAZ_ICONV_EILSEQ;
295 else if (inp[0] <= 0xfd && inbytesleft >= 6) /* 6-byte form */
297 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
298 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
299 && (inp[5] & 0xc0) == 0x80)
301 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
302 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
303 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
307 *error = YAZ_ICONV_EILSEQ;
310 *error = YAZ_ICONV_EILSEQ;
313 *error = YAZ_ICONV_EINVAL; /* incomplete sequence */
318 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
319 size_t inbytesleft, size_t *no_read)
321 return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
/* Reads one big-endian UCS-4 value from inp (signature matches the
 * read_handle member of struct yaz_iconv_struct).
 *
 * Only part of this function is visible in this listing.  The visible lines
 * report YAZ_ICONV_EINVAL for incomplete input and assemble x from four
 * bytes, most significant first.
 *
 * The top byte is widened to unsigned long before shifting: inp[0] promotes
 * to int, and "inp[0] << 24" overflows int for bytes >= 0x80 (undefined
 * behaviour, and a sign-extended result where unsigned long is 64 bits).
 */
324 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
325 size_t inbytesleft, size_t *no_read)
331 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
336 x = ((unsigned long) inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
/* Reads one little-endian UCS-4 value from inp (signature matches the
 * read_handle member of struct yaz_iconv_struct).
 *
 * Only part of this function is visible in this listing.  The visible lines
 * report YAZ_ICONV_EINVAL for incomplete input and assemble x from four
 * bytes, least significant first.
 *
 * The top byte is widened to unsigned long before shifting: inp[3] promotes
 * to int, and "inp[3] << 24" overflows int for bytes >= 0x80 (undefined
 * behaviour, and a sign-extended result where unsigned long is 64 bits).
 */
342 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
343 size_t inbytesleft, size_t *no_read)
349 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
354 x = ((unsigned long) inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
361 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
362 size_t inbytesleft, size_t *no_read)
366 if (inbytesleft < sizeof(wchar_t))
368 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
374 memcpy (&wch, inp, sizeof(wch));
376 *no_read = sizeof(wch);
382 static unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
383 size_t inbytesleft, size_t *no_read)
390 while (inbytesleft > 0)
396 else if (*inp == 0xa3)
406 if (inbytesleft == 0)
408 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
413 case 0xe1: /* alpha small */
419 case 0xc1: /* alpha capital */
426 case 0xe2: /* Beta small */
429 case 0xc2: /* Beta capital */
433 case 0xe4: /* Gamma small */
436 case 0xc4: /* Gamma capital */
440 case 0xe5: /* Delta small */
443 case 0xc5: /* Delta capital */
446 case 0xe6: /* epsilon small */
452 case 0xc6: /* epsilon capital */
458 case 0xe9: /* Zeta small */
461 case 0xc9: /* Zeta capital */
464 case 0xea: /* Eta small */
470 case 0xca: /* Eta capital */
476 case 0xeb: /* Theta small */
479 case 0xcb: /* Theta capital */
482 case 0xec: /* Iota small */
494 case 0xcc: /* Iota capital */
503 case 0xed: /* Kappa small */
506 case 0xcd: /* Kappa capital */
509 case 0xee: /* Lambda small */
512 case 0xce: /* Lambda capital */
515 case 0xef: /* Mu small */
518 case 0xcf: /* Mu capital */
521 case 0xf0: /* Nu small */
524 case 0xd0: /* Nu capital */
527 case 0xf1: /* Xi small */
530 case 0xd1: /* Xi capital */
533 case 0xf2: /* Omicron small */
539 case 0xd2: /* Omicron capital */
545 case 0xf3: /* Pi small */
548 case 0xd3: /* Pi capital */
551 case 0xf5: /* Rho small */
554 case 0xd5: /* Rho capital */
557 case 0xf7: /* Sigma small (end of words) */
560 case 0xf6: /* Sigma small */
563 case 0xd6: /* Sigma capital */
566 case 0xf8: /* Tau small */
569 case 0xd8: /* Tau capital */
572 case 0xf9: /* Upsilon small */
584 case 0xd9: /* Upsilon capital */
593 case 0xfa: /* Phi small */
596 case 0xda: /* Phi capital */
599 case 0xfb: /* Chi small */
602 case 0xdb: /* Chi capital */
605 case 0xfc: /* Psi small */
608 case 0xdc: /* Psi capital */
611 case 0xfd: /* Omega small */
617 case 0xdd: /* Omega capital */
632 static size_t yaz_write_iso5428_1984(yaz_iconv_t cd, unsigned long x,
633 char **outbuf, size_t *outbytesleft)
636 unsigned char *out = (unsigned char*) *outbuf;
637 if (*outbytesleft < 3)
639 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
644 case 0x03ac : out[k++]=0xa2; out[k++]=0xe1; break;
645 case 0x03b1 : out[k++]=0xe1; break;
646 case 0x0386 : out[k++]=0xa2; out[k++]=0xc1; break;
647 case 0x0391 : out[k++]=0xc1; break;
648 case 0x03b2 : out[k++]=0xe2; break;
649 case 0x0392 : out[k++]=0xc2; break;
650 case 0x03b3 : out[k++]=0xe4; break;
651 case 0x0393 : out[k++]=0xc4; break;
652 case 0x03b4 : out[k++]=0xe5; break;
653 case 0x0394 : out[k++]=0xc5; break;
654 case 0x03ad : out[k++]=0xa2; out[k++]=0xe6; break;
655 case 0x03b5 : out[k++]=0xe6; break;
656 case 0x0388 : out[k++]=0xa2; out[k++]=0xc6; break;
657 case 0x0395 : out[k++]=0xc6; break;
658 case 0x03b6 : out[k++]=0xe9; break;
659 case 0x0396 : out[k++]=0xc9; break;
660 case 0x03ae : out[k++]=0xa2; out[k++]=0xea; break;
661 case 0x03b7 : out[k++]=0xea; break;
662 case 0x0389 : out[k++]=0xa2; out[k++]=0xca; break;
663 case 0x0397 : out[k++]=0xca; break;
664 case 0x03b8 : out[k++]=0xeb; break;
665 case 0x0398 : out[k++]=0xcb; break;
666 case 0x0390 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xec; break;
667 case 0x03af : out[k++]=0xa2; out[k++]=0xec; break;
668 case 0x03ca : out[k++]=0xa3; out[k++]=0xec; break;
669 case 0x03b9 : out[k++]=0xec; break;
670 case 0x038a : out[k++]=0xa2; out[k++]=0xcc; break;
671 case 0x03aa : out[k++]=0xa3; out[k++]=0xcc; break;
672 case 0x0399 : out[k++]=0xcc; break;
673 case 0x03ba : out[k++]=0xed; break;
674 case 0x039a : out[k++]=0xcd; break;
675 case 0x03bb : out[k++]=0xee; break;
676 case 0x039b : out[k++]=0xce; break;
677 case 0x03bc : out[k++]=0xef; break;
678 case 0x039c : out[k++]=0xcf; break;
679 case 0x03bd : out[k++]=0xf0; break;
680 case 0x039d : out[k++]=0xd0; break;
681 case 0x03be : out[k++]=0xf1; break;
682 case 0x039e : out[k++]=0xd1; break;
683 case 0x03cc : out[k++]=0xa2; out[k++]=0xf2; break;
684 case 0x03bf : out[k++]=0xf2; break;
685 case 0x038c : out[k++]=0xa2; out[k++]=0xd2; break;
686 case 0x039f : out[k++]=0xd2; break;
687 case 0x03c0 : out[k++]=0xf3; break;
688 case 0x03a0 : out[k++]=0xd3; break;
689 case 0x03c1 : out[k++]=0xf5; break;
690 case 0x03a1 : out[k++]=0xd5; break;
691 case 0x03c2 : out[k++]=0xf7; break;
692 case 0x03c3 : out[k++]=0xf6; break;
693 case 0x03a3 : out[k++]=0xd6; break;
694 case 0x03c4 : out[k++]=0xf8; break;
695 case 0x03a4 : out[k++]=0xd8; break;
696 case 0x03b0 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xf9; break;
697 case 0x03cd : out[k++]=0xa2; out[k++]=0xf9; break;
698 case 0x03cb : out[k++]=0xa3; out[k++]=0xf9; break;
699 case 0x03c5 : out[k++]=0xf9; break;
700 case 0x038e : out[k++]=0xa2; out[k++]=0xd9; break;
701 case 0x03ab : out[k++]=0xa3; out[k++]=0xd9; break;
702 case 0x03a5 : out[k++]=0xd9; break;
703 case 0x03c6 : out[k++]=0xfa; break;
704 case 0x03a6 : out[k++]=0xda; break;
705 case 0x03c7 : out[k++]=0xfb; break;
706 case 0x03a7 : out[k++]=0xdb; break;
707 case 0x03c8 : out[k++]=0xfc; break;
708 case 0x03a8 : out[k++]=0xdc; break;
709 case 0x03ce : out[k++]=0xa2; out[k++]=0xfd; break;
710 case 0x03c9 : out[k++]=0xfd; break;
711 case 0x038f : out[k++]=0xa2; out[k++]=0xdd; break;
712 case 0x03a9 : out[k++]=0xdd; break;
716 cd->my_errno = YAZ_ICONV_EILSEQ;
727 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
728 size_t inbytesleft, size_t *no_read)
736 while (inbytesleft > 0)
742 else if (*inp == 0x9e)
746 else if (*inp == 0x9f)
756 if (inbytesleft == 0)
758 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* Writes one Unicode code point x in the Advance Greek encoding (signature
 * matches the write_handle member of struct yaz_iconv_struct).
 *
 * Only part of this function is visible in this listing.  Visible behaviour:
 *  - the guard on *outbytesleft < 3 is followed by YAZ_ICONV_E2BIG, so three
 *    bytes of room are demanded even for one-byte outputs (three bytes is
 *    the longest sequence in the table);
 *  - plain small letters U+03B1..U+03C9 map to single bytes 0x81..0x99;
 *  - in the table below, byte 0x9f precedes every capital letter, 0x9d the
 *    tonos-accented forms and 0x9e the dialytika forms; prefixes combine in
 *    the order 0x9d, 0x9e, 0x9f;
 *  - YAZ_ICONV_EILSEQ is set on a path whose condition is not shown here
 *    (presumably for code points that cannot be represented - confirm).
 */
986 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
987 char **outbuf, size_t *outbytesleft)
990 unsigned char *out = (unsigned char*) *outbuf;
991 if (*outbytesleft < 3)
993 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
998 case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
999 case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
1000 case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
1001 case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
1002 case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
1003 case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
1004 case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
1005 case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
1006 case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
1007 case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
1008 case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
1009 case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
1010 case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
1011 case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
1012 case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
1013 case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
1014 case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
1015 case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
1016 case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
1017 case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
1018 case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
1019 case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
1020 case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
1021 case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
1022 case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
1023 case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
1024 case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
1025 case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
1026 case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
1027 case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
1028 case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
1029 case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
1030 case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
1031 case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
1032 case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
1033 case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
1034 case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
1035 case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
1036 case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
1037 case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
1038 case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
1039 case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
1040 case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
1041 case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
1042 case 0x03b1 : out[k++]=0x81; break;
1043 case 0x03b2 : out[k++]=0x82; break;
1044 case 0x03b3 : out[k++]=0x83; break;
1045 case 0x03b4 : out[k++]=0x84; break;
1046 case 0x03b5 : out[k++]=0x85; break;
1047 case 0x03b6 : out[k++]=0x86; break;
1048 case 0x03b7 : out[k++]=0x87; break;
1049 case 0x03b8 : out[k++]=0x88; break;
1050 case 0x03b9 : out[k++]=0x89; break;
1051 case 0x03ba : out[k++]=0x8a; break;
1052 case 0x03bb : out[k++]=0x8b; break;
1053 case 0x03bc : out[k++]=0x8c; break;
1054 case 0x03bd : out[k++]=0x8d; break;
1055 case 0x03be : out[k++]=0x8e; break;
1056 case 0x03bf : out[k++]=0x8f; break;
1057 case 0x03c0 : out[k++]=0x90; break;
1058 case 0x03c1 : out[k++]=0x91; break;
1059 case 0x03c2 : out[k++]=0x92; break;
1060 case 0x03c3 : out[k++]=0x93; break;
1061 case 0x03c4 : out[k++]=0x94; break;
1062 case 0x03c5 : out[k++]=0x95; break;
1063 case 0x03c6 : out[k++]=0x96; break;
1064 case 0x03c7 : out[k++]=0x97; break; /* chi is 0x97 (0x96 is phi); capital chi is 0x9f 0x97 */
1065 case 0x03c8 : out[k++]=0x98; break;
1066 case 0x03c9 : out[k++]=0x99; break;
1070 cd->my_errno = YAZ_ICONV_EILSEQ;
1082 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
1083 size_t inbytesleft, size_t *no_read,
1086 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
1087 size_t inbytesleft, size_t *no_read)
1090 if (cd->comb_offset < cd->comb_size)
1092 *no_read = cd->comb_no_read[cd->comb_offset];
1093 x = cd->comb_x[cd->comb_offset];
1095 /* special case for double-diacritic combining characters,
1096 INVERTED BREVE and DOUBLE TILDE.
1097 We'll increment the no_read counter by 1, since we want to skip over
1098 the processing of the closing ligature character
1100 /* this code is no longer necessary.. our handlers code in
1101 yaz_marc8_?_conv (generated by charconv.tcl) now returns
1102 0 and no_read=1 when a sequence does not match the input.
1103 The SECOND HALFs in codetables.xml produce a non-existent
1104 entry in the conversion trie.. Hence when met, the input byte is
1105 skipped as it should (in yaz_iconv)
1108 if (x == 0x0361 || x == 0x0360)
1115 cd->comb_offset = 0;
1116 for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
1120 if (inbytesleft == 0 && cd->comb_size)
1122 cd->my_errno = YAZ_ICONV_EINVAL;
1127 x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
1130 cd->comb_x[cd->comb_size] = x;
1131 cd->comb_no_read[cd->comb_size] = *no_read;
1133 inbytesleft = inbytesleft - *no_read;
1138 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
1139 size_t inbytesleft, size_t *no_read)
1141 unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
1142 if (x && cd->comb_size == 1)
1144 /* For MARC8s we try to get a Latin-1 page code out of it */
1146 for (i = 0; latin1_comb[i].x1; i++)
1147 if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
1149 *no_read += cd->comb_no_read[0];
1151 x = latin1_comb[i].y;
1158 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
1159 size_t inbytesleft, size_t *no_read,
1163 while(inbytesleft >= 1 && inp[0] == 27)
1165 size_t inbytesleft0 = inbytesleft;
1168 while(inbytesleft > 0 && strchr("(,$!)-", *inp))
1173 if (inbytesleft <= 0)
1176 cd->my_errno = YAZ_ICONV_EINVAL;
1179 cd->marc8_esc_mode = *inp++;
1181 (*no_read) += inbytesleft0 - inbytesleft;
1183 if (inbytesleft <= 0)
1185 else if (*inp == ' ')
1193 size_t no_read_sub = 0;
1196 switch(cd->marc8_esc_mode)
1198 case 'B': /* Basic ASCII */
1199 case 's': /* ASCII */
1200 case 'E': /* ANSEL */
1201 x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb);
1205 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb);
1208 case 'g': /* Greek */
1209 x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb);
1211 case 'b': /* Subscripts */
1212 x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb);
1214 case 'p': /* Superscripts */
1215 x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb);
1217 case '2': /* Basic Hebrew */
1218 x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb);
1220 case 'N': /* Basic Cyrillic */
1221 x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1223 case 'Q': /* Extended Cyrillic */
1224 x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb);
1226 case '3': /* Basic Arabic */
1227 x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb);
1229 case '4': /* Extended Arabic */
1230 x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb);
1232 case 'S': /* Greek */
1233 x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb);
1235 case '1': /* Chinese, Japanese, Korean (EACC) */
1236 x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb);
1240 cd->my_errno = YAZ_ICONV_EILSEQ;
1243 *no_read += no_read_sub;
1248 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
1249 char **outbuf, size_t *outbytesleft)
1251 return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
/* Encodes code point x as UTF-8 at *outbuf.
 *
 * x            - value to encode
 * outbuf       - in/out write position; advanced past the bytes written
 * outbytesleft - in/out free space; reduced by the number of bytes written
 * error        - receives YAZ_ICONV_E2BIG when the value does not fit
 *
 * Visible logic: the encoded length is chosen from the magnitude of x
 * (up to 0x7f: 1 byte, 0x7ff: 2, 0xffff: 3, 0x1fffff: 4, 0x3ffffff: 5,
 * otherwise 6), and each branch also requires that many free bytes.  When no
 * branch applies, *error is set to YAZ_ICONV_E2BIG and (size_t)(-1) is
 * returned.  NOTE(review): the success return value is on a line not shown
 * in this listing.
 */
1254 size_t yaz_write_UTF8_char(unsigned long x,
1255 char **outbuf, size_t *outbytesleft,
1258 unsigned char *outp = (unsigned char *) *outbuf;
1260 if (x <= 0x7f && *outbytesleft >= 1) /* 1 byte: 0xxxxxxx */
1262 *outp++ = (unsigned char) x;
1265 else if (x <= 0x7ff && *outbytesleft >= 2) /* 2 bytes: 110xxxxx 10xxxxxx */
1267 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
1268 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1269 (*outbytesleft) -= 2;
1271 else if (x <= 0xffff && *outbytesleft >= 3) /* 3 bytes: lead 1110xxxx */
1273 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
1274 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1275 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1276 (*outbytesleft) -= 3;
1278 else if (x <= 0x1fffff && *outbytesleft >= 4) /* 4 bytes: lead 11110xxx */
1280 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
1281 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1282 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1283 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1284 (*outbytesleft) -= 4;
1286 else if (x <= 0x3ffffff && *outbytesleft >= 5) /* 5 bytes: lead 111110xx */
1288 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
1289 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1290 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1291 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1292 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1293 (*outbytesleft) -= 5;
1295 else if (*outbytesleft >= 6) /* 6 bytes: lead 1111110x */
1297 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
1298 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
1299 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1300 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1301 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1302 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1303 (*outbytesleft) -= 6;
1307 *error = YAZ_ICONV_E2BIG; /* not room for output */
1308 return (size_t)(-1);
1310 *outbuf = (char *) outp; /* publish the advanced write position */
1314 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
1315 char **outbuf, size_t *outbytesleft)
1317 /* list of two-character Unicode sequences that, when combined, are
1318 equivalent to single unicode chars that can be represented in
1320 Regular iconv on Linux at least does not seem to convert these,
1321 but since MARC-8 to UTF-8 generates these composed sequences
1322 we get a better chance of a successful MARC-8 -> ISO-8859-1
1324 unsigned char *outp = (unsigned char *) *outbuf;
1326 if (cd->compose_char)
1329 for (i = 0; latin1_comb[i].x1; i++)
1330 if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
1332 x = latin1_comb[i].y;
1335 if (*outbytesleft < 1)
1336 { /* no room. Retain compose_char and bail out */
1337 cd->my_errno = YAZ_ICONV_E2BIG;
1338 return (size_t)(-1);
1340 if (!latin1_comb[i].x1)
1341 { /* not found. Just write compose_char */
1342 *outp++ = (unsigned char) cd->compose_char;
1344 *outbuf = (char *) outp;
1346 /* compose_char used so reset it. x now holds current char */
1347 cd->compose_char = 0;
1350 if (x > 32 && x < 127 && cd->compose_char == 0)
1352 cd->compose_char = x;
1355 else if (x > 255 || x < 1)
1357 cd->my_errno = YAZ_ICONV_EILSEQ;
1360 else if (*outbytesleft < 1)
1362 cd->my_errno = YAZ_ICONV_E2BIG;
1363 return (size_t)(-1);
1365 *outp++ = (unsigned char) x;
1367 *outbuf = (char *) outp;
1371 static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
1372 char **outbuf, size_t *outbytesleft)
1374 if (cd->compose_char)
1376 unsigned char *outp = (unsigned char *) *outbuf;
1377 if (*outbytesleft < 1)
1379 cd->my_errno = YAZ_ICONV_E2BIG;
1380 return (size_t)(-1);
1382 *outp++ = (unsigned char) cd->compose_char;
1384 *outbuf = (char *) outp;
1385 cd->compose_char = 0;
1390 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
1391 char **outbuf, size_t *outbytesleft)
1393 unsigned char *outp = (unsigned char *) *outbuf;
1394 if (*outbytesleft >= 4)
1396 *outp++ = (unsigned char) (x>>24);
1397 *outp++ = (unsigned char) (x>>16);
1398 *outp++ = (unsigned char) (x>>8);
1399 *outp++ = (unsigned char) x;
1400 (*outbytesleft) -= 4;
1404 cd->my_errno = YAZ_ICONV_E2BIG;
1405 return (size_t)(-1);
1407 *outbuf = (char *) outp;
1411 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1412 char **outbuf, size_t *outbytesleft)
1414 unsigned char *outp = (unsigned char *) *outbuf;
1415 if (*outbytesleft >= 4)
1417 *outp++ = (unsigned char) x;
1418 *outp++ = (unsigned char) (x>>8);
1419 *outp++ = (unsigned char) (x>>16);
1420 *outp++ = (unsigned char) (x>>24);
1421 (*outbytesleft) -= 4;
1425 cd->my_errno = YAZ_ICONV_E2BIG;
1426 return (size_t)(-1);
1428 *outbuf = (char *) outp;
1432 static unsigned long lookup_marc8(yaz_iconv_t cd,
1433 unsigned long x, int *comb,
1434 const char **page_chr)
1437 char *utf8_outbuf = utf8_buf;
1438 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1440 r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
1441 if (r == (size_t)(-1))
1443 cd->my_errno = YAZ_ICONV_EILSEQ;
1449 size_t inbytesleft, no_read_sub = 0;
1452 *utf8_outbuf = '\0';
1453 inp = (unsigned char *) utf8_buf;
1454 inbytesleft = strlen(utf8_buf);
1456 x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb);
1459 *page_chr = "\033(B";
1462 x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb);
1465 *page_chr = "\033(B";
1468 x = yaz_marc8r_67_conv(inp, inbytesleft, &no_read_sub, comb);
1471 *page_chr = "\033g";
1474 x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb);
1477 *page_chr = "\033b";
1480 x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb);
1483 *page_chr = "\033p";
1486 x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb);
1489 *page_chr = "\033(2";
1492 x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1495 *page_chr = "\033(N";
1498 x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb);
1501 *page_chr = "\033(Q";
1504 x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb);
1507 *page_chr = "\033(3";
1510 x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb);
1513 *page_chr = "\033(4";
1516 x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb);
1519 *page_chr = "\033(S";
1522 x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb);
1525 *page_chr = "\033$1";
1528 cd->my_errno = YAZ_ICONV_EILSEQ;
1533 static size_t flush_combos(yaz_iconv_t cd,
1534 char **outbuf, size_t *outbytesleft)
1536 unsigned long y = cd->write_marc8_last;
1539 size_t i, out_no = 0;
1544 byte = (unsigned char )((y>>16) & 0xff);
1546 out_buf[out_no++] = byte;
1547 byte = (unsigned char)((y>>8) & 0xff);
1549 out_buf[out_no++] = byte;
1550 byte = (unsigned char )(y & 0xff);
1552 out_buf[out_no++] = byte;
1554 if (out_no + cd->write_marc8_comb_no + 1 > *outbytesleft)
1556 cd->my_errno = YAZ_ICONV_E2BIG;
1557 return (size_t) (-1);
1560 for (i = 0; i < cd->write_marc8_comb_no; i++)
1562 /* all MARC-8 combined characters are simple bytes */
1563 byte = (unsigned char )(cd->write_marc8_comb_ch[i]);
1564 *(*outbuf)++ = byte;
1567 memcpy(*outbuf, out_buf, out_no);
1569 (*outbytesleft) -= out_no;
1570 if (cd->write_marc8_second_half_char)
1572 *(*outbuf)++ = cd->write_marc8_second_half_char;
1576 cd->write_marc8_last = 0;
1577 cd->write_marc8_comb_no = 0;
1578 cd->write_marc8_second_half_char = 0;
/* Emit the escape sequence that switches the MARC-8 output to the page
 * named by page_chr, but only when it differs from the page currently
 * recorded in cd->write_marc8_page_chr.
 *
 * cd            conversion handle; write_marc8_page_chr is updated
 * outbuf        in/out: current position in the output buffer
 * outbytesleft  in/out: number of bytes still free in the output buffer
 * page_chr      escape sequence of the page to enter; the pointer itself
 *               is stored in cd, so it must stay valid afterwards
 *
 * Fewer than 8 free output bytes yields YAZ_ICONV_E2BIG in cd->my_errno
 * and a (size_t)(-1) return.
 */
1582 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
1583 char **outbuf, size_t *outbytesleft,
1584 const char *page_chr)
1586 const char *old_page_chr = cd->write_marc8_page_chr;
/* strcmp is non-zero when the requested page differs from the current one */
1587 if (strcmp(page_chr, old_page_chr))
1590 const char *page_out = page_chr;
/* conservative fixed bound checked before any escape bytes are copied */
1592 if (*outbytesleft < 8)
1594 cd->my_errno = YAZ_ICONV_E2BIG;
1596 return (size_t) (-1);
1598 cd->write_marc8_page_chr = page_chr;
/* the three two-byte escapes below are handled as a special case when
   they are the page being left */
1600 if (!strcmp(old_page_chr, "\033p")
1601 || !strcmp(old_page_chr, "\033g")
1602 || !strcmp(old_page_chr, "\033b"))
1604 /* Technique 1 leave */
1606 if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */
1608 /* Must leave script + enter new page */
1609 plen = strlen(page_out);
1610 memcpy(*outbuf, page_out, plen);
1612 (*outbytesleft) -= plen;
1613 page_out = page_chr;
/* copy the (remaining) escape sequence without its terminating NUL */
1616 plen = strlen(page_out);
1617 memcpy(*outbuf, page_out, plen);
1619 (*outbytesleft) -= plen;
/* Convert one character x to MARC-8 using lookup_marc8 and either queue
 * it as a combining byte or make it the new pending base character.
 *
 * cd            conversion handle carrying the write_marc8_* state
 * x             character to convert (passed to lookup_marc8 unchanged)
 * outbuf        in/out: current position in the output buffer
 * outbytesleft  in/out: number of bytes still free in the output buffer
 *
 * Returns (size_t)(-1) on failure.
 * NOTE(review): the conditions selecting each branch are not shown next
 * to these statements; the notes below describe only what each visible
 * statement does - confirm the branch structure.
 */
1625 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1626 char **outbuf, size_t *outbytesleft)
1629 const char *page_chr = 0;
/* lookup_marc8 yields the MARC-8 value and reports the combining flag
   and the page escape sequence through comb and page_chr */
1630 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1633 return (size_t) (-1);
/* some inputs additionally schedule a second-half byte (0xEC or 0xFB)
   that flush_combos writes after the base character */
1638 cd->write_marc8_second_half_char = 0xEC;
1639 else if (x == 0x0360)
1640 cd->write_marc8_second_half_char = 0xFB;
/* the combining queue is only written while it holds fewer than 6 entries */
1642 if (cd->write_marc8_comb_no < 6)
1643 cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y;
/* emit whatever was pending, switch page if needed, then remember y as
   the new pending character */
1647 size_t r = flush_combos(cd, outbuf, outbytesleft);
1653 r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr);
1657 cd->write_marc8_last = y;
/* Flush handler for MARC-8 output: write out the pending character via
 * flush_combos, then switch the output back to the "\033(B" page.
 * Returns the result of yaz_write_marc8_page_chr.
 * NOTE(review): how a failing flush_combos result is handled is not shown
 * here - confirm.
 */
1662 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1663 char **outbuf, size_t *outbytesleft)
1665 size_t r = flush_combos(cd, outbuf, outbytesleft);
1668 return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, "\033(B");
/* Write handler for MARC-8 output. A character x that matches the y field
 * of a latin1_comb entry is written as that entry's x1 followed by x2;
 * any other character is passed straight to yaz_write_marc8_2.
 *
 * cd            conversion handle
 * x             character to write
 * outbuf        in/out: current position in the output buffer
 * outbytesleft  in/out: number of bytes still free in the output buffer
 */
1671 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1672 char **outbuf, size_t *outbytesleft)
/* the table scan stops at the first entry whose x1 is zero */
1675 for (i = 0; latin1_comb[i].x1; i++)
1677 if (x == latin1_comb[i].y)
1680 /* save the output pointers .. */
1681 char *outbuf0 = *outbuf;
1682 size_t outbytesleft0 = *outbytesleft;
/* NOTE(review): flush_combos reads write_marc8_last as unsigned long;
   storing it in an int may truncate - confirm the field's type */
1683 int last_ch = cd->write_marc8_last;
1685 r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1686 outbuf, outbytesleft);
1689 r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1690 outbuf, outbytesleft);
/* undo the partial write so the pair is emitted all-or-nothing */
1691 if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1693 /* not enough room. reset output to original values */
1695 *outbytesleft = outbytesleft0;
1696 cd->write_marc8_last = last_ch;
/* no table entry matched: write x itself */
1701 return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
/* Write handler for WCHAR_T output: store the character as one wchar_t in
 * the output buffer.
 *
 * cd            conversion handle (receives the error code)
 * x             character to write
 * outbuf        in/out: current position in the output buffer
 * outbytesleft  in/out: number of bytes still free in the output buffer
 *
 * With fewer than sizeof(wchar_t) bytes free, cd->my_errno is set to
 * YAZ_ICONV_E2BIG and (size_t)(-1) is returned.
 */
1706 static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
1707 char **outbuf, size_t *outbytesleft)
1709 unsigned char *outp = (unsigned char *) *outbuf;
1711 if (*outbytesleft >= sizeof(wchar_t))
/* memcpy is used for the store, so outp needs no particular alignment;
   wch is presumably x converted to wchar_t - confirm */
1714 memcpy(outp, &wch, sizeof(wch));
1715 outp += sizeof(wch);
1716 (*outbytesleft) -= sizeof(wch);
1720 cd->my_errno = YAZ_ICONV_E2BIG;
1721 return (size_t)(-1);
/* publish the advanced output position to the caller */
1723 *outbuf = (char *) outp;
/* Returns non-zero when cd has both a read handler and a write handler
 * set, i.e. the conversion is carried out by the handlers in this file.
 */
1728 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1730 return cd->read_handle && cd->write_handle;
/* Create a conversion handle for converting from fromcode to tocode.
 *
 * tocode    name of the target encoding
 * fromcode  name of the source encoding; a leading '@' is treated
 *           specially (see the comment in the body)
 *
 * Each name is compared against the built-in encodings with yaz_matchstr
 * (a zero result selects the handler). When either side ends up without a
 * handler, iconv_open is tried with the same names.
 * NOTE(review): the failure paths and the return statements are not shown
 * next to these statements - confirm what is returned on failure.
 */
1733 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1735 yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
/* start with no handlers selected */
1737 cd->write_handle = 0;
1738 cd->read_handle = 0;
1739 cd->init_handle = 0;
1740 cd->flush_handle = 0;
1741 cd->my_errno = YAZ_ICONV_UNKNOWN;
1743 /* a useful hack: if fromcode has leading @,
1744 the library not use YAZ's own conversions .. */
1745 if (fromcode[0] == '@')
/* select the reader; UTF8 is the only source encoding that also gets an
   init handler */
1749 if (!yaz_matchstr(fromcode, "UTF8"))
1751 cd->read_handle = yaz_read_UTF8;
1752 cd->init_handle = yaz_init_UTF8;
1754 else if (!yaz_matchstr(fromcode, "ISO88591"))
1755 cd->read_handle = yaz_read_ISO8859_1;
1756 else if (!yaz_matchstr(fromcode, "UCS4"))
1757 cd->read_handle = yaz_read_UCS4;
1758 else if (!yaz_matchstr(fromcode, "UCS4LE"))
1759 cd->read_handle = yaz_read_UCS4LE;
1760 else if (!yaz_matchstr(fromcode, "MARC8"))
1761 cd->read_handle = yaz_read_marc8;
1762 else if (!yaz_matchstr(fromcode, "MARC8s"))
1763 cd->read_handle = yaz_read_marc8s;
1764 else if (!yaz_matchstr(fromcode, "advancegreek"))
1765 cd->read_handle = yaz_read_advancegreek;
/* two spellings of the same encoding name map to one reader */
1766 else if (!yaz_matchstr(fromcode, "iso54281984"))
1767 cd->read_handle = yaz_read_iso5428_1984;
1768 else if (!yaz_matchstr(fromcode, "iso5428:1984"))
1769 cd->read_handle = yaz_read_iso5428_1984;
1771 else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1772 cd->read_handle = yaz_read_wchar_t;
/* select the writer; ISO88591 and the MARC8 variants also install a
   flush handler */
1775 if (!yaz_matchstr(tocode, "UTF8"))
1776 cd->write_handle = yaz_write_UTF8;
1777 else if (!yaz_matchstr(tocode, "ISO88591"))
1779 cd->write_handle = yaz_write_ISO8859_1;
1780 cd->flush_handle = yaz_flush_ISO8859_1;
1782 else if (!yaz_matchstr (tocode, "UCS4"))
1783 cd->write_handle = yaz_write_UCS4;
1784 else if (!yaz_matchstr(tocode, "UCS4LE"))
1785 cd->write_handle = yaz_write_UCS4LE;
1786 else if (!yaz_matchstr(tocode, "MARC8"))
1788 cd->write_handle = yaz_write_marc8;
1789 cd->flush_handle = yaz_flush_marc8;
/* as a target, MARC8s uses exactly the same handlers as MARC8 */
1791 else if (!yaz_matchstr(tocode, "MARC8s"))
1793 cd->write_handle = yaz_write_marc8;
1794 cd->flush_handle = yaz_flush_marc8;
1796 else if (!yaz_matchstr(tocode, "advancegreek"))
1798 cd->write_handle = yaz_write_advancegreek;
1800 else if (!yaz_matchstr(tocode, "iso54281984"))
1802 cd->write_handle = yaz_write_iso5428_1984;
1804 else if (!yaz_matchstr(tocode, "iso5428:1984"))
1806 cd->write_handle = yaz_write_iso5428_1984;
1809 else if (!yaz_matchstr(tocode, "WCHAR_T"))
1810 cd->write_handle = yaz_write_wchar_t;
/* no built-in pair available: fall back to the system iconv, which
   signals failure with (iconv_t)(-1) */
1815 if (!cd->read_handle || !cd->write_handle)
1817 cd->iconv_cd = iconv_open (tocode, fromcode);
1818 if (cd->iconv_cd == (iconv_t) (-1))
1825 if (!cd->read_handle || !cd->write_handle)
/* Convert characters from *inbuf to *outbuf using the handlers stored in
 * cd, or using the system iconv through cd->iconv_cd.
 *
 * cd            conversion handle
 * inbuf         in/out: current input position; a null inbuf or *inbuf
 *               takes the branch that writes cd->unget_x and calls the
 *               flush handler
 * inbytesleft   in/out: number of input bytes remaining
 * outbuf        in/out: current output position
 * outbytesleft  in/out: number of bytes still free in the output buffer
 *
 * Errors are reported through cd->my_errno as YAZ_ICONV_* codes.
 * NOTE(review): the conditions guarding the individual sections and the
 * return statements are not shown next to these statements - confirm the
 * exact control flow before relying on the section notes below.
 */
1835 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1836 char **outbuf, size_t *outbytesleft)
/* system iconv path: a (size_t)(-1) result has its errno value mapped
   onto the YAZ_ICONV_* codes */
1845 iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1846 if (r == (size_t)(-1))
1848 switch (yaz_errno())
1851 cd->my_errno = YAZ_ICONV_E2BIG;
1854 cd->my_errno = YAZ_ICONV_EINVAL;
1857 cd->my_errno = YAZ_ICONV_EILSEQ;
1860 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* conversion state defaults: escape mode 'B', empty combining buffers,
   nothing pending on the MARC-8 writer, output page "\033(B" */
1872 cd->my_errno = YAZ_ICONV_UNKNOWN;
1873 cd->marc8_esc_mode = 'B';
1875 cd->comb_offset = cd->comb_size = 0;
1876 cd->compose_char = 0;
1878 cd->write_marc8_comb_no = 0;
1879 cd->write_marc8_second_half_char = 0;
1880 cd->write_marc8_last = 0;
1881 cd->write_marc8_page_chr = "\033(B";
/* give the reader's init handler a look at the start of the input; the
   bytes it reports in no_read are taken off the remaining input count */
1889 if (cd->init_handle && inbuf && *inbuf)
1892 size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1893 *inbytesleft, &no_read);
1896 if (cd->my_errno == YAZ_ICONV_EINVAL)
1901 *inbytesleft -= no_read;
/* no input supplied: write the saved character and run the flush handler
   when an output buffer is available */
1907 if (!inbuf || !*inbuf)
1909 if (outbuf && *outbuf)
1912 r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
1913 if (cd->flush_handle)
1914 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
/* main conversion: read one character with read_handle, write it with
   write_handle, then advance the input by the bytes consumed */
1929 no_read = cd->no_read_x;
1933 if (*inbytesleft == 0)
1935 r = *inbuf - inbuf0;
1938 x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1948 r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
1951 /* unable to write it. save it because read_handle cannot
1953 if (cd->my_errno == YAZ_ICONV_E2BIG)
1956 cd->no_read_x = no_read;
1962 *inbytesleft -= no_read;
1963 (*inbuf) += no_read;
/* Returns the error code currently stored in cd->my_errno (one of the
 * YAZ_ICONV_* values assigned elsewhere in this file).
 */
1968 int yaz_iconv_error (yaz_iconv_t cd)
1970 return cd->my_errno;
/* Release a conversion handle: closes the system iconv descriptor held in
 * cd->iconv_cd.
 * NOTE(review): the guard around iconv_close, the release of cd itself
 * and the return value are not shown next to this statement - confirm.
 */
1973 int yaz_iconv_close (yaz_iconv_t cd)
1977 iconv_close (cd->iconv_cd);
1986 * indent-tabs-mode: nil
1988 * vim: shiftwidth=4 tabstop=8 expandtab