2 * Copyright (C) 1995-2007, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.44 2007-09-22 18:49:55 adam Exp $
9 * \brief Implements simple ICONV
11 * This implements an interface similar to that of iconv and
12 * is used by YAZ to interface with iconv (if present).
13 * For systems where iconv is not present, this layer
14 * provides a few important conversions: UTF-8, MARC-8, Latin-1.
17 * http://www.loc.gov/marc/specifications/speccharmarc8.html
37 #include <yaz/yaz-util.h>
39 unsigned long yaz_marc8_42_conv(unsigned char *inp, size_t inbytesleft,
40 size_t *no_read, int *combining);
41 unsigned long yaz_marc8_45_conv(unsigned char *inp, size_t inbytesleft,
42 size_t *no_read, int *combining);
43 unsigned long yaz_marc8_67_conv(unsigned char *inp, size_t inbytesleft,
44 size_t *no_read, int *combining);
45 unsigned long yaz_marc8_62_conv(unsigned char *inp, size_t inbytesleft,
46 size_t *no_read, int *combining);
47 unsigned long yaz_marc8_70_conv(unsigned char *inp, size_t inbytesleft,
48 size_t *no_read, int *combining);
49 unsigned long yaz_marc8_32_conv(unsigned char *inp, size_t inbytesleft,
50 size_t *no_read, int *combining);
51 unsigned long yaz_marc8_4E_conv(unsigned char *inp, size_t inbytesleft,
52 size_t *no_read, int *combining);
53 unsigned long yaz_marc8_51_conv(unsigned char *inp, size_t inbytesleft,
54 size_t *no_read, int *combining);
55 unsigned long yaz_marc8_33_conv(unsigned char *inp, size_t inbytesleft,
56 size_t *no_read, int *combining);
57 unsigned long yaz_marc8_34_conv(unsigned char *inp, size_t inbytesleft,
58 size_t *no_read, int *combining);
59 unsigned long yaz_marc8_53_conv(unsigned char *inp, size_t inbytesleft,
60 size_t *no_read, int *combining);
61 unsigned long yaz_marc8_31_conv(unsigned char *inp, size_t inbytesleft,
62 size_t *no_read, int *combining);
65 unsigned long yaz_marc8r_42_conv(unsigned char *inp, size_t inbytesleft,
66 size_t *no_read, int *combining);
67 unsigned long yaz_marc8r_45_conv(unsigned char *inp, size_t inbytesleft,
68 size_t *no_read, int *combining);
69 unsigned long yaz_marc8r_67_conv(unsigned char *inp, size_t inbytesleft,
70 size_t *no_read, int *combining);
71 unsigned long yaz_marc8r_62_conv(unsigned char *inp, size_t inbytesleft,
72 size_t *no_read, int *combining);
73 unsigned long yaz_marc8r_70_conv(unsigned char *inp, size_t inbytesleft,
74 size_t *no_read, int *combining);
75 unsigned long yaz_marc8r_32_conv(unsigned char *inp, size_t inbytesleft,
76 size_t *no_read, int *combining);
77 unsigned long yaz_marc8r_4E_conv(unsigned char *inp, size_t inbytesleft,
78 size_t *no_read, int *combining);
79 unsigned long yaz_marc8r_51_conv(unsigned char *inp, size_t inbytesleft,
80 size_t *no_read, int *combining);
81 unsigned long yaz_marc8r_33_conv(unsigned char *inp, size_t inbytesleft,
82 size_t *no_read, int *combining);
83 unsigned long yaz_marc8r_34_conv(unsigned char *inp, size_t inbytesleft,
84 size_t *no_read, int *combining);
85 unsigned long yaz_marc8r_53_conv(unsigned char *inp, size_t inbytesleft,
86 size_t *no_read, int *combining);
87 unsigned long yaz_marc8r_31_conv(unsigned char *inp, size_t inbytesleft,
88 size_t *no_read, int *combining);
90 struct yaz_iconv_struct {
93 size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
94 size_t inbytesleft, size_t *no_read);
95 unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
96 size_t inbytesleft, size_t *no_read);
97 size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
98 char **outbuf, size_t *outbytesleft);
99 size_t (*flush_handle)(yaz_iconv_t cd,
100 char **outbuf, size_t *outbytesleft);
105 unsigned long comb_x[8];
106 size_t comb_no_read[8];
108 unsigned long unget_x;
112 unsigned long compose_char;
114 unsigned long write_marc8_comb_ch[8];
115 size_t write_marc8_comb_no;
116 unsigned write_marc8_second_half_char;
117 unsigned long write_marc8_last;
118 const char *write_marc8_page_chr;
122 unsigned long x1, x2;
125 { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
126 { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
127 { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
128 { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
129 { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
130 { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
131 /* no need for 0xc6 LATIN CAPITAL LETTER AE */
132 { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
133 { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
134 { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
135 { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
136 { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
137 { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
138 { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
139 { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
140 { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
141 { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
142 { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
143 { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
144 { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
145 { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
146 { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
147 /* omitted: 0xd7 MULTIPLICATION SIGN */
148 /* omitted: 0xd8 LATIN CAPITAL LETTER O WITH STROKE */
149 { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
150 { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
151 { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
152 { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
153 { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
154 /* omitted: 0xde LATIN CAPITAL LETTER THORN */
155 /* omitted: 0xdf LATIN SMALL LETTER SHARP S */
156 { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
157 { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
158 { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
159 { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
160 { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
161 { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
162 /* omitted: 0xe6 LATIN SMALL LETTER AE */
163 { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
164 { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
165 { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
166 { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
167 { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
168 { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
169 { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
170 { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
171 { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
172 /* omitted: 0xf0 LATIN SMALL LETTER ETH */
173 { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
174 { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
175 { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
176 { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
177 { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
178 { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
179 /* omitted: 0xf7 DIVISION SIGN */
180 /* omitted: 0xf8 LATIN SMALL LETTER O WITH STROKE */
181 { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
182 { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
183 { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
184 { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
185 { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
186 /* omitted: 0xfe LATIN SMALL LETTER THORN */
187 { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
192 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
193 size_t inbytesleft, size_t *no_read)
195 unsigned long x = inp[0];
/* Start-of-input handler for UTF-8: inspects the first bytes of the input
 * for a byte order mark. The UTF-8 BOM is the byte sequence EF BB BF, so the
 * second AND third byte must both match. The previous test used != on the
 * second byte and therefore could never accept a real BOM.
 * YAZ_ICONV_EINVAL is set on a path whose guard is not shown here.
 * NOTE(review): the branch bodies are missing from this listing - confirm
 * that the true branch of the test below is the one that skips the BOM. */
201 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
202 size_t inbytesleft, size_t *no_read)
211 cd->my_errno = YAZ_ICONV_EINVAL;
214 if (inp[1] == 0xbb && inp[2] == 0xbf)
/* Decodes one UTF-8 encoded character starting at inp.
 * Visible logic: *no_read is cleared first. After an earlier branch that is
 * not shown here, a lead byte <= 0xbf or >= 0xfe is flagged as
 * YAZ_ICONV_EILSEQ. Lead bytes up to 0xdf / 0xef / 0xf7 / 0xfb / 0xfd select
 * the 2, 3, 4, 5 and 6 byte forms; each form needs that many input bytes and
 * every trailing byte must look like 10xxxxxx ((b & 0xc0) == 0x80), else
 * YAZ_ICONV_EILSEQ. The last assignment reports YAZ_ICONV_EINVAL.
 * NOTE(review): this listing omits lines (the *no_read updates and the
 * condition guarding the first EILSEQ of each branch are not visible) -
 * confirm details against the full file. */
221 unsigned long yaz_read_UTF8_char(unsigned char *inp,
222 size_t inbytesleft, size_t *no_read,
227 *no_read = 0; /* by default */
233 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
235 *error = YAZ_ICONV_EILSEQ;
/* two byte form: 110xxxxx 10xxxxxx */
237 else if (inp[0] <= 0xdf && inbytesleft >= 2)
239 if ((inp[1] & 0xc0) == 0x80)
241 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
245 *error = YAZ_ICONV_EILSEQ;
248 *error = YAZ_ICONV_EILSEQ;
/* three byte form */
250 else if (inp[0] <= 0xef && inbytesleft >= 3)
252 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
254 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
259 *error = YAZ_ICONV_EILSEQ;
262 *error = YAZ_ICONV_EILSEQ;
/* four byte form */
264 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
266 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
267 && (inp[3] & 0xc0) == 0x80)
269 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
270 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
274 *error = YAZ_ICONV_EILSEQ;
277 *error = YAZ_ICONV_EILSEQ;
/* five byte form */
279 else if (inp[0] <= 0xfb && inbytesleft >= 5)
281 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
282 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
284 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
285 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
290 *error = YAZ_ICONV_EILSEQ;
293 *error = YAZ_ICONV_EILSEQ;
/* six byte form */
295 else if (inp[0] <= 0xfd && inbytesleft >= 6)
297 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
298 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
299 && (inp[5] & 0xc0) == 0x80)
301 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
302 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
303 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
307 *error = YAZ_ICONV_EILSEQ;
310 *error = YAZ_ICONV_EILSEQ;
313 *error = YAZ_ICONV_EINVAL; /* incomplete sequence */
/* read_handle flavour of the UTF-8 decoder: forwards to yaz_read_UTF8_char
 * and lets it store any error code directly in cd->my_errno. */
318 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
319 size_t inbytesleft, size_t *no_read)
321 return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
/* Reads one big-endian UCS-4 value: inp[0] is the most significant byte.
 * Incomplete input is reported through cd->my_errno as YAZ_ICONV_EINVAL.
 * Every byte is widened to unsigned long before it is shifted: an unsigned
 * char promotes to int, and shifting a value >= 0x80 left by 24 overflows
 * int (undefined behaviour; in practice the result sign-extends when it is
 * stored in a 64-bit unsigned long). */
324 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
325 size_t inbytesleft, size_t *no_read)
331 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
336 x = ((unsigned long) inp[0] << 24) | ((unsigned long) inp[1] << 16) | ((unsigned long) inp[2] << 8) | (unsigned long) inp[3];
/* Reads one little-endian UCS-4 value: inp[3] is the most significant byte.
 * Incomplete input is reported through cd->my_errno as YAZ_ICONV_EINVAL.
 * Every byte is widened to unsigned long before it is shifted: an unsigned
 * char promotes to int, and shifting a value >= 0x80 left by 24 overflows
 * int (undefined behaviour; in practice the result sign-extends when it is
 * stored in a 64-bit unsigned long). */
342 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
343 size_t inbytesleft, size_t *no_read)
349 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
354 x = ((unsigned long) inp[3] << 24) | ((unsigned long) inp[2] << 16) | ((unsigned long) inp[1] << 8) | (unsigned long) inp[0];
361 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
362 size_t inbytesleft, size_t *no_read)
366 if (inbytesleft < sizeof(wchar_t))
368 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
374 memcpy (&wch, inp, sizeof(wch));
376 *no_read = sizeof(wch);
382 static unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
383 size_t inbytesleft, size_t *no_read)
390 while (inbytesleft > 0)
396 else if (*inp == 0xa3)
406 if (inbytesleft == 0)
408 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
413 case 0xe1: /* alpha small */
419 case 0xc1: /* alpha capital */
426 case 0xe2: /* Beta small */
429 case 0xc2: /* Beta capital */
433 case 0xe4: /* Gamma small */
436 case 0xc4: /* Gamma capital */
440 case 0xe5: /* Delta small */
443 case 0xc5: /* Delta capital */
446 case 0xe6: /* epsilon small */
452 case 0xc6: /* epsilon capital */
458 case 0xe9: /* Zeta small */
461 case 0xc9: /* Zeta capital */
464 case 0xea: /* Eta small */
470 case 0xca: /* Eta capital */
476 case 0xeb: /* Theta small */
479 case 0xcb: /* Theta capital */
482 case 0xec: /* Iota small */
494 case 0xcc: /* Iota capital */
503 case 0xed: /* Kappa small */
506 case 0xcd: /* Kappa capital */
509 case 0xee: /* Lambda small */
512 case 0xce: /* Lambda capital */
515 case 0xef: /* Mu small */
518 case 0xcf: /* Mu capital */
521 case 0xf0: /* Nu small */
524 case 0xd0: /* Nu capital */
527 case 0xf1: /* Xi small */
530 case 0xd1: /* Xi capital */
533 case 0xf2: /* Omicron small */
539 case 0xd2: /* Omicron capital */
545 case 0xf3: /* Pi small */
548 case 0xd3: /* Pi capital */
551 case 0xf5: /* Rho small */
554 case 0xd5: /* Rho capital */
557 case 0xf7: /* Sigma small (end of words) */
560 case 0xf6: /* Sigma small */
563 case 0xd6: /* Sigma capital */
566 case 0xf8: /* Tau small */
569 case 0xd8: /* Tau capital */
572 case 0xf9: /* Upsilon small */
584 case 0xd9: /* Upsilon capital */
593 case 0xfa: /* Phi small */
596 case 0xda: /* Phi capital */
599 case 0xfb: /* Chi small */
602 case 0xdb: /* Chi capital */
605 case 0xfc: /* Psi small */
608 case 0xdc: /* Psi capital */
611 case 0xfd: /* Omega small */
617 case 0xdd: /* Omega capital */
632 static size_t yaz_write_iso5428_1984(yaz_iconv_t cd, unsigned long x,
633 char **outbuf, size_t *outbytesleft)
636 unsigned char *out = (unsigned char*) *outbuf;
637 if (*outbytesleft < 3)
639 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
644 case 0x03ac : out[k++]=0xa2; out[k++]=0xe1; break;
645 case 0x03b1 : out[k++]=0xe1; break;
646 case 0x0386 : out[k++]=0xa2; out[k++]=0xc1; break;
647 case 0x0391 : out[k++]=0xc1; break;
648 case 0x03b2 : out[k++]=0xe2; break;
649 case 0x0392 : out[k++]=0xc2; break;
650 case 0x03b3 : out[k++]=0xe4; break;
651 case 0x0393 : out[k++]=0xc4; break;
652 case 0x03b4 : out[k++]=0xe5; break;
653 case 0x0394 : out[k++]=0xc5; break;
654 case 0x03ad : out[k++]=0xa2; out[k++]=0xe6; break;
655 case 0x03b5 : out[k++]=0xe6; break;
656 case 0x0388 : out[k++]=0xa2; out[k++]=0xc6; break;
657 case 0x0395 : out[k++]=0xc6; break;
658 case 0x03b6 : out[k++]=0xe9; break;
659 case 0x0396 : out[k++]=0xc9; break;
660 case 0x03ae : out[k++]=0xa2; out[k++]=0xea; break;
661 case 0x03b7 : out[k++]=0xea; break;
662 case 0x0389 : out[k++]=0xa2; out[k++]=0xca; break;
663 case 0x0397 : out[k++]=0xca; break;
664 case 0x03b8 : out[k++]=0xeb; break;
665 case 0x0398 : out[k++]=0xcb; break;
666 case 0x0390 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xec; break;
667 case 0x03af : out[k++]=0xa2; out[k++]=0xec; break;
668 case 0x03ca : out[k++]=0xa3; out[k++]=0xec; break;
669 case 0x03b9 : out[k++]=0xec; break;
670 case 0x038a : out[k++]=0xa2; out[k++]=0xcc; break;
671 case 0x03aa : out[k++]=0xa3; out[k++]=0xcc; break;
672 case 0x0399 : out[k++]=0xcc; break;
673 case 0x03ba : out[k++]=0xed; break;
674 case 0x039a : out[k++]=0xcd; break;
675 case 0x03bb : out[k++]=0xee; break;
676 case 0x039b : out[k++]=0xce; break;
677 case 0x03bc : out[k++]=0xef; break;
678 case 0x039c : out[k++]=0xcf; break;
679 case 0x03bd : out[k++]=0xf0; break;
680 case 0x039d : out[k++]=0xd0; break;
681 case 0x03be : out[k++]=0xf1; break;
682 case 0x039e : out[k++]=0xd1; break;
683 case 0x03cc : out[k++]=0xa2; out[k++]=0xf2; break;
684 case 0x03bf : out[k++]=0xf2; break;
685 case 0x038c : out[k++]=0xa2; out[k++]=0xd2; break;
686 case 0x039f : out[k++]=0xd2; break;
687 case 0x03c0 : out[k++]=0xf3; break;
688 case 0x03a0 : out[k++]=0xd3; break;
689 case 0x03c1 : out[k++]=0xf5; break;
690 case 0x03a1 : out[k++]=0xd5; break;
691 case 0x03c2 : out[k++]=0xf7; break;
692 case 0x03c3 : out[k++]=0xf6; break;
693 case 0x03a3 : out[k++]=0xd6; break;
694 case 0x03c4 : out[k++]=0xf8; break;
695 case 0x03a4 : out[k++]=0xd8; break;
696 case 0x03b0 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xf9; break;
697 case 0x03cd : out[k++]=0xa2; out[k++]=0xf9; break;
698 case 0x03cb : out[k++]=0xa3; out[k++]=0xf9; break;
699 case 0x03c5 : out[k++]=0xf9; break;
700 case 0x038e : out[k++]=0xa2; out[k++]=0xd9; break;
701 case 0x03ab : out[k++]=0xa3; out[k++]=0xd9; break;
702 case 0x03a5 : out[k++]=0xd9; break;
703 case 0x03c6 : out[k++]=0xfa; break;
704 case 0x03a6 : out[k++]=0xda; break;
705 case 0x03c7 : out[k++]=0xfb; break;
706 case 0x03a7 : out[k++]=0xdb; break;
707 case 0x03c8 : out[k++]=0xfc; break;
708 case 0x03a8 : out[k++]=0xdc; break;
709 case 0x03ce : out[k++]=0xa2; out[k++]=0xfd; break;
710 case 0x03c9 : out[k++]=0xfd; break;
711 case 0x038f : out[k++]=0xa2; out[k++]=0xdd; break;
712 case 0x03a9 : out[k++]=0xdd; break;
716 cd->my_errno = YAZ_ICONV_EILSEQ;
727 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
728 size_t inbytesleft, size_t *no_read)
736 while (inbytesleft > 0)
742 else if (*inp == 0x9e)
746 else if (*inp == 0x9f)
756 if (inbytesleft == 0)
758 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* Writes the Advance Greek byte sequence for Unicode value x to *outbuf.
 * At least 3 free output bytes are required up front (the longest sequence
 * in the table is 3 bytes); otherwise YAZ_ICONV_E2BIG is set.
 * Table layout as visible below: a small letter is a single byte in the
 * range 0x81..0x99, and a capital letter is 0x9f followed by the byte of the
 * matching small letter. 0x9d and 0x9e are further prefix bytes used for the
 * code points listed in the first part of the table.
 * Fix: small chi (0x03c7) is now written as 0x97. It used to be 0x96, the
 * same byte as small phi (0x03c6), although capital chi is 0x9f 0x97.
 * YAZ_ICONV_EILSEQ is set on a path whose guard is not shown in this
 * listing. */
986 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
987 char **outbuf, size_t *outbytesleft)
990 unsigned char *out = (unsigned char*) *outbuf;
991 if (*outbytesleft < 3)
993 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
998 case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
999 case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
1000 case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
1001 case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
1002 case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
1003 case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
1004 case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
1005 case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
1006 case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
1007 case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
1008 case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
1009 case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
1010 case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
1011 case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
1012 case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
1013 case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
1014 case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
1015 case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
1016 case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
1017 case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
1018 case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
1019 case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
1020 case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
1021 case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
1022 case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
1023 case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
1024 case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
1025 case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
1026 case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
1027 case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
1028 case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
1029 case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
1030 case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
1031 case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
1032 case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
1033 case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
1034 case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
1035 case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
1036 case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
1037 case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
1038 case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
1039 case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
1040 case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
1041 case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
1042 case 0x03b1 : out[k++]=0x81; break;
1043 case 0x03b2 : out[k++]=0x82; break;
1044 case 0x03b3 : out[k++]=0x83; break;
1045 case 0x03b4 : out[k++]=0x84; break;
1046 case 0x03b5 : out[k++]=0x85; break;
1047 case 0x03b6 : out[k++]=0x86; break;
1048 case 0x03b7 : out[k++]=0x87; break;
1049 case 0x03b8 : out[k++]=0x88; break;
1050 case 0x03b9 : out[k++]=0x89; break;
1051 case 0x03ba : out[k++]=0x8a; break;
1052 case 0x03bb : out[k++]=0x8b; break;
1053 case 0x03bc : out[k++]=0x8c; break;
1054 case 0x03bd : out[k++]=0x8d; break;
1055 case 0x03be : out[k++]=0x8e; break;
1056 case 0x03bf : out[k++]=0x8f; break;
1057 case 0x03c0 : out[k++]=0x90; break;
1058 case 0x03c1 : out[k++]=0x91; break;
1059 case 0x03c2 : out[k++]=0x92; break;
1060 case 0x03c3 : out[k++]=0x93; break;
1061 case 0x03c4 : out[k++]=0x94; break;
1062 case 0x03c5 : out[k++]=0x95; break;
1063 case 0x03c6 : out[k++]=0x96; break;
1064 case 0x03c7 : out[k++]=0x97; break; /* chi; was 0x96, which is phi */
1065 case 0x03c8 : out[k++]=0x98; break;
1066 case 0x03c9 : out[k++]=0x99; break;
1070 cd->my_errno = YAZ_ICONV_EILSEQ;
1082 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
1083 size_t inbytesleft, size_t *no_read,
1086 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
1087 size_t inbytesleft, size_t *no_read)
1090 if (cd->comb_offset < cd->comb_size)
1092 *no_read = cd->comb_no_read[cd->comb_offset];
1093 x = cd->comb_x[cd->comb_offset];
1095 /* special case for double-diacritic combining characters,
1096 INVERTED BREVE and DOUBLE TILDE.
1097 We'll increment the no_read counter by 1, since we want to skip over
1098 the processing of the closing ligature character
1100 /* this code is no longer necessary.. our handlers code in
1101 yaz_marc8_?_conv (generated by charconv.tcl) now returns
1102 0 and no_read=1 when a sequence does not match the input.
1103 The SECOND HALFs in codetables.xml produce a non-existent
1104 entry in the conversion trie.. Hence when met, the input byte is
1105 skipped as it should (in yaz_iconv)
1108 if (x == 0x0361 || x == 0x0360)
1115 cd->comb_offset = 0;
1116 for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
1120 if (inbytesleft == 0 && cd->comb_size)
1122 cd->my_errno = YAZ_ICONV_EINVAL;
1127 x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
1130 cd->comb_x[cd->comb_size] = x;
1131 cd->comb_no_read[cd->comb_size] = *no_read;
1133 inbytesleft = inbytesleft - *no_read;
/* MARC-8 reader variant that prefers precomposed Latin-1 characters.
 * It reads a character through yaz_read_marc8. When that yields a non-zero
 * value and exactly one character is held in cd->comb_x, latin1_comb is
 * searched for an entry whose x1 is the character just read and whose x2 is
 * the held character cd->comb_x[0]. On a match the held character's byte
 * count is added to *no_read and x becomes the entry's precomposed value y.
 * NOTE(review): statements between the visible lines are missing from this
 * listing. */
1138 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
1139 size_t inbytesleft, size_t *no_read)
1141 unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
1142 if (x && cd->comb_size == 1)
1144 /* For MARC8s we try to get a Latin-1 page code out of it */
/* the table ends at the first entry whose x1 is 0 */
1146 for (i = 0; latin1_comb[i].x1; i++)
1147 if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
1149 *no_read += cd->comb_no_read[0];
1151 x = latin1_comb[i].y;
/* Reads one MARC-8 character and reports through *comb what the selected
 * conversion routine stores there.
 * Leading escape sequences (byte 27) are consumed first: intermediate bytes
 * from the set "(,$!)-" are skipped, the byte after them is stored in
 * cd->marc8_esc_mode, and the number of bytes consumed is added to *no_read.
 * Input that ends inside an escape sequence sets YAZ_ICONV_EINVAL.
 * The character itself is converted by the yaz_marc8_*_conv routine chosen
 * by cd->marc8_esc_mode, and its byte count is added to *no_read.
 * Fix: the intermediate-byte loop now also requires *inp != 0. strchr()
 * treats the string's terminating NUL as part of the string, so a 0 byte in
 * the input used to be accepted as an intermediate byte and skipped.
 * NOTE(review): several lines (some case labels, breaks, returns) are
 * missing from this listing. */
1158 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
1159 size_t inbytesleft, size_t *no_read,
1163 while(inbytesleft >= 1 && inp[0] == 27)
1165 size_t inbytesleft0 = inbytesleft;
1168 while(inbytesleft > 0 && *inp && strchr("(,$!)-", *inp))
1173 if (inbytesleft <= 0)
1176 cd->my_errno = YAZ_ICONV_EINVAL;
1179 cd->marc8_esc_mode = *inp++;
1181 (*no_read) += inbytesleft0 - inbytesleft;
1183 if (inbytesleft <= 0)
1185 else if (*inp == ' ')
1193 size_t no_read_sub = 0;
1196 switch(cd->marc8_esc_mode)
1198 case 'B': /* Basic ASCII */
1199 case 's': /* ASCII */
1200 case 'E': /* ANSEL */
1201 x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb);
1205 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb);
1208 case 'g': /* Greek */
1209 x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb);
1211 case 'b': /* Subscripts */
1212 x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb);
1214 case 'p': /* Superscripts */
1215 x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb);
1217 case '2': /* Basic Hebrew */
1218 x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb);
1220 case 'N': /* Basic Cyrillic */
1221 x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1223 case 'Q': /* Extended Cyrillic */
1224 x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb);
1226 case '3': /* Basic Arabic */
1227 x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb);
1229 case '4': /* Extended Arabic */
1230 x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb);
1232 case 'S': /* Greek */
1233 x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb);
1235 case '1': /* Chinese, Japanese, Korean (EACC) */
1236 x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb);
1240 cd->my_errno = YAZ_ICONV_EILSEQ;
1243 *no_read += no_read_sub;
/* write_handle flavour of the UTF-8 encoder: forwards to
 * yaz_write_UTF8_char and lets it store any error code in cd->my_errno. */
1248 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
1249 char **outbuf, size_t *outbytesleft)
1251 return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
1254 size_t yaz_write_UTF8_char(unsigned long x,
1255 char **outbuf, size_t *outbytesleft,
1258 unsigned char *outp = (unsigned char *) *outbuf;
1260 if (x <= 0x7f && *outbytesleft >= 1)
1262 *outp++ = (unsigned char) x;
1265 else if (x <= 0x7ff && *outbytesleft >= 2)
1267 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
1268 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1269 (*outbytesleft) -= 2;
1271 else if (x <= 0xffff && *outbytesleft >= 3)
1273 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
1274 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1275 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1276 (*outbytesleft) -= 3;
1278 else if (x <= 0x1fffff && *outbytesleft >= 4)
1280 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
1281 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1282 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1283 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1284 (*outbytesleft) -= 4;
1286 else if (x <= 0x3ffffff && *outbytesleft >= 5)
1288 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
1289 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1290 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1291 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1292 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1293 (*outbytesleft) -= 5;
1295 else if (*outbytesleft >= 6)
1297 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
1298 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
1299 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1300 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1301 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1302 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1303 (*outbytesleft) -= 6;
1307 *error = YAZ_ICONV_E2BIG; /* not room for output */
1308 return (size_t)(-1);
1310 *outbuf = (char *) outp;
1314 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
1315 char **outbuf, size_t *outbytesleft)
1317 /* list of two char unicode sequence that, when combined, are
1318 equivalent to single unicode chars that can be represented in
1320 Regular iconv on Linux at least does not seem to convert these,
1321 but since MARC-8 to UTF-8 generates these composed sequence
1322 we get a better chance of a successful MARC-8 -> ISO-8859-1
1324 unsigned char *outp = (unsigned char *) *outbuf;
1326 if (cd->compose_char)
1329 for (i = 0; latin1_comb[i].x1; i++)
1330 if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
1332 x = latin1_comb[i].y;
1335 if (*outbytesleft < 1)
1336 { /* no room. Retain compose_char and bail out */
1337 cd->my_errno = YAZ_ICONV_E2BIG;
1338 return (size_t)(-1);
1340 if (!latin1_comb[i].x1)
1341 { /* not found. Just write compose_char */
1342 *outp++ = (unsigned char) cd->compose_char;
1344 *outbuf = (char *) outp;
1346 /* compose_char used so reset it. x now holds current char */
1347 cd->compose_char = 0;
1350 if (x > 32 && x < 127 && cd->compose_char == 0)
1352 cd->compose_char = x;
1355 else if (x > 255 || x < 1)
1357 cd->my_errno = YAZ_ICONV_EILSEQ;
1360 else if (*outbytesleft < 1)
1362 cd->my_errno = YAZ_ICONV_E2BIG;
1363 return (size_t)(-1);
1365 *outp++ = (unsigned char) x;
1367 *outbuf = (char *) outp;
/* Flush handler for ISO-8859-1 output: if a character is still held in
 * cd->compose_char it is written out as a single byte and the held value is
 * cleared. With no output room left, YAZ_ICONV_E2BIG is set, (size_t)(-1)
 * is returned and compose_char is left untouched.
 * NOTE(review): the *outbytesleft update and the final return are not part
 * of this listing. */
1371 static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
1372 char **outbuf, size_t *outbytesleft)
1374 if (cd->compose_char)
1376 unsigned char *outp = (unsigned char *) *outbuf;
1377 if (*outbytesleft < 1)
1379 cd->my_errno = YAZ_ICONV_E2BIG;
1380 return (size_t)(-1);
1382 *outp++ = (unsigned char) cd->compose_char;
1384 *outbuf = (char *) outp;
1385 cd->compose_char = 0;
/* Writes x as four bytes, most significant byte first (big-endian UCS-4).
 * Needs 4 free output bytes; *outbytesleft is reduced by 4 and *outbuf is
 * advanced past the bytes written. Otherwise YAZ_ICONV_E2BIG is set and
 * (size_t)(-1) is returned. */
1390 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
1391 char **outbuf, size_t *outbytesleft)
1393 unsigned char *outp = (unsigned char *) *outbuf;
1394 if (*outbytesleft >= 4)
1396 *outp++ = (unsigned char) (x>>24);
1397 *outp++ = (unsigned char) (x>>16);
1398 *outp++ = (unsigned char) (x>>8);
1399 *outp++ = (unsigned char) x;
1400 (*outbytesleft) -= 4;
1404 cd->my_errno = YAZ_ICONV_E2BIG;
1405 return (size_t)(-1);
1407 *outbuf = (char *) outp;
/* Writes x as four bytes, least significant byte first (little-endian
 * UCS-4). Needs 4 free output bytes; *outbytesleft is reduced by 4 and
 * *outbuf is advanced past the bytes written. Otherwise YAZ_ICONV_E2BIG is
 * set and (size_t)(-1) is returned. */
1411 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1412 char **outbuf, size_t *outbytesleft)
1414 unsigned char *outp = (unsigned char *) *outbuf;
1415 if (*outbytesleft >= 4)
1417 *outp++ = (unsigned char) x;
1418 *outp++ = (unsigned char) (x>>8);
1419 *outp++ = (unsigned char) (x>>16);
1420 *outp++ = (unsigned char) (x>>24);
1421 (*outbytesleft) -= 4;
1425 cd->my_errno = YAZ_ICONV_E2BIG;
1426 return (size_t)(-1);
1428 *outbuf = (char *) outp;
1432 static unsigned long lookup_marc8(yaz_iconv_t cd,
1433 unsigned long x, int *comb,
1434 const char **page_chr)
1437 char *utf8_outbuf = utf8_buf;
1438 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1440 r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
1441 if (r == (size_t)(-1))
1443 cd->my_errno = YAZ_ICONV_EILSEQ;
1449 size_t inbytesleft, no_read_sub = 0;
1452 *utf8_outbuf = '\0';
1453 inp = (unsigned char *) utf8_buf;
1454 inbytesleft = strlen(utf8_buf);
1459 x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb);
1462 *page_chr = "\033(B";
1465 x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb);
1468 *page_chr = "\033(B";
1471 x = yaz_marc8r_67_conv(inp, inbytesleft, &no_read_sub, comb);
1474 *page_chr = "\033g";
1477 x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb);
1480 *page_chr = "\033b";
1483 x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb);
1486 *page_chr = "\033p";
1489 x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb);
1492 *page_chr = "\033(2";
1495 x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1498 *page_chr = "\033(N";
1501 x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb);
1504 *page_chr = "\033(Q";
1507 x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb);
1510 *page_chr = "\033(3";
1513 x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb);
1516 *page_chr = "\033(4";
1519 x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb);
1522 *page_chr = "\033(S";
1525 x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb);
1528 *page_chr = "\033$1";
1531 cd->my_errno = YAZ_ICONV_EILSEQ;
1536 static size_t flush_combos(yaz_iconv_t cd,
1537 char **outbuf, size_t *outbytesleft)
1539 unsigned long y = cd->write_marc8_last;
1542 size_t i, out_no = 0;
1547 byte = (unsigned char )((y>>16) & 0xff);
1549 out_buf[out_no++] = byte;
1550 byte = (unsigned char)((y>>8) & 0xff);
1552 out_buf[out_no++] = byte;
1553 byte = (unsigned char )(y & 0xff);
1555 out_buf[out_no++] = byte;
1557 if (out_no + cd->write_marc8_comb_no + 1 > *outbytesleft)
1559 cd->my_errno = YAZ_ICONV_E2BIG;
1560 return (size_t) (-1);
1563 for (i = 0; i < cd->write_marc8_comb_no; i++)
1565 /* all MARC-8 combined characters are simple bytes */
1566 byte = (unsigned char )(cd->write_marc8_comb_ch[i]);
1567 *(*outbuf)++ = byte;
1570 memcpy(*outbuf, out_buf, out_no);
1572 (*outbytesleft) -= out_no;
1573 if (cd->write_marc8_second_half_char)
1575 *(*outbuf)++ = cd->write_marc8_second_half_char;
1579 cd->write_marc8_last = 0;
1580 cd->write_marc8_comb_no = 0;
1581 cd->write_marc8_second_half_char = 0;
/* Makes page_chr (an escape sequence string) the current MARC-8 output
   page. The visible logic runs when page_chr differs from the page
   recorded in cd->write_marc8_page_chr: the new page is recorded and
   escape sequence(s) are copied to *outbuf, with *outbytesleft reduced
   by the number of bytes copied.
   When fewer than 8 output bytes remain, cd->my_errno is set to
   YAZ_ICONV_E2BIG and (size_t)(-1) is returned.
   NOTE(review): the lines that assign the "leave" sequence to page_out
   and the success return are not visible in this listing. */
1585 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
1586 char **outbuf, size_t *outbytesleft,
1587 const char *page_chr)
1589 const char *old_page_chr = cd->write_marc8_page_chr;
/* strcmp() is non-zero when the requested page differs from the old one */
1590 if (strcmp(page_chr, old_page_chr))
1593 const char *page_out = page_chr;
/* demand 8 bytes of headroom before writing any escape sequence */
1595 if (*outbytesleft < 8)
1597 cd->my_errno = YAZ_ICONV_E2BIG;
1599 return (size_t) (-1);
1601 cd->write_marc8_page_chr = page_chr;
/* the page being left was selected with ESC p, ESC g or ESC b */
1603 if (!strcmp(old_page_chr, "\033p")
1604 || !strcmp(old_page_chr, "\033g")
1605 || !strcmp(old_page_chr, "\033b"))
1607 /* Technique 1 leave */
1609 if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */
1611 /* Must leave script + enter new page */
1612 plen = strlen(page_out);
1613 memcpy(*outbuf, page_out, plen);
1615 (*outbytesleft) -= plen;
1616 page_out = page_chr;
/* write whatever sequence page_out now refers to */
1619 plen = strlen(page_out);
1620 memcpy(*outbuf, page_out, plen);
1622 (*outbytesleft) -= plen;
/* Converts the character x to MARC-8 with lookup_marc8() and records the
   result on cd; output is deferred until the pending character is flushed.
   Two outcomes are visible:
   - the result y is appended to cd->write_marc8_comb_ch, provided fewer
     than 6 entries are queued (presumably taken when lookup_marc8()
     reports a combining character through comb - the test itself is not
     visible here);
   - otherwise the previously pending character is flushed with
     flush_combos(), the page is switched with yaz_write_marc8_page_chr()
     and y becomes the new pending character in cd->write_marc8_last.
   Returns (size_t)(-1) on failure. */
1628 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1629 char **outbuf, size_t *outbytesleft)
1632 const char *page_chr = 0;
1633 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1636 return (size_t) (-1);
/* some characters need a second-half byte after the base character;
   0x0360 maps to 0xFB, the condition for 0xEC is not visible here */
1641 cd->write_marc8_second_half_char = 0xEC;
1642 else if (x == 0x0360)
1643 cd->write_marc8_second_half_char = 0xFB;
/* the combining queue holds at most 6 entries */
1645 if (cd->write_marc8_comb_no < 6)
1646 cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y;
/* emit what was pending before y takes its place */
1650 size_t r = flush_combos(cd, outbuf, outbytesleft);
1656 r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr);
1660 cd->write_marc8_last = y;
/* Flush handler for MARC-8 output: writes the pending character with
   flush_combos() and then switches the output page to "\033(B",
   returning the result of that page switch.
   NOTE(review): the check of r between the two calls is not visible in
   this listing. */
1665 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1666 char **outbuf, size_t *outbytesleft)
1668 size_t r = flush_combos(cd, outbuf, outbytesleft);
1671 return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, "\033(B");
/* Write handler for MARC-8 output. The latin1_comb table is scanned up to
   its terminating entry (x1 == 0); when x equals an entry's y, the
   entry's x1 and x2 are written through yaz_write_marc8_2() in place of
   x. If that fails with YAZ_ICONV_E2BIG the saved output state is
   restored. Characters without a table entry go straight to
   yaz_write_marc8_2(). */
1674 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1675 char **outbuf, size_t *outbytesleft)
1678 for (i = 0; latin1_comb[i].x1; i++)
1680 if (x == latin1_comb[i].y)
1683 /* save the output pointers .. */
1684 char *outbuf0 = *outbuf;
1685 size_t outbytesleft0 = *outbytesleft;
1686 int last_ch = cd->write_marc8_last;
/* first component of the pair */
1688 r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1689 outbuf, outbytesleft);
/* second component of the pair */
1692 r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1693 outbuf, outbytesleft);
1694 if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1696 /* not enough room. reset output to original values */
/* NOTE(review): only the output pointers and write_marc8_last are
   visibly restored; confirm the other write_marc8_* fields need no
   rollback */
1698 *outbytesleft = outbytesleft0;
1699 cd->write_marc8_last = last_ch;
/* no table entry matched: convert x itself */
1704 return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
/* Write handler for WCHAR_T output: copies one wchar_t value (wch) into
   the output buffer with memcpy() and advances *outbuf / *outbytesleft by
   sizeof(wchar_t).
   When fewer than sizeof(wchar_t) bytes remain, cd->my_errno is set to
   YAZ_ICONV_E2BIG and (size_t)(-1) is returned.
   NOTE(review): the declaration/assignment of wch from x is not visible
   in this listing. */
1709 static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
1710 char **outbuf, size_t *outbytesleft)
1712 unsigned char *outp = (unsigned char *) *outbuf;
1714 if (*outbytesleft >= sizeof(wchar_t))
/* memcpy avoids any alignment requirement on the output buffer */
1717 memcpy(outp, &wch, sizeof(wch));
1718 outp += sizeof(wch);
1719 (*outbytesleft) -= sizeof(wch);
1723 cd->my_errno = YAZ_ICONV_E2BIG;
1724 return (size_t)(-1);
/* publish the advanced write position to the caller */
1726 *outbuf = (char *) outp;
/* Returns non-zero when cd has both a read handler and a write handler
   set, i.e. the conversion is carried out by the handlers in this file. */
1731 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1733 return cd->read_handle && cd->write_handle;
/* Creates a conversion descriptor for converting from encoding fromcode
   to encoding tocode. The descriptor is allocated with xmalloc(), its
   handlers are cleared, and read/write (plus optional init/flush)
   handlers are then selected by encoding name. When either the read or
   the write handler is still unset, iconv_open() is called with the same
   names.
   NOTE(review): the return statements and the failure path are not
   visible in this listing. */
1736 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1738 yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
/* start with no handlers selected */
1740 cd->write_handle = 0;
1741 cd->read_handle = 0;
1742 cd->init_handle = 0;
1743 cd->flush_handle = 0;
1744 cd->my_errno = YAZ_ICONV_UNKNOWN;
1746 /* a useful hack: if fromcode has a leading @,
1747 the library will not use YAZ's own conversions .. */
1748 if (fromcode[0] == '@')
/* readers, selected by source encoding name */
1752 if (!yaz_matchstr(fromcode, "UTF8"))
1754 cd->read_handle = yaz_read_UTF8;
1755 cd->init_handle = yaz_init_UTF8;
1757 else if (!yaz_matchstr(fromcode, "ISO88591"))
1758 cd->read_handle = yaz_read_ISO8859_1;
1759 else if (!yaz_matchstr(fromcode, "UCS4"))
1760 cd->read_handle = yaz_read_UCS4;
1761 else if (!yaz_matchstr(fromcode, "UCS4LE"))
1762 cd->read_handle = yaz_read_UCS4LE;
1763 else if (!yaz_matchstr(fromcode, "MARC8"))
1764 cd->read_handle = yaz_read_marc8;
1765 else if (!yaz_matchstr(fromcode, "MARC8s"))
1766 cd->read_handle = yaz_read_marc8s;
1767 else if (!yaz_matchstr(fromcode, "advancegreek"))
1768 cd->read_handle = yaz_read_advancegreek;
1769 else if (!yaz_matchstr(fromcode, "iso54281984"))
1770 cd->read_handle = yaz_read_iso5428_1984;
1771 else if (!yaz_matchstr(fromcode, "iso5428:1984"))
1772 cd->read_handle = yaz_read_iso5428_1984;
1774 else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1775 cd->read_handle = yaz_read_wchar_t;
/* writers, selected by target encoding name; some also need a flush
   handler */
1778 if (!yaz_matchstr(tocode, "UTF8"))
1779 cd->write_handle = yaz_write_UTF8;
1780 else if (!yaz_matchstr(tocode, "ISO88591"))
1782 cd->write_handle = yaz_write_ISO8859_1;
1783 cd->flush_handle = yaz_flush_ISO8859_1;
1785 else if (!yaz_matchstr (tocode, "UCS4"))
1786 cd->write_handle = yaz_write_UCS4;
1787 else if (!yaz_matchstr(tocode, "UCS4LE"))
1788 cd->write_handle = yaz_write_UCS4LE;
1789 else if (!yaz_matchstr(tocode, "MARC8"))
1791 cd->write_handle = yaz_write_marc8;
1792 cd->flush_handle = yaz_flush_marc8;
/* MARC8s output uses the same write and flush handlers as MARC8 */
1794 else if (!yaz_matchstr(tocode, "MARC8s"))
1796 cd->write_handle = yaz_write_marc8;
1797 cd->flush_handle = yaz_flush_marc8;
1799 else if (!yaz_matchstr(tocode, "advancegreek"))
1801 cd->write_handle = yaz_write_advancegreek;
1803 else if (!yaz_matchstr(tocode, "iso54281984"))
1805 cd->write_handle = yaz_write_iso5428_1984;
1807 else if (!yaz_matchstr(tocode, "iso5428:1984"))
1809 cd->write_handle = yaz_write_iso5428_1984;
1812 else if (!yaz_matchstr(tocode, "WCHAR_T"))
1813 cd->write_handle = yaz_write_wchar_t;
/* either direction lacks a handler: open a system iconv descriptor */
1818 if (!cd->read_handle || !cd->write_handle)
1820 cd->iconv_cd = iconv_open (tocode, fromcode);
1821 if (cd->iconv_cd == (iconv_t) (-1))
1828 if (!cd->read_handle || !cd->write_handle)
/* Converts input from *inbuf into *outbuf; the parameter list mirrors
   iconv(). Two paths are visible:
   - a call to the system iconv() on cd->iconv_cd, where a (size_t)(-1)
     result is translated from yaz_errno() into one of the YAZ_ICONV_*
     codes stored in cd->my_errno;
   - the handler path, which resets the per-conversion state on cd, runs
     cd->init_handle on the start of the input when one is set, and then
     repeatedly reads a character with cd->read_handle and writes it with
     cd->write_handle, advancing *inbuf / *inbytesleft by the bytes read.
   When inbuf or *inbuf is null and an output buffer is given, the saved
   character cd->unget_x is written and cd->flush_handle (if set) is
   called.
   NOTE(review): many control-flow lines of this function (conditions,
   braces, returns) are not visible in this listing; confirm details
   against the complete source. */
1838 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1839 char **outbuf, size_t *outbytesleft)
1848 iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1849 if (r == (size_t)(-1))
/* map the system error number to a YAZ_ICONV_* code */
1851 switch (yaz_errno())
1854 cd->my_errno = YAZ_ICONV_E2BIG;
1857 cd->my_errno = YAZ_ICONV_EINVAL;
1860 cd->my_errno = YAZ_ICONV_EILSEQ;
1863 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* reset of the reader/writer state kept on cd */
1875 cd->my_errno = YAZ_ICONV_UNKNOWN;
1876 cd->marc8_esc_mode = 'B';
1878 cd->comb_offset = cd->comb_size = 0;
1879 cd->compose_char = 0;
1881 cd->write_marc8_comb_no = 0;
1882 cd->write_marc8_second_half_char = 0;
1883 cd->write_marc8_last = 0;
1884 cd->write_marc8_page_chr = "\033(B";
/* let the reader's init handler look at the start of the input */
1892 if (cd->init_handle && inbuf && *inbuf)
1895 size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1896 *inbytesleft, &no_read);
1899 if (cd->my_errno == YAZ_ICONV_EINVAL)
1904 *inbytesleft -= no_read;
/* no input buffer: write the saved character and flush */
1910 if (!inbuf || !*inbuf)
1912 if (outbuf && *outbuf)
1915 r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
1916 if (cd->flush_handle)
1917 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
1932 no_read = cd->no_read_x;
/* all input consumed: r becomes the distance advanced from inbuf0 */
1936 if (*inbytesleft == 0)
1938 r = *inbuf - inbuf0;
/* read one character, then hand it to the writer */
1941 x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1951 r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
1954 /* unable to write it. save it because read_handle cannot
1956 if (cd->my_errno == YAZ_ICONV_E2BIG)
1959 cd->no_read_x = no_read;
1965 *inbytesleft -= no_read;
1966 (*inbuf) += no_read;
/* Returns the error code currently stored in cd->my_errno (one of the
   YAZ_ICONV_* values assigned elsewhere in this file). */
1971 int yaz_iconv_error (yaz_iconv_t cd)
1973 return cd->my_errno;
/* Closes the conversion descriptor cd. The visible code closes the
   system iconv descriptor held in cd->iconv_cd with iconv_close().
   NOTE(review): the guard around that call, the release of cd itself and
   the return value are not visible in this listing. */
1976 int yaz_iconv_close (yaz_iconv_t cd)
1980 iconv_close (cd->iconv_cd);
1989 * indent-tabs-mode: nil
1991 * vim: shiftwidth=4 tabstop=8 expandtab