2 * Copyright (C) 1995-2007, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.42 2007-05-30 08:22:03 adam Exp $
9 * \brief Implements simple ICONV
11 * This implements an interface similar to that of iconv and
12 * is used by YAZ to interface with iconv (if present).
13 * For systems where iconv is not present, this layer
14 * provides a few important conversions: UTF-8, MARC-8, Latin-1.
17 * http://www.loc.gov/marc/specifications/speccharmarc8.html
37 #include <yaz/yaz-util.h>
/* Forward declarations of the MARC-8 input handlers, one per character
   set page.  They are defined outside this file; a comment in
   yaz_read_marc8() says the yaz_marc8_?_conv handlers are generated by
   charconv.tcl.  yaz_read_marc8_comb() selects one of them according to
   cd->marc8_esc_mode.
   NOTE(review): judging by the call sites, each handler reports the bytes
   it consumed through *no_read and sets *combining for combining
   characters - confirm against the generated code. */
39 unsigned long yaz_marc8_1_conv(unsigned char *inp, size_t inbytesleft,
40 size_t *no_read, int *combining);
41 unsigned long yaz_marc8_2_conv(unsigned char *inp, size_t inbytesleft,
42 size_t *no_read, int *combining);
43 unsigned long yaz_marc8_3_conv(unsigned char *inp, size_t inbytesleft,
44 size_t *no_read, int *combining);
45 unsigned long yaz_marc8_4_conv(unsigned char *inp, size_t inbytesleft,
46 size_t *no_read, int *combining);
47 unsigned long yaz_marc8_5_conv(unsigned char *inp, size_t inbytesleft,
48 size_t *no_read, int *combining);
49 unsigned long yaz_marc8_6_conv(unsigned char *inp, size_t inbytesleft,
50 size_t *no_read, int *combining);
51 unsigned long yaz_marc8_7_conv(unsigned char *inp, size_t inbytesleft,
52 size_t *no_read, int *combining);
53 unsigned long yaz_marc8_8_conv(unsigned char *inp, size_t inbytesleft,
54 size_t *no_read, int *combining);
55 unsigned long yaz_marc8_9_conv(unsigned char *inp, size_t inbytesleft,
56 size_t *no_read, int *combining);
/* Reverse handlers with the same signature.  lookup_marc8() feeds them the
   UTF-8 encoding of a character and tries them in order 1..9. */
59 unsigned long yaz_marc8r_1_conv(unsigned char *inp, size_t inbytesleft,
60 size_t *no_read, int *combining);
61 unsigned long yaz_marc8r_2_conv(unsigned char *inp, size_t inbytesleft,
62 size_t *no_read, int *combining);
63 unsigned long yaz_marc8r_3_conv(unsigned char *inp, size_t inbytesleft,
64 size_t *no_read, int *combining);
65 unsigned long yaz_marc8r_4_conv(unsigned char *inp, size_t inbytesleft,
66 size_t *no_read, int *combining);
67 unsigned long yaz_marc8r_5_conv(unsigned char *inp, size_t inbytesleft,
68 size_t *no_read, int *combining);
69 unsigned long yaz_marc8r_6_conv(unsigned char *inp, size_t inbytesleft,
70 size_t *no_read, int *combining);
71 unsigned long yaz_marc8r_7_conv(unsigned char *inp, size_t inbytesleft,
72 size_t *no_read, int *combining);
73 unsigned long yaz_marc8r_8_conv(unsigned char *inp, size_t inbytesleft,
74 size_t *no_read, int *combining);
75 unsigned long yaz_marc8r_9_conv(unsigned char *inp, size_t inbytesleft,
76 size_t *no_read, int *combining);
/* Conversion descriptor.  yaz_iconv_open() clears the four handlers and
   then installs the ones matching the requested character sets. */
78 struct yaz_iconv_struct {
/* Input side: init_handle is installed together with the UTF-8 reader
   (yaz_init_UTF8); read_handle decodes one character from inbuf. */
81 size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
82 size_t inbytesleft, size_t *no_read);
83 unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
84 size_t inbytesleft, size_t *no_read);
/* Output side: write_handle encodes the character x into *outbuf;
   flush_handle emits whatever state the writer is still holding. */
85 size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
86 char **outbuf, size_t *outbytesleft);
87 size_t (*flush_handle)(yaz_iconv_t cd,
88 char **outbuf, size_t *outbytesleft);
/* MARC-8 reader: characters buffered by yaz_read_marc8() and the number
   of input bytes each one consumed. */
93 unsigned long comb_x[8];
94 size_t comb_no_read[8];
96 unsigned long unget_x;
/* ISO-8859-1 writer: base character held back in case the next character
   is a combining mark listed in latin1_comb (0 when nothing is pending). */
100 unsigned long compose_char;
/* MARC-8 writer: pending combining characters, their count, the optional
   second-half byte, the pending base character and the escape sequence of
   the page currently selected in the output. */
102 unsigned long write_marc8_comb_ch[8];
103 size_t write_marc8_comb_no;
104 unsigned write_marc8_second_half_char;
105 unsigned long write_marc8_last;
106 const char *write_marc8_page_chr;
/* latin1_comb table: each row is { x1, x2, y } where x1 is a base letter,
   x2 a combining mark (U+03xx) and y the single ISO-8859-1 code that
   represents the pair.  Users of the table scan it until x1 is 0.
   Latin-1 characters that are not a base letter plus combining mark are
   left out, as the per-row remarks below note. */
110 unsigned long x1, x2;
113 { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
114 { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
115 { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
116 { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
117 { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
118 { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
119 /* omitted: 0xc6 LATIN CAPITAL LETTER AE */
120 { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
121 { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
122 { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
123 { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
124 { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
125 { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
126 { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
127 { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
128 { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
129 { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
130 { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
131 { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
132 { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
133 { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
134 { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
135 /* omitted: 0xd7 MULTIPLICATION SIGN */
136 /* omitted: 0xd8 LATIN CAPITAL LETTER O WITH STROKE */
137 { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
138 { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
139 { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
140 { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
141 { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
142 /* omitted: 0xde LATIN CAPITAL LETTER THORN */
143 /* omitted: 0xdf LATIN SMALL LETTER SHARP S */
144 { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
145 { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
146 { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
147 { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
148 { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
149 { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
150 /* omitted: 0xe6 LATIN SMALL LETTER AE */
151 { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
152 { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
153 { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
154 { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
155 { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
156 { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
157 { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
158 { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
159 { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
160 /* omitted: 0xf0 LATIN SMALL LETTER ETH */
161 { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
162 { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
163 { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
164 { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
165 { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
166 { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
167 /* omitted: 0xf7 DIVISION SIGN */
168 /* omitted: 0xf8 LATIN SMALL LETTER O WITH STROKE */
169 { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
170 { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
171 { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
172 { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
173 { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
174 /* omitted: 0xfe LATIN SMALL LETTER THORN */
175 { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
/* Read handler for ISO-8859-1 input: the character value is taken
   directly from the first input byte.
   NOTE(review): presumably also sets *no_read to 1 and returns x - the
   remaining statements are not visible here. */
180 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
181 size_t inbytesleft, size_t *no_read)
183 unsigned long x = inp[0];
/* Init handler for UTF-8 input; yaz_iconv_open() installs it together
   with yaz_read_UTF8.  It inspects the first three input bytes for a
   byte order mark and sets cd->my_errno to YAZ_ICONV_EINVAL on its error
   path.
   NOTE(review): the guard in front of the EINVAL assignment and the
   updates of *no_read are not visible here - presumably the error is
   raised when fewer than three bytes are available and *no_read is set
   to 3 when the mark is found. */
189 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
190 size_t inbytesleft, size_t *no_read)
199 cd->my_errno = YAZ_ICONV_EINVAL;
/* The UTF-8 byte order mark is EF BB BF, so both trailing bytes have to
   match.  The former test (inp[1] != 0xbb) rejected every real mark. */
202 if (inp[1] == 0xbb && inp[2] == 0xbf)
/* Decode one UTF-8 sequence starting at inp.
   inp/inbytesleft describe the input, *no_read starts out as 0 and
   *error receives YAZ_ICONV_EILSEQ for malformed input or
   YAZ_ICONV_EINVAL when the sequence is cut short.
   The lead byte selects the sequence length: up to 0xdf two bytes, up to
   0xef three, up to 0xf7 four, up to 0xfb five and up to 0xfd six; every
   continuation byte must have the form 10xxxxxx.
   NOTE(review): each branch has a second EILSEQ assignment whose guard
   is not visible here (possibly a check for overlong encodings). */
209 unsigned long yaz_read_UTF8_char(unsigned char *inp,
210 size_t inbytesleft, size_t *no_read,
215 *no_read = 0; /* by default */
/* 0x80..0xbf are continuation bytes, 0xfe/0xff never start a sequence */
221 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
223 *error = YAZ_ICONV_EILSEQ;
225 else if (inp[0] <= 0xdf && inbytesleft >= 2)
227 if ((inp[1] & 0xc0) == 0x80)
229 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
233 *error = YAZ_ICONV_EILSEQ;
236 *error = YAZ_ICONV_EILSEQ;
238 else if (inp[0] <= 0xef && inbytesleft >= 3)
240 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
242 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
247 *error = YAZ_ICONV_EILSEQ;
250 *error = YAZ_ICONV_EILSEQ;
252 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
254 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
255 && (inp[3] & 0xc0) == 0x80)
257 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
258 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
262 *error = YAZ_ICONV_EILSEQ;
265 *error = YAZ_ICONV_EILSEQ;
267 else if (inp[0] <= 0xfb && inbytesleft >= 5)
269 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
270 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
272 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
273 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
278 *error = YAZ_ICONV_EILSEQ;
281 *error = YAZ_ICONV_EILSEQ;
283 else if (inp[0] <= 0xfd && inbytesleft >= 6)
285 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
286 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
287 && (inp[5] & 0xc0) == 0x80)
289 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
290 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
291 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
295 *error = YAZ_ICONV_EILSEQ;
298 *error = YAZ_ICONV_EILSEQ;
301 *error = YAZ_ICONV_EINVAL; /* incomplete sequence */
/* Read handler for UTF-8 input: forwards to yaz_read_UTF8_char() and lets
   it report errors directly into cd->my_errno. */
306 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
307 size_t inbytesleft, size_t *no_read)
309 return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
/* Read handler for big-endian UCS-4 input: combines four input bytes,
   most significant first, into one character value.  Incomplete input is
   reported as YAZ_ICONV_EINVAL.
   NOTE(review): the length guard and the *no_read update are not visible
   here - presumably fewer than four bytes trigger the error. */
312 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
313 size_t inbytesleft, size_t *no_read)
319 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* Widen before shifting: inp[0] promotes to int, so shifting a byte of
   0x80 or more by 24 overflows the sign bit and sign-extends when
   unsigned long is 64 bits wide. */
324 x = ((unsigned long) inp[0]<<24) | ((unsigned long) inp[1]<<16) | ((unsigned long) inp[2]<<8) | inp[3];
/* Read handler for little-endian UCS-4 input: combines four input bytes,
   least significant first, into one character value.  Incomplete input
   is reported as YAZ_ICONV_EINVAL.
   NOTE(review): the length guard and the *no_read update are not visible
   here - presumably fewer than four bytes trigger the error. */
330 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
331 size_t inbytesleft, size_t *no_read)
337 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* Widen before shifting: inp[3] promotes to int, so shifting a byte of
   0x80 or more by 24 overflows the sign bit and sign-extends when
   unsigned long is 64 bits wide. */
342 x = ((unsigned long) inp[3]<<24) | ((unsigned long) inp[2]<<16) | ((unsigned long) inp[1]<<8) | inp[0];
/* Read handler for native wchar_t input.  Fewer than sizeof(wchar_t)
   bytes is reported as YAZ_ICONV_EINVAL; otherwise one wchar_t is copied
   out of the input and *no_read is set to its size. */
349 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
350 size_t inbytesleft, size_t *no_read)
354 if (inbytesleft < sizeof(wchar_t))
356 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* memcpy rather than a pointer cast: inp need not be aligned for wchar_t */
362 memcpy (&wch, inp, sizeof(wch));
364 *no_read = sizeof(wch);
/* Read handler for ISO 5428:1984 (Greek) input.  A loop first consumes
   prefix bytes such as 0xa3; running out of input after a prefix is
   reported as YAZ_ICONV_EINVAL.  The byte that follows is mapped to a
   Greek letter by the switch below.
   NOTE(review): going by yaz_write_iso5428_1984(), 0xa2 and 0xa3 look
   like the tonos and dialytika prefixes - the statements inside each
   case are not visible here, so confirm. */
370 static unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
371 size_t inbytesleft, size_t *no_read)
378 while (inbytesleft > 0)
384 else if (*inp == 0xa3)
394 if (inbytesleft == 0)
396 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
401 case 0xe1: /* alpha small */
407 case 0xc1: /* alpha capital */
414 case 0xe2: /* Beta small */
417 case 0xc2: /* Beta capital */
421 case 0xe4: /* Gamma small */
424 case 0xc4: /* Gamma capital */
428 case 0xe5: /* Delta small */
431 case 0xc5: /* Delta capital */
434 case 0xe6: /* epsilon small */
440 case 0xc6: /* epsilon capital */
446 case 0xe9: /* Zeta small */
449 case 0xc9: /* Zeta capital */
452 case 0xea: /* Eta small */
458 case 0xca: /* Eta capital */
464 case 0xeb: /* Theta small */
467 case 0xcb: /* Theta capital */
470 case 0xec: /* Iota small */
482 case 0xcc: /* Iota capital */
491 case 0xed: /* Kappa small */
494 case 0xcd: /* Kappa capital */
497 case 0xee: /* Lambda small */
500 case 0xce: /* Lambda capital */
503 case 0xef: /* Mu small */
506 case 0xcf: /* Mu capital */
509 case 0xf0: /* Nu small */
512 case 0xd0: /* Nu capital */
515 case 0xf1: /* Xi small */
518 case 0xd1: /* Xi capital */
521 case 0xf2: /* Omicron small */
527 case 0xd2: /* Omicron capital */
533 case 0xf3: /* Pi small */
536 case 0xd3: /* Pi capital */
539 case 0xf5: /* Rho small */
542 case 0xd5: /* Rho capital */
545 case 0xf7: /* Sigma small (end of words) */
548 case 0xf6: /* Sigma small */
551 case 0xd6: /* Sigma capital */
554 case 0xf8: /* Tau small */
557 case 0xd8: /* Tau capital */
560 case 0xf9: /* Upsilon small */
572 case 0xd9: /* Upsilon capital */
581 case 0xfa: /* Phi small */
584 case 0xda: /* Phi capital */
587 case 0xfb: /* Chi small */
590 case 0xdb: /* Chi capital */
593 case 0xfc: /* Psi small */
596 case 0xdc: /* Psi capital */
599 case 0xfd: /* Omega small */
605 case 0xdd: /* Omega capital */
/* Write handler for ISO 5428:1984 (Greek) output.  Maps the Greek code
   point x to one, two or three output bytes: the letter byte, optionally
   preceded by 0xa2 and/or 0xa3.  YAZ_ICONV_E2BIG is raised when the
   output cannot hold the longest form; YAZ_ICONV_EILSEQ is raised on the
   path for characters without a table entry.
   NOTE(review): 0xa2 appears on the code points that carry a tonos and
   0xa3 on those with a dialytika - confirm against the standard. */
620 static size_t yaz_write_iso5428_1984(yaz_iconv_t cd, unsigned long x,
621 char **outbuf, size_t *outbytesleft)
624 unsigned char *out = (unsigned char*) *outbuf;
/* three bytes is the longest sequence any case below produces */
625 if (*outbytesleft < 3)
627 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
632 case 0x03ac : out[k++]=0xa2; out[k++]=0xe1; break;
633 case 0x03b1 : out[k++]=0xe1; break;
634 case 0x0386 : out[k++]=0xa2; out[k++]=0xc1; break;
635 case 0x0391 : out[k++]=0xc1; break;
636 case 0x03b2 : out[k++]=0xe2; break;
637 case 0x0392 : out[k++]=0xc2; break;
638 case 0x03b3 : out[k++]=0xe4; break;
639 case 0x0393 : out[k++]=0xc4; break;
640 case 0x03b4 : out[k++]=0xe5; break;
641 case 0x0394 : out[k++]=0xc5; break;
642 case 0x03ad : out[k++]=0xa2; out[k++]=0xe6; break;
643 case 0x03b5 : out[k++]=0xe6; break;
644 case 0x0388 : out[k++]=0xa2; out[k++]=0xc6; break;
645 case 0x0395 : out[k++]=0xc6; break;
646 case 0x03b6 : out[k++]=0xe9; break;
647 case 0x0396 : out[k++]=0xc9; break;
648 case 0x03ae : out[k++]=0xa2; out[k++]=0xea; break;
649 case 0x03b7 : out[k++]=0xea; break;
650 case 0x0389 : out[k++]=0xa2; out[k++]=0xca; break;
651 case 0x0397 : out[k++]=0xca; break;
652 case 0x03b8 : out[k++]=0xeb; break;
653 case 0x0398 : out[k++]=0xcb; break;
654 case 0x0390 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xec; break;
655 case 0x03af : out[k++]=0xa2; out[k++]=0xec; break;
656 case 0x03ca : out[k++]=0xa3; out[k++]=0xec; break;
657 case 0x03b9 : out[k++]=0xec; break;
658 case 0x038a : out[k++]=0xa2; out[k++]=0xcc; break;
659 case 0x03aa : out[k++]=0xa3; out[k++]=0xcc; break;
660 case 0x0399 : out[k++]=0xcc; break;
661 case 0x03ba : out[k++]=0xed; break;
662 case 0x039a : out[k++]=0xcd; break;
663 case 0x03bb : out[k++]=0xee; break;
664 case 0x039b : out[k++]=0xce; break;
665 case 0x03bc : out[k++]=0xef; break;
666 case 0x039c : out[k++]=0xcf; break;
667 case 0x03bd : out[k++]=0xf0; break;
668 case 0x039d : out[k++]=0xd0; break;
669 case 0x03be : out[k++]=0xf1; break;
670 case 0x039e : out[k++]=0xd1; break;
671 case 0x03cc : out[k++]=0xa2; out[k++]=0xf2; break;
672 case 0x03bf : out[k++]=0xf2; break;
673 case 0x038c : out[k++]=0xa2; out[k++]=0xd2; break;
674 case 0x039f : out[k++]=0xd2; break;
675 case 0x03c0 : out[k++]=0xf3; break;
676 case 0x03a0 : out[k++]=0xd3; break;
677 case 0x03c1 : out[k++]=0xf5; break;
678 case 0x03a1 : out[k++]=0xd5; break;
679 case 0x03c2 : out[k++]=0xf7; break;
680 case 0x03c3 : out[k++]=0xf6; break;
681 case 0x03a3 : out[k++]=0xd6; break;
682 case 0x03c4 : out[k++]=0xf8; break;
683 case 0x03a4 : out[k++]=0xd8; break;
684 case 0x03b0 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xf9; break;
685 case 0x03cd : out[k++]=0xa2; out[k++]=0xf9; break;
686 case 0x03cb : out[k++]=0xa3; out[k++]=0xf9; break;
687 case 0x03c5 : out[k++]=0xf9; break;
688 case 0x038e : out[k++]=0xa2; out[k++]=0xd9; break;
689 case 0x03ab : out[k++]=0xa3; out[k++]=0xd9; break;
690 case 0x03a5 : out[k++]=0xd9; break;
691 case 0x03c6 : out[k++]=0xfa; break;
692 case 0x03a6 : out[k++]=0xda; break;
693 case 0x03c7 : out[k++]=0xfb; break;
694 case 0x03a7 : out[k++]=0xdb; break;
695 case 0x03c8 : out[k++]=0xfc; break;
696 case 0x03a8 : out[k++]=0xdc; break;
697 case 0x03ce : out[k++]=0xa2; out[k++]=0xfd; break;
698 case 0x03c9 : out[k++]=0xfd; break;
699 case 0x038f : out[k++]=0xa2; out[k++]=0xdd; break;
700 case 0x03a9 : out[k++]=0xdd; break;
704 cd->my_errno = YAZ_ICONV_EILSEQ;
/* Read handler for the "Advance Greek" encoding.  A loop first consumes
   prefix bytes such as 0x9e and 0x9f; running out of input after a
   prefix is reported as YAZ_ICONV_EINVAL.
   NOTE(review): the letter mapping that follows is not visible here.
   yaz_write_advancegreek() emits 0x9d, 0x9e and 0x9f as prefixes, so
   they presumably act as modifiers for the next byte - confirm. */
715 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
716 size_t inbytesleft, size_t *no_read)
724 while (inbytesleft > 0)
730 else if (*inp == 0x9e)
734 else if (*inp == 0x9f)
744 if (inbytesleft == 0)
746 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* Write handler for the "Advance Greek" encoding.  Maps the Greek code
   point x to one, two or three output bytes: a letter byte in the range
   0x81..0x99, optionally preceded by the prefixes 0x9d, 0x9e and/or
   0x9f.  In the table below 0x9f marks capitals: each capital letter is
   written as 0x9f followed by the byte of its small form.
   YAZ_ICONV_E2BIG is raised when the output cannot hold the longest
   form; YAZ_ICONV_EILSEQ is raised on the path for characters without a
   table entry. */
974 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
975 char **outbuf, size_t *outbytesleft)
978 unsigned char *out = (unsigned char*) *outbuf;
/* three bytes is the longest sequence any case below produces */
979 if (*outbytesleft < 3)
981 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
986 case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
987 case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
988 case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
989 case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
990 case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
991 case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
992 case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
993 case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
994 case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
995 case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
996 case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
997 case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
998 case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
999 case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
1000 case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
1001 case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
1002 case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
1003 case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
1004 case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
1005 case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
1006 case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
1007 case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
1008 case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
1009 case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
1010 case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
1011 case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
1012 case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
1013 case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
1014 case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
1015 case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
1016 case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
1017 case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
1018 case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
1019 case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
1020 case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
1021 case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
1022 case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
1023 case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
1024 case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
1025 case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
1026 case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
1027 case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
1028 case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
1029 case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
1030 case 0x03b1 : out[k++]=0x81; break;
1031 case 0x03b2 : out[k++]=0x82; break;
1032 case 0x03b3 : out[k++]=0x83; break;
1033 case 0x03b4 : out[k++]=0x84; break;
1034 case 0x03b5 : out[k++]=0x85; break;
1035 case 0x03b6 : out[k++]=0x86; break;
1036 case 0x03b7 : out[k++]=0x87; break;
1037 case 0x03b8 : out[k++]=0x88; break;
1038 case 0x03b9 : out[k++]=0x89; break;
1039 case 0x03ba : out[k++]=0x8a; break;
1040 case 0x03bb : out[k++]=0x8b; break;
1041 case 0x03bc : out[k++]=0x8c; break;
1042 case 0x03bd : out[k++]=0x8d; break;
1043 case 0x03be : out[k++]=0x8e; break;
1044 case 0x03bf : out[k++]=0x8f; break;
1045 case 0x03c0 : out[k++]=0x90; break;
1046 case 0x03c1 : out[k++]=0x91; break;
1047 case 0x03c2 : out[k++]=0x92; break;
1048 case 0x03c3 : out[k++]=0x93; break;
1049 case 0x03c4 : out[k++]=0x94; break;
1050 case 0x03c5 : out[k++]=0x95; break;
1051 case 0x03c6 : out[k++]=0x96; break;
/* small chi is 0x97, matching capital chi (0x9f 0x97); it used to be
   written as 0x96, which is small phi */
1052 case 0x03c7 : out[k++]=0x97; break;
1053 case 0x03c8 : out[k++]=0x98; break;
1054 case 0x03c9 : out[k++]=0x99; break;
1058 cd->my_errno = YAZ_ICONV_EILSEQ;
/* Forward declaration: yaz_read_marc8() calls this reader before its
   definition appears further down. */
1070 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
1071 size_t inbytesleft, size_t *no_read,
/* Read handler for MARC-8 input.
   While characters remain in the buffer (comb_offset < comb_size) the
   next buffered character and its byte count are handed out.  Otherwise
   the buffer is refilled: comb_offset is reset and up to 8 characters
   are read with yaz_read_marc8_comb(), each stored in comb_x[] together
   with its byte count in comb_no_read[], while the remaining input
   length is reduced accordingly.  Input that ends while characters are
   buffered is reported as YAZ_ICONV_EINVAL.
   NOTE(review): the loop presumably stops at the first non-combining
   character so that it can be returned ahead of its combining marks -
   the exit condition is not visible here. */
1074 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
1075 size_t inbytesleft, size_t *no_read)
1078 if (cd->comb_offset < cd->comb_size)
1080 *no_read = cd->comb_no_read[cd->comb_offset];
1081 x = cd->comb_x[cd->comb_offset];
1083 /* special case for double-diacritic combining characters,
1084 INVERTED BREVE and DOUBLE TILDE.
1085 We'll increment the no_read counter by 1, since we want to skip over
1086 the processing of the closing ligature character
1088 /* this code is no longer necessary.. our handlers code in
1089 yaz_marc8_?_conv (generated by charconv.tcl) now returns
1090 0 and no_read=1 when a sequence does not match the input.
1091 The SECOND HALFs in codetables.xml produces a non-existent
1092 entry in the conversion trie.. Hence when met, the input byte is
1093 skipped as it should (in yaz_iconv)
1096 if (x == 0x0361 || x == 0x0360)
1103 cd->comb_offset = 0;
1104 for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
1108 if (inbytesleft == 0 && cd->comb_size)
1110 cd->my_errno = YAZ_ICONV_EINVAL;
1115 x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
1118 cd->comb_x[cd->comb_size] = x;
1119 cd->comb_no_read[cd->comb_size] = *no_read;
1121 inbytesleft = inbytesleft - *no_read;
/* Read handler for the MARC8s variant: behaves like yaz_read_marc8() but
   folds a base letter with a single combining mark into one Latin-1
   character when latin1_comb lists the pair. */
1126 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
1127 size_t inbytesleft, size_t *no_read)
1129 unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
/* only when a character was read and exactly one other is buffered */
1130 if (x && cd->comb_size == 1)
1132 /* For MARC8s we try to get a Latin-1 page code out of it */
1134 for (i = 0; latin1_comb[i].x1; i++)
1135 if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
/* the buffered mark is used up here, so its bytes count as read too */
1137 *no_read += cd->comb_no_read[0];
1139 x = latin1_comb[i].y;
/* Read one MARC-8 character, handling escape sequences first.
   Each leading ESC (27) starts an escape sequence: intermediate
   characters from the set "(,$!)-" are skipped and the byte after them
   becomes cd->marc8_esc_mode.  Input that ends inside an escape sequence
   is reported as YAZ_ICONV_EINVAL.  The character itself is decoded by
   the yaz_marc8_N_conv handler selected by the escape mode; a mode with
   no handler is reported as YAZ_ICONV_EILSEQ.  *no_read accumulates the
   escape bytes and the bytes consumed by the handler. */
1146 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
1147 size_t inbytesleft, size_t *no_read,
1151 while(inbytesleft >= 1 && inp[0] == 27)
1153 size_t inbytesleft0 = inbytesleft;
1156 while(inbytesleft > 0 && strchr("(,$!)-", *inp))
1161 if (inbytesleft <= 0)
1164 cd->my_errno = YAZ_ICONV_EINVAL;
1167 cd->marc8_esc_mode = *inp++;
/* account for every byte consumed by this escape sequence */
1169 (*no_read) += inbytesleft0 - inbytesleft;
1171 if (inbytesleft <= 0)
1176 size_t no_read_sub = 0;
1179 switch(cd->marc8_esc_mode)
1181 case 'B': /* Basic ASCII */
1182 case 'E': /* ANSEL */
1183 case 's': /* ASCII */
1184 x = yaz_marc8_1_conv(inp, inbytesleft, &no_read_sub, comb);
1186 case 'g': /* Greek */
1187 x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub, comb);
1189 case 'b': /* Subscripts */
1190 x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub, comb);
1192 case 'p': /* Superscripts */
1193 x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub, comb);
1195 case '2': /* Basic Hebrew */
1196 x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub, comb);
1198 case 'N': /* Basic Cyrillic */
1199 case 'Q': /* Extended Cyrillic */
1200 x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub, comb);
1202 case '3': /* Basic Arabic */
1203 case '4': /* Extended Arabic */
1204 x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub, comb);
1206 case 'S': /* Greek */
1207 x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub, comb);
1209 case '1': /* Chinese, Japanese, Korean (EACC) */
1210 x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub, comb);
1214 cd->my_errno = YAZ_ICONV_EILSEQ;
1217 *no_read += no_read_sub;
/* Write handler for UTF-8 output: forwards to yaz_write_UTF8_char() and
   lets it report errors directly into cd->my_errno. */
1222 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
1223 char **outbuf, size_t *outbytesleft)
1225 return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
/* Encode the character x as UTF-8 at *outbuf.
   The length is chosen from the value: up to 0x7f one byte, 0x7ff two,
   0xffff three, 0x1fffff four, 0x3ffffff five, anything larger six.
   On success *outbuf is advanced past the bytes written and
   *outbytesleft is reduced.  When the chosen form does not fit, *error
   is set to YAZ_ICONV_E2BIG and (size_t)(-1) is returned.
   NOTE(review): a value above 0x7fffffff reaches the six-byte branch,
   where (x >> 30) | 0xfc no longer yields a valid lead byte - confirm
   that callers never pass such values. */
1228 size_t yaz_write_UTF8_char(unsigned long x,
1229 char **outbuf, size_t *outbytesleft,
1232 unsigned char *outp = (unsigned char *) *outbuf;
1234 if (x <= 0x7f && *outbytesleft >= 1)
1236 *outp++ = (unsigned char) x;
1239 else if (x <= 0x7ff && *outbytesleft >= 2)
1241 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
1242 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1243 (*outbytesleft) -= 2;
1245 else if (x <= 0xffff && *outbytesleft >= 3)
1247 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
1248 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1249 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1250 (*outbytesleft) -= 3;
1252 else if (x <= 0x1fffff && *outbytesleft >= 4)
1254 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
1255 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1256 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1257 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1258 (*outbytesleft) -= 4;
1260 else if (x <= 0x3ffffff && *outbytesleft >= 5)
1262 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
1263 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1264 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1265 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1266 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1267 (*outbytesleft) -= 5;
/* every remaining value is written in the six-byte form */
1269 else if (*outbytesleft >= 6)
1271 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
1272 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
1273 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1274 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1275 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1276 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1277 (*outbytesleft) -= 6;
1281 *error = YAZ_ICONV_E2BIG; /* not room for output */
1282 return (size_t)(-1);
1284 *outbuf = (char *) outp;
/* Write handler for ISO-8859-1 output with composition of base letter
   plus combining mark.
   A character in the range 33..126 is not written at once; it is kept
   in cd->compose_char.  On the next call the pair (compose_char, x) is
   looked up in latin1_comb: on a match x becomes the precomposed
   Latin-1 code, otherwise the held character is written unchanged ahead
   of x.  Characters above 255 or equal to 0 are reported as
   YAZ_ICONV_EILSEQ; lack of output room is reported as YAZ_ICONV_E2BIG
   with (size_t)(-1) returned. */
1288 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
1289 char **outbuf, size_t *outbytesleft)
1291 /* list of two char unicode sequence that, when combined, are
1292 equivalent to single unicode chars that can be represented in
1294 Regular iconv on Linux at least does not seem to convert these,
1295 but since MARC-8 to UTF-8 generates these composed sequence
1296 we get a better chance of a successful MARC-8 -> ISO-8859-1
1298 unsigned char *outp = (unsigned char *) *outbuf;
1300 if (cd->compose_char)
1303 for (i = 0; latin1_comb[i].x1; i++)
1304 if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
1306 x = latin1_comb[i].y;
1309 if (*outbytesleft < 1)
1310 { /* no room. Retain compose_char and bail out */
1311 cd->my_errno = YAZ_ICONV_E2BIG;
1312 return (size_t)(-1);
/* x1 == 0 marks the end of the table, i.e. the search found nothing */
1314 if (!latin1_comb[i].x1)
1315 { /* not found. Just write compose_char */
1316 *outp++ = (unsigned char) cd->compose_char;
1318 *outbuf = (char *) outp;
1320 /* compose_char used so reset it. x now holds current char */
1321 cd->compose_char = 0;
/* hold back a possible base letter until the next character is known */
1324 if (x > 32 && x < 127 && cd->compose_char == 0)
1326 cd->compose_char = x;
1329 else if (x > 255 || x < 1)
1331 cd->my_errno = YAZ_ICONV_EILSEQ;
1334 else if (*outbytesleft < 1)
1336 cd->my_errno = YAZ_ICONV_E2BIG;
1337 return (size_t)(-1);
1339 *outp++ = (unsigned char) x;
1341 *outbuf = (char *) outp;
/* Flush handler for ISO-8859-1 output: writes the character that
   yaz_write_ISO8859_1() is still holding in cd->compose_char, if any,
   and clears it.  Lack of output room is reported as YAZ_ICONV_E2BIG
   with (size_t)(-1) returned; the held character is kept in that case. */
1345 static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
1346 char **outbuf, size_t *outbytesleft)
1348 if (cd->compose_char)
1350 unsigned char *outp = (unsigned char *) *outbuf;
1351 if (*outbytesleft < 1)
1353 cd->my_errno = YAZ_ICONV_E2BIG;
1354 return (size_t)(-1);
1356 *outp++ = (unsigned char) cd->compose_char;
1358 *outbuf = (char *) outp;
1359 cd->compose_char = 0;
/* Write handler for big-endian UCS-4 output: stores x as four bytes,
   most significant first, and advances *outbuf.  With fewer than four
   bytes of room it sets YAZ_ICONV_E2BIG and returns (size_t)(-1). */
1364 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
1365 char **outbuf, size_t *outbytesleft)
1367 unsigned char *outp = (unsigned char *) *outbuf;
1368 if (*outbytesleft >= 4)
1370 *outp++ = (unsigned char) (x>>24);
1371 *outp++ = (unsigned char) (x>>16);
1372 *outp++ = (unsigned char) (x>>8);
1373 *outp++ = (unsigned char) x;
1374 (*outbytesleft) -= 4;
1378 cd->my_errno = YAZ_ICONV_E2BIG;
1379 return (size_t)(-1);
1381 *outbuf = (char *) outp;
/* Write handler for little-endian UCS-4 output: stores x as four bytes,
   least significant first, and advances *outbuf.  With fewer than four
   bytes of room it sets YAZ_ICONV_E2BIG and returns (size_t)(-1). */
1385 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1386 char **outbuf, size_t *outbytesleft)
1388 unsigned char *outp = (unsigned char *) *outbuf;
1389 if (*outbytesleft >= 4)
1391 *outp++ = (unsigned char) x;
1392 *outp++ = (unsigned char) (x>>8);
1393 *outp++ = (unsigned char) (x>>16);
1394 *outp++ = (unsigned char) (x>>24);
1395 (*outbytesleft) -= 4;
1399 cd->my_errno = YAZ_ICONV_E2BIG;
1400 return (size_t)(-1);
1402 *outbuf = (char *) outp;
/* Find the MARC-8 code for the character x.
   x is first encoded as UTF-8 into a local buffer; a failure there is
   reported as YAZ_ICONV_EILSEQ.  The NUL-terminated UTF-8 string is then
   offered to the reverse handlers yaz_marc8r_1_conv .. yaz_marc8r_9_conv
   in that order; each is paired with the escape sequence of its page,
   which is stored in *page_chr.  *comb is filled in by the handler.
   YAZ_ICONV_EILSEQ is set on the path after the last handler.
   NOTE(review): presumably the first handler that returns a non-zero
   code wins and its value is returned - the tests between the calls are
   not visible here. */
1406 static unsigned long lookup_marc8(yaz_iconv_t cd,
1407 unsigned long x, int *comb,
1408 const char **page_chr)
1411 char *utf8_outbuf = utf8_buf;
/* one byte is kept in reserve for the terminating NUL written below */
1412 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1414 r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
1415 if (r == (size_t)(-1))
1417 cd->my_errno = YAZ_ICONV_EILSEQ;
1423 size_t inbytesleft, no_read_sub = 0;
1426 *utf8_outbuf = '\0';
1427 inp = (unsigned char *) utf8_buf;
1428 inbytesleft = strlen(utf8_buf);
1430 x = yaz_marc8r_1_conv(inp, inbytesleft, &no_read_sub, comb);
1433 *page_chr = "\033(B";
1436 x = yaz_marc8r_2_conv(inp, inbytesleft, &no_read_sub, comb);
1439 *page_chr = "\033g";
1442 x = yaz_marc8r_3_conv(inp, inbytesleft, &no_read_sub, comb);
1445 *page_chr = "\033b";
1448 x = yaz_marc8r_4_conv(inp, inbytesleft, &no_read_sub, comb);
1451 *page_chr = "\033p";
1454 x = yaz_marc8r_5_conv(inp, inbytesleft, &no_read_sub, comb);
1457 *page_chr = "\033(2";
1460 x = yaz_marc8r_6_conv(inp, inbytesleft, &no_read_sub, comb);
1463 *page_chr = "\033(N";
1466 x = yaz_marc8r_7_conv(inp, inbytesleft, &no_read_sub, comb);
1469 *page_chr = "\033(3";
1472 x = yaz_marc8r_8_conv(inp, inbytesleft, &no_read_sub, comb);
1475 *page_chr = "\033(S";
1478 x = yaz_marc8r_9_conv(inp, inbytesleft, &no_read_sub, comb);
1481 *page_chr = "\033$1";
1484 cd->my_errno = YAZ_ICONV_EILSEQ;
/* Write out the pending MARC-8 base character together with its
   combining characters, then clear the pending state.
   The base character cd->write_marc8_last may occupy up to three bytes,
   which are collected in out_buf.  The output order is: all pending
   combining characters, then the base character bytes, then the
   second-half character if one is set.  When the output lacks room for
   everything, YAZ_ICONV_E2BIG is set and (size_t)(-1) is returned.
   NOTE(review): the guards on the three out_buf stores are not visible
   here - presumably zero bytes are skipped. */
1489 static size_t flush_combos(yaz_iconv_t cd,
1490 char **outbuf, size_t *outbytesleft)
1492 unsigned long y = cd->write_marc8_last;
1495 size_t i, out_no = 0;
1500 byte = (unsigned char )((y>>16) & 0xff);
1502 out_buf[out_no++] = byte;
1503 byte = (unsigned char)((y>>8) & 0xff);
1505 out_buf[out_no++] = byte;
1506 byte = (unsigned char )(y & 0xff);
1508 out_buf[out_no++] = byte;
/* the extra 1 reserves room for the optional second-half character */
1510 if (out_no + cd->write_marc8_comb_no + 1 > *outbytesleft)
1512 cd->my_errno = YAZ_ICONV_E2BIG;
1513 return (size_t) (-1);
1516 for (i = 0; i < cd->write_marc8_comb_no; i++)
1518 /* all MARC-8 combined characters are simple bytes */
1519 byte = (unsigned char )(cd->write_marc8_comb_ch[i]);
1520 *(*outbuf)++ = byte;
1523 memcpy(*outbuf, out_buf, out_no);
1525 (*outbytesleft) -= out_no;
1526 if (cd->write_marc8_second_half_char)
1528 *(*outbuf)++ = cd->write_marc8_second_half_char;
1532 cd->write_marc8_last = 0;
1533 cd->write_marc8_comb_no = 0;
1534 cd->write_marc8_second_half_char = 0;
/* Switch the MARC-8 output to the page named by the escape sequence
   page_chr.  Nothing is written when that page is already current.
   Otherwise at least 8 bytes of room are required (else YAZ_ICONV_E2BIG
   and (size_t)(-1)), page_chr becomes the current page and the escape
   sequence is copied to the output.  When the old page is one of
   "\033p", "\033g" or "\033b" it is left first; if the new page is not
   the ASCII page "\033(B", two sequences are written: one to leave and
   one to enter.
   NOTE(review): the assignment that selects the leave sequence for
   page_out is not visible here. */
1538 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
1539 char **outbuf, size_t *outbytesleft,
1540 const char *page_chr)
1542 const char *old_page_chr = cd->write_marc8_page_chr;
1543 if (strcmp(page_chr, old_page_chr))
1546 const char *page_out = page_chr;
1548 if (*outbytesleft < 8)
1550 cd->my_errno = YAZ_ICONV_E2BIG;
1552 return (size_t) (-1);
1554 cd->write_marc8_page_chr = page_chr;
1556 if (!strcmp(old_page_chr, "\033p")
1557 || !strcmp(old_page_chr, "\033g")
1558 || !strcmp(old_page_chr, "\033b"))
1560 /* Technique 1 leave */
1562 if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */
1564 /* Must leave script + enter new page */
1565 plen = strlen(page_out);
1566 memcpy(*outbuf, page_out, plen);
1568 (*outbytesleft) -= plen;
1569 page_out = page_chr;
1572 plen = strlen(page_out);
1573 memcpy(*outbuf, page_out, plen);
1575 (*outbytesleft) -= plen;
/* Queue one character for MARC-8 output.
   x is translated with lookup_marc8(); the failure path returns
   (size_t)(-1).  A combining character is appended to
   cd->write_marc8_comb_ch[]; the double diacritics additionally set a
   second-half byte (0xEC, or 0xFB for U+0360).  Any other character
   first flushes the previous base character with flush_combos(), then
   switches page if needed and becomes the new pending base character.
   NOTE(review): only 6 of the 8 slots of write_marc8_comb_ch are ever
   used, and further combining characters appear to be dropped silently
   - confirm that this is intended. */
1581 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1582 char **outbuf, size_t *outbytesleft)
1585 const char *page_chr = 0;
1586 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1589 return (size_t) (-1);
1594 cd->write_marc8_second_half_char = 0xEC;
1595 else if (x == 0x0360)
1596 cd->write_marc8_second_half_char = 0xFB;
1598 if (cd->write_marc8_comb_no < 6)
1599 cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y;
1603 size_t r = flush_combos(cd, outbuf, outbytesleft);
1607 r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr);
1610 cd->write_marc8_last = y;
/* Flush handler for MARC-8 output: writes the pending character with
   flush_combos() and then switches the output back to the ASCII page
   ("\033(B").
   NOTE(review): presumably a failing flush_combos() result is returned
   before the page switch - that test is not visible here. */
1615 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1616 char **outbuf, size_t *outbytesleft)
1618 size_t r = flush_combos(cd, outbuf, outbytesleft);
1621 return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, "\033(B");
/*
 * Write handler for MARC-8 output.
 *
 * The latin1_comb table is scanned up to its terminating entry (x1 == 0).
 * If x equals the y member of an entry, the entry's x1 and x2 members are
 * written with two calls to yaz_write_marc8_2() instead of x itself.
 * If that fails with YAZ_ICONV_E2BIG the saved output state is put back.
 * Any x without a table entry is handed to yaz_write_marc8_2() unchanged.
 *
 * cd            conversion state
 * x             character to write
 * outbuf        address of the output cursor
 * outbytesleft  address of the remaining output capacity
 *
 * NOTE(review): the declarations of i and r, the test between the two
 * writes, the restore of *outbuf and the return inside the loop are not
 * visible in this listing.
 */
1624 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1625 char **outbuf, size_t *outbytesleft)
1628 for (i = 0; latin1_comb[i].x1; i++)
1630 if (x == latin1_comb[i].y)
1633 /* save the output pointers .. */
1634 char *outbuf0 = *outbuf;
1635 size_t outbytesleft0 = *outbytesleft;
/* NOTE(review): yaz_write_marc8_2() assigns an unsigned long to
   write_marc8_last; keeping the copy in an int may truncate it - confirm
   the field's type. */
1636 int last_ch = cd->write_marc8_last;
1638 r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1639 outbuf, outbytesleft);
1642 r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1643 outbuf, outbytesleft);
1644 if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1646 /* not enough room. reset output to original values */
/* NOTE(review): of the write_marc8_* state only write_marc8_last is visibly
   restored; write_marc8_comb_no, write_marc8_second_half_char and
   write_marc8_page_chr may have been changed by the first call - verify. */
1648 *outbytesleft = outbytesleft0;
1649 cd->write_marc8_last = last_ch;
/* no table entry matched: write x as it is */
1654 return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
/*
 * Write handler for the WCHAR_T encoding.
 *
 * If at least sizeof(wchar_t) bytes remain, the bytes of wch are copied to
 * the output with memcpy(), and the cursor and *outbytesleft are adjusted
 * by sizeof(wch). Otherwise cd->my_errno is set to YAZ_ICONV_E2BIG and
 * (size_t)(-1) is returned.
 *
 * cd            conversion state; only my_errno is written
 * x             character to write
 * outbuf        address of the output cursor; updated on success
 * outbytesleft  address of the remaining output capacity
 *
 * NOTE(review): the declaration of wch (presumably a wchar_t initialised
 * from x) and the success return value are not visible in this listing.
 */
1659 static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
1660 char **outbuf, size_t *outbytesleft)
/* work on an unsigned char view of the output cursor */
1662 unsigned char *outp = (unsigned char *) *outbuf;
1664 if (*outbytesleft >= sizeof(wchar_t))
/* memcpy() is used for the store, so outp needs no particular alignment */
1667 memcpy(outp, &wch, sizeof(wch));
1668 outp += sizeof(wch);
1669 (*outbytesleft) -= sizeof(wch);
1673 cd->my_errno = YAZ_ICONV_E2BIG;
1674 return (size_t)(-1);
/* publish the advanced cursor to the caller */
1676 *outbuf = (char *) outp;
1681 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1683 return cd->read_handle && cd->write_handle;
/*
 * Create a conversion handle for converting from fromcode to tocode.
 *
 * The handle is allocated with xmalloc(), its four handler pointers are
 * cleared and my_errno starts as YAZ_ICONV_UNKNOWN. fromcode is matched
 * with yaz_matchstr() against the names handled by the readers in this
 * file, and tocode against the names handled by the writers. When either
 * handler is still unset afterwards, the system iconv_open() is tried with
 * the same two names.
 *
 * tocode    name of the target encoding
 * fromcode  name of the source encoding; a leading '@' is special-cased
 *           (see the comment in the body). It is dereferenced without a
 *           null check.
 *
 * NOTE(review): the names are spelled without hyphens ("UTF8",
 * "ISO88591"); presumably yaz_matchstr() ignores case and '-' - confirm.
 * NOTE(review): what happens after the '@' test, what is done when
 * iconv_open() fails, and the return statements are not visible in this
 * listing.
 */
1686 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1688 yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
/* no handler is selected until a name matches below */
1690 cd->write_handle = 0;
1691 cd->read_handle = 0;
1692 cd->init_handle = 0;
1693 cd->flush_handle = 0;
1694 cd->my_errno = YAZ_ICONV_UNKNOWN;
1696 /* a useful hack: if fromcode has a leading @,
1697 the library will not use YAZ's own conversions .. */
1698 if (fromcode[0] == '@')
/* --- reader selection; yaz_matchstr() yields 0 on a match --- */
1702 if (!yaz_matchstr(fromcode, "UTF8"))
/* UTF-8 is the only source encoding that also installs an init handler */
1704 cd->read_handle = yaz_read_UTF8;
1705 cd->init_handle = yaz_init_UTF8;
1707 else if (!yaz_matchstr(fromcode, "ISO88591"))
1708 cd->read_handle = yaz_read_ISO8859_1;
1709 else if (!yaz_matchstr(fromcode, "UCS4"))
1710 cd->read_handle = yaz_read_UCS4;
1711 else if (!yaz_matchstr(fromcode, "UCS4LE"))
1712 cd->read_handle = yaz_read_UCS4LE;
1713 else if (!yaz_matchstr(fromcode, "MARC8"))
1714 cd->read_handle = yaz_read_marc8;
1715 else if (!yaz_matchstr(fromcode, "MARC8s"))
1716 cd->read_handle = yaz_read_marc8s;
1717 else if (!yaz_matchstr(fromcode, "advancegreek"))
1718 cd->read_handle = yaz_read_advancegreek;
/* both spellings of the ISO 5428:1984 name select the same reader */
1719 else if (!yaz_matchstr(fromcode, "iso54281984"))
1720 cd->read_handle = yaz_read_iso5428_1984;
1721 else if (!yaz_matchstr(fromcode, "iso5428:1984"))
1722 cd->read_handle = yaz_read_iso5428_1984;
1724 else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1725 cd->read_handle = yaz_read_wchar_t;
/* --- writer selection --- */
1728 if (!yaz_matchstr(tocode, "UTF8"))
1729 cd->write_handle = yaz_write_UTF8;
1730 else if (!yaz_matchstr(tocode, "ISO88591"))
1732 cd->write_handle = yaz_write_ISO8859_1;
1733 cd->flush_handle = yaz_flush_ISO8859_1;
1735 else if (!yaz_matchstr (tocode, "UCS4"))
1736 cd->write_handle = yaz_write_UCS4;
1737 else if (!yaz_matchstr(tocode, "UCS4LE"))
1738 cd->write_handle = yaz_write_UCS4LE;
/* as a target, "MARC8" and "MARC8s" install the same write and flush
   handlers */
1739 else if (!yaz_matchstr(tocode, "MARC8"))
1741 cd->write_handle = yaz_write_marc8;
1742 cd->flush_handle = yaz_flush_marc8;
1744 else if (!yaz_matchstr(tocode, "MARC8s"))
1746 cd->write_handle = yaz_write_marc8;
1747 cd->flush_handle = yaz_flush_marc8;
1749 else if (!yaz_matchstr(tocode, "advancegreek"))
1751 cd->write_handle = yaz_write_advancegreek;
1753 else if (!yaz_matchstr(tocode, "iso54281984"))
1755 cd->write_handle = yaz_write_iso5428_1984;
1757 else if (!yaz_matchstr(tocode, "iso5428:1984"))
1759 cd->write_handle = yaz_write_iso5428_1984;
1762 else if (!yaz_matchstr(tocode, "WCHAR_T"))
1763 cd->write_handle = yaz_write_wchar_t;
/* fall back to the system iconv when either direction has no handler;
   iconv_open() signals failure with (iconv_t)(-1) */
1768 if (!cd->read_handle || !cd->write_handle)
1770 cd->iconv_cd = iconv_open (tocode, fromcode);
1771 if (cd->iconv_cd == (iconv_t) (-1))
/* NOTE(review): the same test appears a second time; presumably the two
   occurrences sit in alternative preprocessor branches (with and without a
   system iconv) - the directives are not visible here. */
1778 if (!cd->read_handle || !cd->write_handle)
/*
 * Convert characters from *inbuf to *outbuf using the handle cd.
 *
 * Two ways of converting are visible:
 *  - the system iconv() is called with cd->iconv_cd; when it returns
 *    (size_t)(-1), yaz_errno() is mapped onto one of YAZ_ICONV_E2BIG,
 *    YAZ_ICONV_EINVAL, YAZ_ICONV_EILSEQ or YAZ_ICONV_UNKNOWN in
 *    cd->my_errno;
 *  - otherwise a value x is read with cd->read_handle and written with
 *    cd->write_handle, after which *inbytesleft and *inbuf are advanced by
 *    no_read.
 *
 * Other visible behaviour:
 *  - conversion state in cd is reset (my_errno, marc8_esc_mode = 'B', the
 *    comb and compose fields, the write_marc8_* fields with "\033(B" as the
 *    current page);
 *  - when an init handler exists and input is supplied, the init handler is
 *    called on the start of the input and *inbytesleft is reduced by what
 *    it consumed;
 *  - when inbuf or *inbuf is null and an output buffer is supplied,
 *    cd->unget_x is written with the write handler and the flush handler,
 *    if any, is called;
 *  - when a write fails with YAZ_ICONV_E2BIG, no_read is stored in
 *    cd->no_read_x, and cd->no_read_x is later copied back into no_read;
 *  - when *inbytesleft reaches 0, r is set to the distance *inbuf - inbuf0.
 *
 * cd            conversion handle
 * inbuf         address of the input cursor; may be null, or point to null
 * inbytesleft   address of the remaining input length
 * outbuf        address of the output cursor
 * outbytesleft  address of the remaining output capacity
 *
 * NOTE(review): the conditions guarding most of these steps, the case
 * labels of the switch, the local declarations (r, x, no_read, inbuf0) and
 * the return statements are not visible in this listing - confirm against
 * the full source before relying on any ordering described here.
 */
1788 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1789 char **outbuf, size_t *outbytesleft)
/* delegate to the system iconv and translate its error code */
1798 iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1799 if (r == (size_t)(-1))
1801 switch (yaz_errno())
1804 cd->my_errno = YAZ_ICONV_E2BIG;
1807 cd->my_errno = YAZ_ICONV_EINVAL;
1810 cd->my_errno = YAZ_ICONV_EILSEQ;
1813 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* reset of the conversion state kept in cd */
1825 cd->my_errno = YAZ_ICONV_UNKNOWN;
1826 cd->marc8_esc_mode = 'B';
1828 cd->comb_offset = cd->comb_size = 0;
1829 cd->compose_char = 0;
1831 cd->write_marc8_comb_no = 0;
1832 cd->write_marc8_second_half_char = 0;
1833 cd->write_marc8_last = 0;
1834 cd->write_marc8_page_chr = "\033(B";
/* let the init handler look at the start of the input */
1842 if (cd->init_handle && inbuf && *inbuf)
1845 size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1846 *inbytesleft, &no_read);
1849 if (cd->my_errno == YAZ_ICONV_EINVAL)
1854 *inbytesleft -= no_read;
/* no input supplied: write what is pending and flush */
1860 if (!inbuf || !*inbuf)
1862 if (outbuf && *outbuf)
1865 r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
1866 if (cd->flush_handle)
1867 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
/* pick up the input length saved by an earlier call that ran out of room */
1882 no_read = cd->no_read_x;
/* all input consumed: the result is the number of input bytes advanced */
1886 if (*inbytesleft == 0)
1888 r = *inbuf - inbuf0;
/* read one value, then write it */
1891 x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1901 r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
1904 /* unable to write it. save it because read_handle cannot
1906 if (cd->my_errno == YAZ_ICONV_E2BIG)
1909 cd->no_read_x = no_read;
1915 *inbytesleft -= no_read;
1916 (*inbuf) += no_read;
1921 int yaz_iconv_error (yaz_iconv_t cd)
1923 return cd->my_errno;
/*
 * Close a conversion handle.
 *
 * The visible part hands cd->iconv_cd to the system iconv_close().
 *
 * NOTE(review): the condition under which iconv_close() is called, the
 * release of cd itself and the return value are not visible in this
 * listing - confirm against the full source.
 */
1926 int yaz_iconv_close (yaz_iconv_t cd)
1930 iconv_close (cd->iconv_cd);
1939 * indent-tabs-mode: nil
1941 * vim: shiftwidth=4 tabstop=8 expandtab