2 * Copyright (C) 1995-2007, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.43 2007-09-17 19:18:27 adam Exp $
9 * \brief Implements simple ICONV
11 * This implements an interface similar to that of iconv and
12 * is used by YAZ to interface with iconv (if present).
13 * For systems where iconv is not present, this layer
14 * provides a few important conversions: UTF-8, MARC-8, Latin-1.
17 * http://www.loc.gov/marc/specifications/speccharmarc8.html
37 #include <yaz/yaz-util.h>
39 unsigned long yaz_marc8_42_conv(unsigned char *inp, size_t inbytesleft,
40 size_t *no_read, int *combining);
41 unsigned long yaz_marc8_45_conv(unsigned char *inp, size_t inbytesleft,
42 size_t *no_read, int *combining);
43 unsigned long yaz_marc8_67_conv(unsigned char *inp, size_t inbytesleft,
44 size_t *no_read, int *combining);
45 unsigned long yaz_marc8_62_conv(unsigned char *inp, size_t inbytesleft,
46 size_t *no_read, int *combining);
47 unsigned long yaz_marc8_70_conv(unsigned char *inp, size_t inbytesleft,
48 size_t *no_read, int *combining);
49 unsigned long yaz_marc8_32_conv(unsigned char *inp, size_t inbytesleft,
50 size_t *no_read, int *combining);
51 unsigned long yaz_marc8_4E_conv(unsigned char *inp, size_t inbytesleft,
52 size_t *no_read, int *combining);
53 unsigned long yaz_marc8_51_conv(unsigned char *inp, size_t inbytesleft,
54 size_t *no_read, int *combining);
55 unsigned long yaz_marc8_33_conv(unsigned char *inp, size_t inbytesleft,
56 size_t *no_read, int *combining);
57 unsigned long yaz_marc8_34_conv(unsigned char *inp, size_t inbytesleft,
58 size_t *no_read, int *combining);
59 unsigned long yaz_marc8_53_conv(unsigned char *inp, size_t inbytesleft,
60 size_t *no_read, int *combining);
61 unsigned long yaz_marc8_31_conv(unsigned char *inp, size_t inbytesleft,
62 size_t *no_read, int *combining);
65 unsigned long yaz_marc8r_42_conv(unsigned char *inp, size_t inbytesleft,
66 size_t *no_read, int *combining);
67 unsigned long yaz_marc8r_45_conv(unsigned char *inp, size_t inbytesleft,
68 size_t *no_read, int *combining);
69 unsigned long yaz_marc8r_67_conv(unsigned char *inp, size_t inbytesleft,
70 size_t *no_read, int *combining);
71 unsigned long yaz_marc8r_62_conv(unsigned char *inp, size_t inbytesleft,
72 size_t *no_read, int *combining);
73 unsigned long yaz_marc8r_70_conv(unsigned char *inp, size_t inbytesleft,
74 size_t *no_read, int *combining);
75 unsigned long yaz_marc8r_32_conv(unsigned char *inp, size_t inbytesleft,
76 size_t *no_read, int *combining);
77 unsigned long yaz_marc8r_4E_conv(unsigned char *inp, size_t inbytesleft,
78 size_t *no_read, int *combining);
79 unsigned long yaz_marc8r_51_conv(unsigned char *inp, size_t inbytesleft,
80 size_t *no_read, int *combining);
81 unsigned long yaz_marc8r_33_conv(unsigned char *inp, size_t inbytesleft,
82 size_t *no_read, int *combining);
83 unsigned long yaz_marc8r_34_conv(unsigned char *inp, size_t inbytesleft,
84 size_t *no_read, int *combining);
85 unsigned long yaz_marc8r_53_conv(unsigned char *inp, size_t inbytesleft,
86 size_t *no_read, int *combining);
87 unsigned long yaz_marc8r_31_conv(unsigned char *inp, size_t inbytesleft,
88 size_t *no_read, int *combining);
/*
 * Conversion descriptor state.  Holds the handler callbacks for the
 * selected input and output encodings together with scratch state used
 * by the MARC-8 reader, the MARC-8 writer and the ISO-8859-1 writer
 * defined in this file.
 */
90 struct yaz_iconv_struct {
/* input initialisation hook; yaz_init_UTF8 has this signature */
93 size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
94 size_t inbytesleft, size_t *no_read);
/* decodes one character from inbuf; bytes consumed are stored in no_read */
95 unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
96 size_t inbytesleft, size_t *no_read);
/* encodes character x at *outbuf, limited by *outbytesleft */
97 size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
98 char **outbuf, size_t *outbytesleft);
/* writes out any state the write handler is still holding back */
99 size_t (*flush_handle)(yaz_iconv_t cd,
100 char **outbuf, size_t *outbytesleft);
/* characters buffered by yaz_read_marc8 and the input bytes each one used */
105 unsigned long comb_x[8];
106 size_t comb_no_read[8];
108 unsigned long unget_x;
/* character held back by yaz_write_ISO8859_1 until the next one is seen */
112 unsigned long compose_char;
/* combining bytes queued by yaz_write_marc8_2, emitted by flush_combos */
114 unsigned long write_marc8_comb_ch[8];
115 size_t write_marc8_comb_no;
/* when non-zero, flush_combos appends this byte after the pending character */
116 unsigned write_marc8_second_half_char;
/* pending MARC-8 character; flush_combos splits it into up to three bytes */
117 unsigned long write_marc8_last;
/* escape sequence of the page most recently selected on output */
118 const char *write_marc8_page_chr;
/*
 * Composition table (presumably the latin1_comb array that the MARC-8
 * and ISO-8859-1 handlers index with .x1, .x2 and .y -- confirm against
 * the declaration).  Each entry is { x1, x2, y } where x1 is a base
 * letter, x2 a Unicode combining mark and y the single ISO-8859-1 code
 * named in the trailing comment.
 */
122 unsigned long x1, x2;
125 { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
126 { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
127 { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
128 { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
129 { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
130 { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
131 /* omitted: 0xc6 LATIN CAPITAL LETTER AE */
132 { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
133 { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
134 { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
135 { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
136 { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
137 { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
138 { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
139 { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
140 { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
141 { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
142 { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
143 { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
144 { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
145 { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
146 { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
147 /* omitted: 0xd7 MULTIPLICATION SIGN */
148 /* omitted: 0xd8 LATIN CAPITAL LETTER O WITH STROKE */
149 { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
150 { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
151 { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
152 { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
153 { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
154 /* omitted: 0xde LATIN CAPITAL LETTER THORN */
155 /* omitted: 0xdf LATIN SMALL LETTER SHARP S */
156 { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
157 { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
158 { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
159 { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
160 { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
161 { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
162 /* omitted: 0xe6 LATIN SMALL LETTER AE */
163 { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
164 { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
165 { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
166 { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
167 { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
168 { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
169 { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
170 { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
171 { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
172 /* omitted: 0xf0 LATIN SMALL LETTER ETH */
173 { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
174 { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
175 { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
176 { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
177 { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
178 { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
179 /* omitted: 0xf7 DIVISION SIGN */
180 /* omitted: 0xf8 LATIN SMALL LETTER O WITH STROKE */
181 { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
182 { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
183 { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
184 { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
185 { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
186 /* omitted: 0xfe LATIN SMALL LETTER THORN */
187 { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
/*
 * Read handler for ISO-8859-1 input.  The character value is taken
 * directly from the first input byte.
 */
192 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
193 size_t inbytesleft, size_t *no_read)
195 unsigned long x = inp[0];
/*
 * Initialisation handler for UTF-8 input.  Inspects the first three
 * input bytes (inp[1] and inp[2] are tested below) and reports
 * YAZ_ICONV_EINVAL through cd->my_errno on one of its paths.
 */
201 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
202 size_t inbytesleft, size_t *no_read)
211 cd->my_errno = YAZ_ICONV_EINVAL;
/*
 * NOTE(review): the UTF-8 byte order mark is EF BB BF, so a test that
 * is meant to recognise it needs inp[1] == 0xbb.  As written the
 * condition is true only when the second byte is NOT 0xbb, which
 * looks inverted -- confirm and correct.
 */
214 if (inp[1] != 0xbb && inp[2] == 0xbf)
/*
 * Decodes one UTF-8 encoded character from inp.
 *
 * inp          input bytes
 * inbytesleft  number of bytes available at inp
 * no_read      set to 0 on entry to the function body; the success
 *              paths are expected to store the sequence length here
 * error        receives YAZ_ICONV_EILSEQ when a sequence is rejected
 *              and YAZ_ICONV_EINVAL when the input is too short
 *
 * Sequences of two to six bytes are handled; the branch is selected
 * by the lead byte together with the number of bytes available, and
 * every trailing byte must match the bit pattern 10xxxxxx.
 */
221 unsigned long yaz_read_UTF8_char(unsigned char *inp,
222 size_t inbytesleft, size_t *no_read,
227 *no_read = 0; /* by default */
/* a lead byte reaching this test with value <= 0xbf, or 0xfe/0xff, is invalid */
233 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
235 *error = YAZ_ICONV_EILSEQ;
/* two byte form: 5 + 6 payload bits */
237 else if (inp[0] <= 0xdf && inbytesleft >= 2)
239 if ((inp[1] & 0xc0) == 0x80)
241 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
245 *error = YAZ_ICONV_EILSEQ;
248 *error = YAZ_ICONV_EILSEQ;
/* three byte form: 4 + 6 + 6 payload bits */
250 else if (inp[0] <= 0xef && inbytesleft >= 3)
252 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
254 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
259 *error = YAZ_ICONV_EILSEQ;
262 *error = YAZ_ICONV_EILSEQ;
/* four byte form: 3 + 6 + 6 + 6 payload bits */
264 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
266 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
267 && (inp[3] & 0xc0) == 0x80)
269 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
270 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
274 *error = YAZ_ICONV_EILSEQ;
277 *error = YAZ_ICONV_EILSEQ;
/* five byte form: 2 + 4 * 6 payload bits */
279 else if (inp[0] <= 0xfb && inbytesleft >= 5)
281 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
282 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
284 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
285 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
290 *error = YAZ_ICONV_EILSEQ;
293 *error = YAZ_ICONV_EILSEQ;
/* six byte form: 1 + 5 * 6 payload bits */
295 else if (inp[0] <= 0xfd && inbytesleft >= 6)
297 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
298 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
299 && (inp[5] & 0xc0) == 0x80)
301 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
302 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
303 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
307 *error = YAZ_ICONV_EILSEQ;
310 *error = YAZ_ICONV_EILSEQ;
313 *error = YAZ_ICONV_EINVAL; /* incomplete sequence */
/*
 * Read handler for UTF-8 input.  Delegates to yaz_read_UTF8_char and
 * lets it report errors through cd->my_errno.
 */
318 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
319 size_t inbytesleft, size_t *no_read)
321 return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
/*
 * Read handler for UCS-4 input with the most significant byte first.
 * Sets cd->my_errno to YAZ_ICONV_EINVAL when the input is incomplete;
 * otherwise combines inp[0]..inp[3] into one value.
 */
324 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
325 size_t inbytesleft, size_t *no_read)
331 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/*
 * NOTE(review): inp[0] is promoted to int before the shift, so a byte
 * of 0x80 or above shifts a one into the sign bit, which C leaves
 * undefined for signed operands.  Casting each byte to unsigned long
 * before shifting would avoid that -- confirm and correct.
 */
336 x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
/*
 * Read handler for UCS-4 input with the least significant byte first.
 * Sets cd->my_errno to YAZ_ICONV_EINVAL when the input is incomplete;
 * otherwise combines inp[3]..inp[0] into one value.
 */
342 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
343 size_t inbytesleft, size_t *no_read)
349 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/*
 * NOTE(review): inp[3] is promoted to int before the shift, so a byte
 * of 0x80 or above shifts a one into the sign bit, which C leaves
 * undefined for signed operands.  Casting each byte to unsigned long
 * before shifting would avoid that -- confirm and correct.
 */
354 x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
/*
 * Read handler for input made of native wchar_t values.  Needs at
 * least sizeof(wchar_t) bytes, otherwise cd->my_errno is set to
 * YAZ_ICONV_EINVAL.  The value is copied out with memcpy, so inp does
 * not have to be aligned for wchar_t, and *no_read becomes the size of
 * one wchar_t.
 */
361 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
362 size_t inbytesleft, size_t *no_read)
366 if (inbytesleft < sizeof(wchar_t))
368 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
374 memcpy (&wch, inp, sizeof(wch));
376 *no_read = sizeof(wch);
/*
 * Read handler for ISO 5428:1984 (Greek) input.  A loop first consumes
 * leading bytes such as 0xa3 while input remains; if the input runs
 * out there, cd->my_errno is set to YAZ_ICONV_EINVAL.  The letter byte
 * is then dispatched through the case labels below, each annotated
 * with the Greek letter it stands for.
 */
382 static unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
383 size_t inbytesleft, size_t *no_read)
390 while (inbytesleft > 0)
396 else if (*inp == 0xa3)
406 if (inbytesleft == 0)
408 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
413 case 0xe1: /* alpha small */
419 case 0xc1: /* alpha capital */
426 case 0xe2: /* Beta small */
429 case 0xc2: /* Beta capital */
433 case 0xe4: /* Gamma small */
436 case 0xc4: /* Gamma capital */
440 case 0xe5: /* Delta small */
443 case 0xc5: /* Delta capital */
446 case 0xe6: /* epsilon small */
452 case 0xc6: /* epsilon capital */
458 case 0xe9: /* Zeta small */
461 case 0xc9: /* Zeta capital */
464 case 0xea: /* Eta small */
470 case 0xca: /* Eta capital */
476 case 0xeb: /* Theta small */
479 case 0xcb: /* Theta capital */
482 case 0xec: /* Iota small */
494 case 0xcc: /* Iota capital */
503 case 0xed: /* Kappa small */
506 case 0xcd: /* Kappa capital */
509 case 0xee: /* Lambda small */
512 case 0xce: /* Lambda capital */
515 case 0xef: /* Mu small */
518 case 0xcf: /* Mu capital */
521 case 0xf0: /* Nu small */
524 case 0xd0: /* Nu capital */
527 case 0xf1: /* Xi small */
530 case 0xd1: /* Xi capital */
533 case 0xf2: /* Omicron small */
539 case 0xd2: /* Omicron capital */
545 case 0xf3: /* Pi small */
548 case 0xd3: /* Pi capital */
551 case 0xf5: /* Rho small */
554 case 0xd5: /* Rho capital */
557 case 0xf7: /* Sigma small (end of words) */
560 case 0xf6: /* Sigma small */
563 case 0xd6: /* Sigma capital */
566 case 0xf8: /* Tau small */
569 case 0xd8: /* Tau capital */
572 case 0xf9: /* Upsilon small */
584 case 0xd9: /* Upsilon capital */
593 case 0xfa: /* Phi small */
596 case 0xda: /* Phi capital */
599 case 0xfb: /* Chi small */
602 case 0xdb: /* Chi capital */
605 case 0xfc: /* Psi small */
608 case 0xdc: /* Psi capital */
611 case 0xfd: /* Omega small */
617 case 0xdd: /* Omega capital */
/*
 * Write handler for ISO 5428:1984 (Greek) output.  At least three
 * bytes of output space are demanded up front, since the longest
 * mapping below is three bytes; otherwise cd->my_errno is set to
 * YAZ_ICONV_E2BIG.  Each case maps one Unicode Greek code point to its
 * byte sequence: a plain letter is one byte, and the accented forms
 * are the same letter byte preceded by 0xa2, 0xa3 or both.
 * YAZ_ICONV_EILSEQ is reported for a character that cannot be written.
 */
632 static size_t yaz_write_iso5428_1984(yaz_iconv_t cd, unsigned long x,
633 char **outbuf, size_t *outbytesleft)
636 unsigned char *out = (unsigned char*) *outbuf;
637 if (*outbytesleft < 3)
639 cd->my_errno = YAZ_ICONV_E2BIG; /* no room for output */
644 case 0x03ac : out[k++]=0xa2; out[k++]=0xe1; break;
645 case 0x03b1 : out[k++]=0xe1; break;
646 case 0x0386 : out[k++]=0xa2; out[k++]=0xc1; break;
647 case 0x0391 : out[k++]=0xc1; break;
648 case 0x03b2 : out[k++]=0xe2; break;
649 case 0x0392 : out[k++]=0xc2; break;
650 case 0x03b3 : out[k++]=0xe4; break;
651 case 0x0393 : out[k++]=0xc4; break;
652 case 0x03b4 : out[k++]=0xe5; break;
653 case 0x0394 : out[k++]=0xc5; break;
654 case 0x03ad : out[k++]=0xa2; out[k++]=0xe6; break;
655 case 0x03b5 : out[k++]=0xe6; break;
656 case 0x0388 : out[k++]=0xa2; out[k++]=0xc6; break;
657 case 0x0395 : out[k++]=0xc6; break;
658 case 0x03b6 : out[k++]=0xe9; break;
659 case 0x0396 : out[k++]=0xc9; break;
660 case 0x03ae : out[k++]=0xa2; out[k++]=0xea; break;
661 case 0x03b7 : out[k++]=0xea; break;
662 case 0x0389 : out[k++]=0xa2; out[k++]=0xca; break;
663 case 0x0397 : out[k++]=0xca; break;
664 case 0x03b8 : out[k++]=0xeb; break;
665 case 0x0398 : out[k++]=0xcb; break;
666 case 0x0390 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xec; break;
667 case 0x03af : out[k++]=0xa2; out[k++]=0xec; break;
668 case 0x03ca : out[k++]=0xa3; out[k++]=0xec; break;
669 case 0x03b9 : out[k++]=0xec; break;
670 case 0x038a : out[k++]=0xa2; out[k++]=0xcc; break;
671 case 0x03aa : out[k++]=0xa3; out[k++]=0xcc; break;
672 case 0x0399 : out[k++]=0xcc; break;
673 case 0x03ba : out[k++]=0xed; break;
674 case 0x039a : out[k++]=0xcd; break;
675 case 0x03bb : out[k++]=0xee; break;
676 case 0x039b : out[k++]=0xce; break;
677 case 0x03bc : out[k++]=0xef; break;
678 case 0x039c : out[k++]=0xcf; break;
679 case 0x03bd : out[k++]=0xf0; break;
680 case 0x039d : out[k++]=0xd0; break;
681 case 0x03be : out[k++]=0xf1; break;
682 case 0x039e : out[k++]=0xd1; break;
683 case 0x03cc : out[k++]=0xa2; out[k++]=0xf2; break;
684 case 0x03bf : out[k++]=0xf2; break;
685 case 0x038c : out[k++]=0xa2; out[k++]=0xd2; break;
686 case 0x039f : out[k++]=0xd2; break;
687 case 0x03c0 : out[k++]=0xf3; break;
688 case 0x03a0 : out[k++]=0xd3; break;
689 case 0x03c1 : out[k++]=0xf5; break;
690 case 0x03a1 : out[k++]=0xd5; break;
691 case 0x03c2 : out[k++]=0xf7; break;
692 case 0x03c3 : out[k++]=0xf6; break;
693 case 0x03a3 : out[k++]=0xd6; break;
694 case 0x03c4 : out[k++]=0xf8; break;
695 case 0x03a4 : out[k++]=0xd8; break;
696 case 0x03b0 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xf9; break;
697 case 0x03cd : out[k++]=0xa2; out[k++]=0xf9; break;
698 case 0x03cb : out[k++]=0xa3; out[k++]=0xf9; break;
699 case 0x03c5 : out[k++]=0xf9; break;
700 case 0x038e : out[k++]=0xa2; out[k++]=0xd9; break;
701 case 0x03ab : out[k++]=0xa3; out[k++]=0xd9; break;
702 case 0x03a5 : out[k++]=0xd9; break;
703 case 0x03c6 : out[k++]=0xfa; break;
704 case 0x03a6 : out[k++]=0xda; break;
705 case 0x03c7 : out[k++]=0xfb; break;
706 case 0x03a7 : out[k++]=0xdb; break;
707 case 0x03c8 : out[k++]=0xfc; break;
708 case 0x03a8 : out[k++]=0xdc; break;
709 case 0x03ce : out[k++]=0xa2; out[k++]=0xfd; break;
710 case 0x03c9 : out[k++]=0xfd; break;
711 case 0x038f : out[k++]=0xa2; out[k++]=0xdd; break;
712 case 0x03a9 : out[k++]=0xdd; break;
716 cd->my_errno = YAZ_ICONV_EILSEQ;
/*
 * Read handler for Advance Greek input.  A loop first examines leading
 * bytes, among them 0x9e and 0x9f, while input remains; if the input
 * runs out there, cd->my_errno is set to YAZ_ICONV_EINVAL.
 */
727 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
728 size_t inbytesleft, size_t *no_read)
736 while (inbytesleft > 0)
742 else if (*inp == 0x9e)
746 else if (*inp == 0x9f)
756 if (inbytesleft == 0)
758 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/*
 * Write handler for Advance Greek output.  At least three bytes of
 * output space are demanded up front, since the longest mapping below
 * is three bytes; otherwise cd->my_errno is set to YAZ_ICONV_E2BIG.
 * Small letters are written as one byte in the range 0x81..0x99;
 * the other forms are the same letter byte preceded by one or more of
 * the bytes 0x9d, 0x9e and 0x9f.  YAZ_ICONV_EILSEQ is reported for a
 * character that cannot be written.
 */
986 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
987 char **outbuf, size_t *outbytesleft)
990 unsigned char *out = (unsigned char*) *outbuf;
991 if (*outbytesleft < 3)
993 cd->my_errno = YAZ_ICONV_E2BIG; /* no room for output */
998 case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
999 case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
1000 case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
1001 case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
1002 case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
1003 case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
1004 case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
1005 case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
1006 case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
1007 case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
1008 case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
1009 case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
1010 case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
1011 case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
1012 case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
1013 case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
1014 case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
1015 case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
1016 case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
1017 case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
1018 case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
1019 case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
1020 case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
1021 case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
1022 case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
1023 case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
1024 case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
1025 case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
1026 case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
1027 case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
1028 case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
1029 case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
1030 case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
1031 case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
1032 case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
1033 case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
1034 case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
1035 case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
1036 case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
1037 case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
1038 case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
1039 case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
1040 case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
1041 case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
1042 case 0x03b1 : out[k++]=0x81; break;
1043 case 0x03b2 : out[k++]=0x82; break;
1044 case 0x03b3 : out[k++]=0x83; break;
1045 case 0x03b4 : out[k++]=0x84; break;
1046 case 0x03b5 : out[k++]=0x85; break;
1047 case 0x03b6 : out[k++]=0x86; break;
1048 case 0x03b7 : out[k++]=0x87; break;
1049 case 0x03b8 : out[k++]=0x88; break;
1050 case 0x03b9 : out[k++]=0x89; break;
1051 case 0x03ba : out[k++]=0x8a; break;
1052 case 0x03bb : out[k++]=0x8b; break;
1053 case 0x03bc : out[k++]=0x8c; break;
1054 case 0x03bd : out[k++]=0x8d; break;
1055 case 0x03be : out[k++]=0x8e; break;
1056 case 0x03bf : out[k++]=0x8f; break;
1057 case 0x03c0 : out[k++]=0x90; break;
1058 case 0x03c1 : out[k++]=0x91; break;
1059 case 0x03c2 : out[k++]=0x92; break;
1060 case 0x03c3 : out[k++]=0x93; break;
1061 case 0x03c4 : out[k++]=0x94; break;
1062 case 0x03c5 : out[k++]=0x95; break;
1063 case 0x03c6 : out[k++]=0x96; break;
/*
 * NOTE(review): 0x03c7 (small chi) emits 0x96, the same byte as small
 * phi on the line above.  Capital chi (0x03a7) is written as 0x9f 0x97
 * and the neighbouring small letters run 0x95, 0x96, (gap), 0x98, so
 * 0x97 looks like the intended value here -- confirm and correct.
 */
1064 case 0x03c7 : out[k++]=0x96; break;
1065 case 0x03c8 : out[k++]=0x98; break;
1066 case 0x03c9 : out[k++]=0x99; break;
1070 cd->my_errno = YAZ_ICONV_EILSEQ;
1082 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
1083 size_t inbytesleft, size_t *no_read,
/*
 * Read handler for MARC-8 input.  Results are buffered in cd->comb_x
 * with their byte counts in cd->comb_no_read.  While buffered entries
 * remain (comb_offset < comb_size) the next one is handed out.
 * Otherwise the buffer is reset and up to eight results are collected
 * by calling yaz_read_marc8_comb repeatedly, advancing through the
 * input by the byte count of each call.  If the input is exhausted
 * while entries are already pending, cd->my_errno is set to
 * YAZ_ICONV_EINVAL.
 */
1086 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
1087 size_t inbytesleft, size_t *no_read)
1090 if (cd->comb_offset < cd->comb_size)
1092 *no_read = cd->comb_no_read[cd->comb_offset];
1093 x = cd->comb_x[cd->comb_offset];
1095 /* special case for double-diacritic combining characters,
1096 INVERTED BREVE and DOUBLE TILDE.
1097 We'll increment the no_read counter by 1, since we want to skip over
1098 the processing of the closing ligature character
1100 /* this code is no longer necessary.. our handlers code in
1101 yaz_marc8_?_conv (generated by charconv.tcl) now returns
1102 0 and no_read=1 when a sequence does not match the input.
1103 The SECOND HALFs in codetables.xml produces a non-existant
1104 entry in the conversion trie.. Hence when met, the input byte is
1105 skipped as it should (in yaz_iconv)
1108 if (x == 0x0361 || x == 0x0360)
1115 cd->comb_offset = 0;
1116 for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
1120 if (inbytesleft == 0 && cd->comb_size)
1122 cd->my_errno = YAZ_ICONV_EINVAL;
1127 x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
1130 cd->comb_x[cd->comb_size] = x;
1131 cd->comb_no_read[cd->comb_size] = *no_read;
1133 inbytesleft = inbytesleft - *no_read;
/*
 * Read handler for the MARC-8 variant that prefers precomposed Latin-1
 * characters.  Reads through yaz_read_marc8; when a non-zero character
 * comes back and exactly one entry is buffered, latin1_comb is
 * searched for an entry whose x2 equals that buffered entry and whose
 * x1 equals the character just read.  On a match the composed value y
 * is returned instead and the byte count of the buffered entry is
 * added to *no_read.
 */
1138 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
1139 size_t inbytesleft, size_t *no_read)
1141 unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
1142 if (x && cd->comb_size == 1)
1144 /* For MARC8s we try to get a Latin-1 page code out of it */
1146 for (i = 0; latin1_comb[i].x1; i++)
1147 if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
1149 *no_read += cd->comb_no_read[0];
1151 x = latin1_comb[i].y;
/*
 * Reads one MARC-8 character, processing any escape sequences that
 * precede it.  For every leading ESC byte (27) the intermediate bytes
 * found in "(,$!)-" are skipped and the byte that follows is stored in
 * cd->marc8_esc_mode; the bytes consumed are added to *no_read.
 * Running out of input inside an escape sequence sets cd->my_errno to
 * YAZ_ICONV_EINVAL.  The character itself is decoded by the
 * yaz_marc8_XX_conv routine selected by cd->marc8_esc_mode, with
 * YAZ_ICONV_EILSEQ reported when no routine applies, and the bytes
 * that routine consumed are added to *no_read.
 */
1158 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
1159 size_t inbytesleft, size_t *no_read,
1163 while(inbytesleft >= 1 && inp[0] == 27)
1165 size_t inbytesleft0 = inbytesleft;
1168 while(inbytesleft > 0 && strchr("(,$!)-", *inp))
1173 if (inbytesleft <= 0)
1176 cd->my_errno = YAZ_ICONV_EINVAL;
1179 cd->marc8_esc_mode = *inp++;
/* account for every byte of the escape sequence just consumed */
1181 (*no_read) += inbytesleft0 - inbytesleft;
1183 if (inbytesleft <= 0)
1188 size_t no_read_sub = 0;
1191 switch(cd->marc8_esc_mode)
1193 case 'B': /* Basic ASCII */
1194 case 's': /* ASCII */
1195 case 'E': /* ANSEL */
1196 x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb);
1200 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb);
1203 case 'g': /* Greek */
1204 x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb);
1206 case 'b': /* Subscripts */
1207 x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb);
1209 case 'p': /* Superscripts */
1210 x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb);
1212 case '2': /* Basic Hebrew */
1213 x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb);
1215 case 'N': /* Basic Cyrillic */
1216 x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1218 case 'Q': /* Extended Cyrillic */
1219 x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb);
1221 case '3': /* Basic Arabic */
1222 x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb);
1224 case '4': /* Extended Arabic */
1225 x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb);
1227 case 'S': /* Greek */
1228 x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb);
1230 case '1': /* Chinese, Japanese, Korean (EACC) */
1231 x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb);
1235 cd->my_errno = YAZ_ICONV_EILSEQ;
1238 *no_read += no_read_sub;
/*
 * Write handler for UTF-8 output.  Delegates to yaz_write_UTF8_char
 * and lets it report errors through cd->my_errno.
 */
1243 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
1244 char **outbuf, size_t *outbytesleft)
1246 return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
/*
 * Encodes character x as UTF-8 at *outbuf.
 *
 * x             character value to encode
 * outbuf        output position; advanced past the bytes written
 * outbytesleft  space remaining at *outbuf; reduced by the bytes written
 * error         receives YAZ_ICONV_E2BIG when the output does not fit
 *
 * The encoded length (one to six bytes) follows from the magnitude of
 * x.  Every branch tests the value range together with the available
 * space, so a character whose encoding does not fit matches no branch
 * and ends in the E2BIG path, which returns (size_t)(-1).
 */
1249 size_t yaz_write_UTF8_char(unsigned long x,
1250 char **outbuf, size_t *outbytesleft,
1253 unsigned char *outp = (unsigned char *) *outbuf;
1255 if (x <= 0x7f && *outbytesleft >= 1)
1257 *outp++ = (unsigned char) x;
1260 else if (x <= 0x7ff && *outbytesleft >= 2)
1262 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
1263 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1264 (*outbytesleft) -= 2;
1266 else if (x <= 0xffff && *outbytesleft >= 3)
1268 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
1269 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1270 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1271 (*outbytesleft) -= 3;
1273 else if (x <= 0x1fffff && *outbytesleft >= 4)
1275 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
1276 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1277 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1278 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1279 (*outbytesleft) -= 4;
1281 else if (x <= 0x3ffffff && *outbytesleft >= 5)
1283 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
1284 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1285 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1286 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1287 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1288 (*outbytesleft) -= 5;
/* everything above 0x3ffffff takes the six byte form */
1290 else if (*outbytesleft >= 6)
1292 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
1293 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
1294 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1295 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1296 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1297 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1298 (*outbytesleft) -= 6;
1302 *error = YAZ_ICONV_E2BIG; /* no room for output */
1303 return (size_t)(-1);
1305 *outbuf = (char *) outp;
/*
 * Write handler for ISO-8859-1 output with composition of base letter
 * plus combining mark.
 *
 * A character in the range 33..126 is not written at once but parked
 * in cd->compose_char.  On the next call the pair (compose_char, x) is
 * looked up in latin1_comb; on a match x is replaced by the composed
 * code y, otherwise the parked character is written out as it is.  In
 * both cases compose_char is then cleared and x is processed as the
 * current character.
 *
 * Errors are reported through cd->my_errno: YAZ_ICONV_E2BIG when there
 * is no room for a byte (the function returns (size_t)(-1) and, in the
 * pending case, keeps compose_char) and YAZ_ICONV_EILSEQ when x is 0
 * or above 255.
 */
1309 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
1310 char **outbuf, size_t *outbytesleft)
1312 /* list of two char unicode sequence that, when combined, are
1313 equivalent to single unicode chars that can be represented in
1315 Regular iconv on Linux at least does not seem to convert these,
1316 but since MARC-8 to UTF-8 generates these composed sequence
1317 we get a better chance of a successful MARC-8 -> ISO-8859-1
1319 unsigned char *outp = (unsigned char *) *outbuf;
1321 if (cd->compose_char)
1324 for (i = 0; latin1_comb[i].x1; i++)
1325 if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
1327 x = latin1_comb[i].y;
1330 if (*outbytesleft < 1)
1331 { /* no room. Retain compose_char and bail out */
1332 cd->my_errno = YAZ_ICONV_E2BIG;
1333 return (size_t)(-1);
1335 if (!latin1_comb[i].x1)
1336 { /* not found. Just write compose_char */
1337 *outp++ = (unsigned char) cd->compose_char;
1339 *outbuf = (char *) outp;
1341 /* compose_char used so reset it. x now holds current char */
1342 cd->compose_char = 0;
/* printable ASCII may start a composition, so hold it back for now */
1345 if (x > 32 && x < 127 && cd->compose_char == 0)
1347 cd->compose_char = x;
1350 else if (x > 255 || x < 1)
1352 cd->my_errno = YAZ_ICONV_EILSEQ;
1355 else if (*outbytesleft < 1)
1357 cd->my_errno = YAZ_ICONV_E2BIG;
1358 return (size_t)(-1);
1360 *outp++ = (unsigned char) x;
1362 *outbuf = (char *) outp;
/*
 * Flush handler for ISO-8859-1 output.  Writes the character still
 * parked in cd->compose_char, if any, and clears it.  When there is no
 * room for that byte, cd->my_errno is set to YAZ_ICONV_E2BIG and
 * (size_t)(-1) is returned.
 */
1366 static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
1367 char **outbuf, size_t *outbytesleft)
1369 if (cd->compose_char)
1371 unsigned char *outp = (unsigned char *) *outbuf;
1372 if (*outbytesleft < 1)
1374 cd->my_errno = YAZ_ICONV_E2BIG;
1375 return (size_t)(-1);
1377 *outp++ = (unsigned char) cd->compose_char;
1379 *outbuf = (char *) outp;
1380 cd->compose_char = 0;
/*
 * Write handler for UCS-4 output with the most significant byte first.
 * Writes four bytes and advances *outbuf; when fewer than four bytes
 * are available, cd->my_errno is set to YAZ_ICONV_E2BIG and
 * (size_t)(-1) is returned.
 */
1385 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
1386 char **outbuf, size_t *outbytesleft)
1388 unsigned char *outp = (unsigned char *) *outbuf;
1389 if (*outbytesleft >= 4)
1391 *outp++ = (unsigned char) (x>>24);
1392 *outp++ = (unsigned char) (x>>16);
1393 *outp++ = (unsigned char) (x>>8);
1394 *outp++ = (unsigned char) x;
1395 (*outbytesleft) -= 4;
1399 cd->my_errno = YAZ_ICONV_E2BIG;
1400 return (size_t)(-1);
1402 *outbuf = (char *) outp;
/*
 * Write handler for UCS-4 output with the least significant byte
 * first.  Writes four bytes and advances *outbuf; when fewer than four
 * bytes are available, cd->my_errno is set to YAZ_ICONV_E2BIG and
 * (size_t)(-1) is returned.
 */
1406 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1407 char **outbuf, size_t *outbytesleft)
1409 unsigned char *outp = (unsigned char *) *outbuf;
1410 if (*outbytesleft >= 4)
1412 *outp++ = (unsigned char) x;
1413 *outp++ = (unsigned char) (x>>8);
1414 *outp++ = (unsigned char) (x>>16);
1415 *outp++ = (unsigned char) (x>>24);
1416 (*outbytesleft) -= 4;
1420 cd->my_errno = YAZ_ICONV_E2BIG;
1421 return (size_t)(-1);
1423 *outbuf = (char *) outp;
/*
 * Finds the MARC-8 encoding of Unicode character x.
 *
 * x         character to look up
 * comb      passed on to the reverse conversion routines
 * page_chr  receives the escape sequence of the page that holds x
 *
 * The character is first encoded as a NUL-terminated UTF-8 string in
 * a local buffer.  That string is then offered to the reverse tables
 * yaz_marc8r_XX_conv in the order written below, and *page_chr is set
 * to the escape sequence belonging to the table in use.  The 42 and
 * 45 tables both select the page "\033(B".  cd->my_errno is set to
 * YAZ_ICONV_EILSEQ when the UTF-8 encoding step fails, and again on
 * the final line (presumably when no table knows x -- confirm).
 */
1427 static unsigned long lookup_marc8(yaz_iconv_t cd,
1428 unsigned long x, int *comb,
1429 const char **page_chr)
1432 char *utf8_outbuf = utf8_buf;
/* one byte of the buffer is kept free for the terminating NUL */
1433 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1435 r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
1436 if (r == (size_t)(-1))
1438 cd->my_errno = YAZ_ICONV_EILSEQ;
1444 size_t inbytesleft, no_read_sub = 0;
1447 *utf8_outbuf = '\0';
1448 inp = (unsigned char *) utf8_buf;
1449 inbytesleft = strlen(utf8_buf);
1451 x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb);
1454 *page_chr = "\033(B";
1457 x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb);
1460 *page_chr = "\033(B";
1463 x = yaz_marc8r_67_conv(inp, inbytesleft, &no_read_sub, comb);
1466 *page_chr = "\033g";
1469 x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb);
1472 *page_chr = "\033b";
1475 x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb);
1478 *page_chr = "\033p";
1481 x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb);
1484 *page_chr = "\033(2";
1487 x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1490 *page_chr = "\033(N";
1493 x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb);
1496 *page_chr = "\033(Q";
1499 x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb);
1502 *page_chr = "\033(3";
1505 x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb);
1508 *page_chr = "\033(4";
1511 x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb);
1514 *page_chr = "\033(S";
1517 x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb);
1520 *page_chr = "\033$1";
1523 cd->my_errno = YAZ_ICONV_EILSEQ;
/*
 * Writes the pending MARC-8 character together with the combining
 * bytes queued for it, then clears that state.
 *
 * The pending value cd->write_marc8_last is split into bytes taken
 * from bits 16-23, 8-15 and 0-7, which are collected in out_buf.  If
 * those bytes plus the queued combining bytes plus one more byte do
 * not fit in *outbytesleft, cd->my_errno is set to YAZ_ICONV_E2BIG and
 * (size_t)(-1) is returned.  Output order is: queued combining bytes,
 * then the bytes of the pending character, then the second half
 * character when one is set.
 */
1528 static size_t flush_combos(yaz_iconv_t cd,
1529 char **outbuf, size_t *outbytesleft)
1531 unsigned long y = cd->write_marc8_last;
1534 size_t i, out_no = 0;
1539 byte = (unsigned char )((y>>16) & 0xff);
1541 out_buf[out_no++] = byte;
1542 byte = (unsigned char)((y>>8) & 0xff);
1544 out_buf[out_no++] = byte;
1545 byte = (unsigned char )(y & 0xff);
1547 out_buf[out_no++] = byte;
/* the extra byte covers the optional second half character */
1549 if (out_no + cd->write_marc8_comb_no + 1 > *outbytesleft)
1551 cd->my_errno = YAZ_ICONV_E2BIG;
1552 return (size_t) (-1);
1555 for (i = 0; i < cd->write_marc8_comb_no; i++)
1557 /* all MARC-8 combined characters are simple bytes */
1558 byte = (unsigned char )(cd->write_marc8_comb_ch[i]);
1559 *(*outbuf)++ = byte;
1562 memcpy(*outbuf, out_buf, out_no);
1564 (*outbytesleft) -= out_no;
1565 if (cd->write_marc8_second_half_char)
1567 *(*outbuf)++ = cd->write_marc8_second_half_char;
1571 cd->write_marc8_last = 0;
1572 cd->write_marc8_comb_no = 0;
1573 cd->write_marc8_second_half_char = 0;
/*
 * Switches the MARC-8 output page to page_chr when it differs from
 * the page recorded in cd->write_marc8_page_chr.
 *
 * A switch demands eight bytes of output space up front; without them
 * cd->my_errno is set to YAZ_ICONV_E2BIG and (size_t)(-1) is returned.
 * The new page is recorded before anything is written.  When the old
 * page is one of "\033p", "\033g" or "\033b", a leave sequence is
 * involved: if the new page is not the ASCII page "\033(B", that
 * sequence is written first and the escape sequence of the new page
 * follows.  Each sequence is copied with memcpy and *outbytesleft is
 * reduced by its length.
 */
1577 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
1578 char **outbuf, size_t *outbytesleft,
1579 const char *page_chr)
1581 const char *old_page_chr = cd->write_marc8_page_chr;
1582 if (strcmp(page_chr, old_page_chr))
1585 const char *page_out = page_chr;
1587 if (*outbytesleft < 8)
1589 cd->my_errno = YAZ_ICONV_E2BIG;
1591 return (size_t) (-1);
1593 cd->write_marc8_page_chr = page_chr;
1595 if (!strcmp(old_page_chr, "\033p")
1596 || !strcmp(old_page_chr, "\033g")
1597 || !strcmp(old_page_chr, "\033b"))
1599 /* Technique 1 leave */
1601 if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */
1603 /* Must leave script + enter new page */
1604 plen = strlen(page_out);
1605 memcpy(*outbuf, page_out, plen);
1607 (*outbytesleft) -= plen;
1608 page_out = page_chr;
1611 plen = strlen(page_out);
1612 memcpy(*outbuf, page_out, plen);
1614 (*outbytesleft) -= plen;
1620 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1621 char **outbuf, size_t *outbytesleft)
1624 const char *page_chr = 0;
1625 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1628 return (size_t) (-1);
1633 cd->write_marc8_second_half_char = 0xEC;
1634 else if (x == 0x0360)
1635 cd->write_marc8_second_half_char = 0xFB;
1637 if (cd->write_marc8_comb_no < 6)
1638 cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y;
1642 size_t r = flush_combos(cd, outbuf, outbytesleft);
1646 r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr);
1649 cd->write_marc8_last = y;
1654 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1655 char **outbuf, size_t *outbytesleft)
1657 size_t r = flush_combos(cd, outbuf, outbytesleft);
1660 return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, "\033(B");
1663 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1664 char **outbuf, size_t *outbytesleft)
1667 for (i = 0; latin1_comb[i].x1; i++)
1669 if (x == latin1_comb[i].y)
1672 /* save the output pointers .. */
1673 char *outbuf0 = *outbuf;
1674 size_t outbytesleft0 = *outbytesleft;
1675 int last_ch = cd->write_marc8_last;
1677 r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1678 outbuf, outbytesleft);
1681 r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1682 outbuf, outbytesleft);
1683 if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1685 /* not enough room. reset output to original values */
1687 *outbytesleft = outbytesleft0;
1688 cd->write_marc8_last = last_ch;
1693 return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
1698 static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
1699 char **outbuf, size_t *outbytesleft)
1701 unsigned char *outp = (unsigned char *) *outbuf;
1703 if (*outbytesleft >= sizeof(wchar_t))
1706 memcpy(outp, &wch, sizeof(wch));
1707 outp += sizeof(wch);
1708 (*outbytesleft) -= sizeof(wch);
1712 cd->my_errno = YAZ_ICONV_E2BIG;
1713 return (size_t)(-1);
1715 *outbuf = (char *) outp;
1720 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1722 return cd->read_handle && cd->write_handle;
/* Creates a conversion descriptor for converting text from the encoding
 * named by fromcode to the encoding named by tocode.
 *
 * A read handler is chosen by comparing fromcode against a list of names,
 * and a write handler (plus, for some encodings, a flush handler) by
 * comparing tocode. A yaz_matchstr() result of 0 is treated as a match.
 * When either handler is still unset, iconv_open(tocode, fromcode) is
 * tried instead.
 *
 * NOTE(review): several lines of this function are not visible in this
 * view (braces, what happens for a leading '@', the failure paths and the
 * final return) - confirm those against the full file.
 */
1725 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1727 yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
/* start with no handlers selected and an "unknown" error state */
1729 cd->write_handle = 0;
1730 cd->read_handle = 0;
1731 cd->init_handle = 0;
1732 cd->flush_handle = 0;
1733 cd->my_errno = YAZ_ICONV_UNKNOWN;
1735 /* a useful hack: if fromcode has a leading @,
1736 the library does not use YAZ's own conversions .. */
1737 if (fromcode[0] == '@')
/* source encoding: pick the read handler; UTF8 also gets an init handler */
1741 if (!yaz_matchstr(fromcode, "UTF8"))
1743 cd->read_handle = yaz_read_UTF8;
1744 cd->init_handle = yaz_init_UTF8;
1746 else if (!yaz_matchstr(fromcode, "ISO88591"))
1747 cd->read_handle = yaz_read_ISO8859_1;
1748 else if (!yaz_matchstr(fromcode, "UCS4"))
1749 cd->read_handle = yaz_read_UCS4;
1750 else if (!yaz_matchstr(fromcode, "UCS4LE"))
1751 cd->read_handle = yaz_read_UCS4LE;
1752 else if (!yaz_matchstr(fromcode, "MARC8"))
1753 cd->read_handle = yaz_read_marc8;
1754 else if (!yaz_matchstr(fromcode, "MARC8s"))
1755 cd->read_handle = yaz_read_marc8s;
1756 else if (!yaz_matchstr(fromcode, "advancegreek"))
1757 cd->read_handle = yaz_read_advancegreek;
/* two spellings of the same name select the same handler */
1758 else if (!yaz_matchstr(fromcode, "iso54281984"))
1759 cd->read_handle = yaz_read_iso5428_1984;
1760 else if (!yaz_matchstr(fromcode, "iso5428:1984"))
1761 cd->read_handle = yaz_read_iso5428_1984;
1763 else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1764 cd->read_handle = yaz_read_wchar_t;
/* target encoding: pick the write handler, and a flush handler for the
   encodings that have one */
1767 if (!yaz_matchstr(tocode, "UTF8"))
1768 cd->write_handle = yaz_write_UTF8;
1769 else if (!yaz_matchstr(tocode, "ISO88591"))
1771 cd->write_handle = yaz_write_ISO8859_1;
1772 cd->flush_handle = yaz_flush_ISO8859_1;
1774 else if (!yaz_matchstr (tocode, "UCS4"))
1775 cd->write_handle = yaz_write_UCS4;
1776 else if (!yaz_matchstr(tocode, "UCS4LE"))
1777 cd->write_handle = yaz_write_UCS4LE;
/* MARC8 and MARC8s use the same write and flush handlers on output */
1778 else if (!yaz_matchstr(tocode, "MARC8"))
1780 cd->write_handle = yaz_write_marc8;
1781 cd->flush_handle = yaz_flush_marc8;
1783 else if (!yaz_matchstr(tocode, "MARC8s"))
1785 cd->write_handle = yaz_write_marc8;
1786 cd->flush_handle = yaz_flush_marc8;
1788 else if (!yaz_matchstr(tocode, "advancegreek"))
1790 cd->write_handle = yaz_write_advancegreek;
1792 else if (!yaz_matchstr(tocode, "iso54281984"))
1794 cd->write_handle = yaz_write_iso5428_1984;
1796 else if (!yaz_matchstr(tocode, "iso5428:1984"))
1798 cd->write_handle = yaz_write_iso5428_1984;
1801 else if (!yaz_matchstr(tocode, "WCHAR_T"))
1802 cd->write_handle = yaz_write_wchar_t;
/* a handler is missing for one side: hand the pair to iconv_open();
   (iconv_t)(-1) is its failure value */
1807 if (!cd->read_handle || !cd->write_handle)
1809 cd->iconv_cd = iconv_open (tocode, fromcode);
1810 if (cd->iconv_cd == (iconv_t) (-1))
/* NOTE(review): the same test appears a second time - presumably the
   alternative branch for builds without iconv; confirm in the full file */
1817 if (!cd->read_handle || !cd->write_handle)
/* Converts characters from *inbuf to *outbuf, either through the system
 * iconv() or through the read/write handlers stored in cd.
 *
 * cd            conversion descriptor
 * inbuf         in/out input cursor; a null inbuf or *inbuf requests a flush
 * inbytesleft   in/out number of input bytes remaining
 * outbuf        in/out output cursor
 * outbytesleft  in/out number of output bytes remaining
 *
 * NOTE(review): several lines of this function are not visible in this
 * view (braces, guards, break/return statements) - the comments below only
 * describe the statements that are shown.
 */
1827 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1828 char **outbuf, size_t *outbytesleft)
/* system iconv path: on failure, translate yaz_errno() into one of the
   YAZ_ICONV_* codes stored in cd->my_errno */
1837 iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1838 if (r == (size_t)(-1))
1840 switch (yaz_errno())
1843 cd->my_errno = YAZ_ICONV_E2BIG;
1846 cd->my_errno = YAZ_ICONV_EINVAL;
1849 cd->my_errno = YAZ_ICONV_EILSEQ;
1852 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* reset of the conversion state: error code, MARC-8 read state, and the
   MARC-8 write state (nothing pending, output page ESC ( B) */
1864 cd->my_errno = YAZ_ICONV_UNKNOWN;
1865 cd->marc8_esc_mode = 'B';
1867 cd->comb_offset = cd->comb_size = 0;
1868 cd->compose_char = 0;
1870 cd->write_marc8_comb_no = 0;
1871 cd->write_marc8_second_half_char = 0;
1872 cd->write_marc8_last = 0;
1873 cd->write_marc8_page_chr = "\033(B";
/* give the init handler, if any, a look at the start of the input; the
   bytes it reports in no_read are taken off *inbytesleft */
1881 if (cd->init_handle && inbuf && *inbuf)
1884 size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1885 *inbytesleft, &no_read);
1888 if (cd->my_errno == YAZ_ICONV_EINVAL)
1893 *inbytesleft -= no_read;
/* flush request (no input buffer): write the saved character
   cd->unget_x, then call the flush handler if one is set.
   NOTE(review): the result of write_handle is overwritten by the result
   of flush_handle, so a failed write can go unreported - confirm whether
   that is intended. */
1899 if (!inbuf || !*inbuf)
1901 if (outbuf && *outbuf)
1904 r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
1905 if (cd->flush_handle)
1906 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
/* conversion steps: a saved character reuses its saved byte count
   cd->no_read_x; with no input left, r becomes the number of input bytes
   consumed (*inbuf - inbuf0); otherwise read_handle decodes a character
   into x and write_handle encodes it. When the write reports
   YAZ_ICONV_E2BIG the byte count is saved in cd->no_read_x; the last two
   statements advance the input by no_read. */
1921 no_read = cd->no_read_x;
1925 if (*inbytesleft == 0)
1927 r = *inbuf - inbuf0;
1930 x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1940 r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
1943 /* unable to write it. save it because read_handle cannot
1945 if (cd->my_errno == YAZ_ICONV_E2BIG)
1948 cd->no_read_x = no_read;
1954 *inbytesleft -= no_read;
1955 (*inbuf) += no_read;
1960 int yaz_iconv_error (yaz_iconv_t cd)
1962 return cd->my_errno;
/* Closes a conversion descriptor. The visible statement hands
 * cd->iconv_cd to iconv_close().
 *
 * NOTE(review): the lines around that call are not visible in this view
 * (any guard on cd->iconv_cd, release of cd itself, the return value) -
 * confirm against the full file.
 */
1965 int yaz_iconv_close (yaz_iconv_t cd)
1969 iconv_close (cd->iconv_cd);
1978 * indent-tabs-mode: nil
1980 * vim: shiftwidth=4 tabstop=8 expandtab