2 * Copyright (C) 1995-2007, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.38 2007-05-03 13:14:41 adam Exp $
9 * \brief Implements simple ICONV
11 * This implements an interface similar to that of iconv and
12 * is used by YAZ to interface with iconv (if present).
13 * For systems where iconv is not present, this layer
14 * provides a few important conversions: UTF-8, MARC-8, Latin-1.
17 * http://www.loc.gov/marc/specifications/speccharmarc8.html
37 #include <yaz/yaz-util.h>
39 unsigned long yaz_marc8_1_conv(unsigned char *inp, size_t inbytesleft,
40 size_t *no_read, int *combining);
41 unsigned long yaz_marc8_2_conv(unsigned char *inp, size_t inbytesleft,
42 size_t *no_read, int *combining);
43 unsigned long yaz_marc8_3_conv(unsigned char *inp, size_t inbytesleft,
44 size_t *no_read, int *combining);
45 unsigned long yaz_marc8_4_conv(unsigned char *inp, size_t inbytesleft,
46 size_t *no_read, int *combining);
47 unsigned long yaz_marc8_5_conv(unsigned char *inp, size_t inbytesleft,
48 size_t *no_read, int *combining);
49 unsigned long yaz_marc8_6_conv(unsigned char *inp, size_t inbytesleft,
50 size_t *no_read, int *combining);
51 unsigned long yaz_marc8_7_conv(unsigned char *inp, size_t inbytesleft,
52 size_t *no_read, int *combining);
53 unsigned long yaz_marc8_8_conv(unsigned char *inp, size_t inbytesleft,
54 size_t *no_read, int *combining);
55 unsigned long yaz_marc8_9_conv(unsigned char *inp, size_t inbytesleft,
56 size_t *no_read, int *combining);
59 unsigned long yaz_marc8r_1_conv(unsigned char *inp, size_t inbytesleft,
60 size_t *no_read, int *combining);
61 unsigned long yaz_marc8r_2_conv(unsigned char *inp, size_t inbytesleft,
62 size_t *no_read, int *combining);
63 unsigned long yaz_marc8r_3_conv(unsigned char *inp, size_t inbytesleft,
64 size_t *no_read, int *combining);
65 unsigned long yaz_marc8r_4_conv(unsigned char *inp, size_t inbytesleft,
66 size_t *no_read, int *combining);
67 unsigned long yaz_marc8r_5_conv(unsigned char *inp, size_t inbytesleft,
68 size_t *no_read, int *combining);
69 unsigned long yaz_marc8r_6_conv(unsigned char *inp, size_t inbytesleft,
70 size_t *no_read, int *combining);
71 unsigned long yaz_marc8r_7_conv(unsigned char *inp, size_t inbytesleft,
72 size_t *no_read, int *combining);
73 unsigned long yaz_marc8r_8_conv(unsigned char *inp, size_t inbytesleft,
74 size_t *no_read, int *combining);
75 unsigned long yaz_marc8r_9_conv(unsigned char *inp, size_t inbytesleft,
76 size_t *no_read, int *combining);
78 struct yaz_iconv_struct {
81 size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
82 size_t inbytesleft, size_t *no_read);
83 unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
84 size_t inbytesleft, size_t *no_read);
85 size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
86 char **outbuf, size_t *outbytesleft);
87 size_t (*flush_handle)(yaz_iconv_t cd,
88 char **outbuf, size_t *outbytesleft);
93 unsigned long comb_x[8];
94 size_t comb_no_read[8];
96 unsigned long unget_x;
100 unsigned long compose_char;
102 unsigned long write_marc8_comb_ch[8];
103 size_t write_marc8_comb_no;
104 unsigned write_marc8_second_half_char;
105 unsigned long write_marc8_last;
106 const char *write_marc8_page_chr;
110 unsigned long x1, x2;
113 { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
114 { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
115 { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
116 { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
117 { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
118 { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
119 /* no need for 0xc6 LATIN CAPITAL LETTER AE */
120 { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
121 { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
122 { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
123 { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
124 { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
125 { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
126 { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
127 { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
128 { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
129 { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
130 { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
131 { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
132 { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
133 { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
134 { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
135 /* omitted: 0xd7 MULTIPLICATION SIGN */
136 /* omitted: 0xd8 LATIN CAPITAL LETTER O WITH STROKE */
137 { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
138 { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
139 { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
140 { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
141 { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
142 /* omitted: 0xde LATIN CAPITAL LETTER THORN */
143 /* omitted: 0xdf LATIN SMALL LETTER SHARP S */
144 { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
145 { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
146 { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
147 { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
148 { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
149 { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
150 /* omitted: 0xe6 LATIN SMALL LETTER AE */
151 { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
152 { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
153 { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
154 { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
155 { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
156 { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
157 { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
158 { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
159 { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
160 /* omitted: 0xf0 LATIN SMALL LETTER ETH */
161 { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
162 { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
163 { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
164 { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
165 { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
166 { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
167 /* omitted: 0xf7 DIVISION SIGN */
168 /* omitted: 0xf8 LATIN SMALL LETTER O WITH STROKE */
169 { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
170 { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
171 { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
172 { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
173 { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
174 /* omitted: 0xfe LATIN SMALL LETTER THORN */
175 { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
/* Read handler for ISO-8859-1 input.  The decoded value starts out as the
   single input byte inp[0].
   NOTE(review): the remaining statements of this function are not visible
   in this listing. */
180 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
181 size_t inbytesleft, size_t *no_read)
183 unsigned long x = inp[0];
/* Init handler for UTF-8 input: examines the first bytes of the buffer for
   a byte-order mark.  One path (its guard is not visible in this listing)
   reports YAZ_ICONV_EINVAL through cd->my_errno.
   NOTE(review): the statement controlled by the BOM test below is also not
   visible here; presumably it makes *no_read skip the 3-byte BOM - confirm. */
189 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
190 size_t inbytesleft, size_t *no_read)
199 cd->my_errno = YAZ_ICONV_EINVAL;
/* The UTF-8 BOM is EF BB BF, so the second byte must EQUAL 0xbb.  The
   former "!=" never matched a real BOM and instead matched unrelated
   sequences such as EF BC BF. */
202 if (inp[1] == 0xbb && inp[2] == 0xbf)
/* Decodes one UTF-8 encoded character from inp.
   The visible branches dispatch on the lead byte inp[0] and on the number
   of bytes available: 2-byte (lead <= 0xdf), 3-byte (<= 0xef), 4-byte
   (<= 0xf7), 5-byte (<= 0xfb) and 6-byte (<= 0xfd) forms.  Each branch
   builds x from the low bits of the lead byte followed by 6 bits from every
   continuation byte.  Failures are reported through *error as
   YAZ_ICONV_EILSEQ or YAZ_ICONV_EINVAL.
   NOTE(review): part of the signature, the continuation-byte checks and the
   *no_read updates are on lines missing from this listing. */
209 unsigned long yaz_read_UTF8_char(unsigned char *inp,
210 size_t inbytesleft, size_t *no_read,
/* a lead byte of at most 0xbf, or of at least 0xfe, is singled out here */
220 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
223 *error = YAZ_ICONV_EILSEQ;
225 else if (inp[0] <= 0xdf && inbytesleft >= 2)
227 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
233 *error = YAZ_ICONV_EILSEQ;
236 else if (inp[0] <= 0xef && inbytesleft >= 3)
238 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
245 *error = YAZ_ICONV_EILSEQ;
248 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
250 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
251 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
257 *error = YAZ_ICONV_EILSEQ;
260 else if (inp[0] <= 0xfb && inbytesleft >= 5)
262 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
263 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
270 *error = YAZ_ICONV_EILSEQ;
273 else if (inp[0] <= 0xfd && inbytesleft >= 6)
275 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
276 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
277 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
283 *error = YAZ_ICONV_EILSEQ;
/* presumably the fall-through case: a valid lead byte but too few bytes
   available for the sequence - confirm against the full source */
289 *error = YAZ_ICONV_EINVAL;
/* Read handler for UTF-8 input: forwards to yaz_read_UTF8_char and lets it
   store any error code directly in cd->my_errno. */
294 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
295 size_t inbytesleft, size_t *no_read)
297 return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
/* Read handler for big-endian UCS-4 input: assembles one value from the
   four bytes inp[0]..inp[3], most significant byte first.  A path whose
   guard is not visible in this listing reports incomplete input as
   YAZ_ICONV_EINVAL through cd->my_errno. */
300 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
301 size_t inbytesleft, size_t *no_read)
307 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* inp[0] is widened before shifting: as a promoted int, a byte >= 0x80
   shifted left by 24 overflows, which is undefined behaviour. */
312 x = ((unsigned long) inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
/* Read handler for little-endian UCS-4 input: assembles one value from the
   four bytes inp[0]..inp[3], least significant byte first.  A path whose
   guard is not visible in this listing reports incomplete input as
   YAZ_ICONV_EINVAL through cd->my_errno. */
318 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
319 size_t inbytesleft, size_t *no_read)
325 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* inp[3] is widened before shifting: as a promoted int, a byte >= 0x80
   shifted left by 24 overflows, which is undefined behaviour. */
330 x = ((unsigned long) inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
/* Read handler for input stored as native wchar_t values.
   Requires at least sizeof(wchar_t) bytes; otherwise YAZ_ICONV_EINVAL is
   stored in cd->my_errno.  The character is copied out with memcpy and
   *no_read is set to sizeof(wch).
   NOTE(review): the declaration of wch and the return are not visible in
   this listing. */
337 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
338 size_t inbytesleft, size_t *no_read)
342 if (inbytesleft < sizeof(wchar_t))
344 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
350 memcpy (&wch, inp, sizeof(wch));
352 *no_read = sizeof(wch);
/* Read handler for ISO 5428:1984 (Greek) input.
   A loop walks the input while bytes remain and treats 0xa3 specially
   (the matching writer emits 0xa2 and 0xa3 as prefix bytes ahead of the
   letter byte).  Running out of input is reported as YAZ_ICONV_EINVAL.
   The switch below has one case per Greek letter byte.
   NOTE(review): the statements inside each case, and the handling of other
   prefix bytes, are on lines missing from this listing. */
358 static unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
359 size_t inbytesleft, size_t *no_read)
366 while (inbytesleft > 0)
372 else if (*inp == 0xa3)
382 if (inbytesleft == 0)
384 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
389 case 0xe1: // alpha small
395 case 0xc1: // alpha capital
402 case 0xe3: // Beta small
405 case 0xc2: // Beta capital
409 case 0xe4: // Gamma small
412 case 0xc4: // Gamma capital
416 case 0xe5: // Delta small
419 case 0xc5: // Delta capital
422 case 0xe6: // epsilon small
428 case 0xc6: // epsilon capital
434 case 0xe9: // Zeta small
437 case 0xc9: // Zeta capital
440 case 0xea: // Eta small
446 case 0xca: // Eta capital
452 case 0xeb: // Theta small
455 case 0xcb: // Theta capital
458 case 0xec: // Iota small
470 case 0xcc: // Iota capital
479 case 0xed: // Kappa small
482 case 0xcd: // Kappa capital
485 case 0xee: // Lambda small
488 case 0xce: // Lambda capital
491 case 0xef: // Mu small
494 case 0xcf: // Mu capital
497 case 0xf0: // Nu small
500 case 0xd0: // Nu capital
503 case 0xf1: // Xi small
506 case 0xd1: // Xi capital
509 case 0xf2: // Omicron small
515 case 0xd2: // Omicron capital
521 case 0xf3: // Pi small
524 case 0xd3: // Pi capital
527 case 0xf5: // Rho small
530 case 0xd5: // Rho capital
533 case 0xf7: // Sigma small (end of words)
536 case 0xf6: // Sigma small
539 case 0xd6: // Sigma capital
542 case 0xf8: // Tau small
545 case 0xd8: // Tau capital
548 case 0xf9: // Upsilon small
560 case 0xd9: // Upsilon capital
569 case 0xfa: // Phi small
572 case 0xda: // Phi capital
575 case 0xfb: // Chi small
578 case 0xdb: // Chi capital
581 case 0xfc: // Psi small
584 case 0xdc: // Psi capital
587 case 0xfd: // Omega small
593 case 0xdd: // Omega capital
/* Write handler for ISO 5428:1984 (Greek) output.
   Maps one Unicode Greek code point x to 1-3 output bytes in out[].  Plain
   letters are a single byte; the accented code points are written as the
   prefix byte 0xa2 and/or 0xa3 followed by the letter byte.  At least 3
   free output bytes are required up front (the longest sequence), else
   YAZ_ICONV_E2BIG.  YAZ_ICONV_EILSEQ is set on a path that is presumably
   the switch default (not visible in this listing). */
608 static size_t yaz_write_iso5428_1984(yaz_iconv_t cd, unsigned long x,
609 char **outbuf, size_t *outbytesleft)
612 unsigned char *out = (unsigned char*) *outbuf;
613 if (*outbytesleft < 3)
615 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
620 case 0x03ac : out[k++]=0xa2; out[k++]=0xe1; break;
621 case 0x03b1 : out[k++]=0xe1; break;
622 case 0x0386 : out[k++]=0xa2; out[k++]=0xc1; break;
623 case 0x0391 : out[k++]=0xc1; break;
624 case 0x03b2 : out[k++]=0xe3; break;
625 case 0x0392 : out[k++]=0xc2; break;
626 case 0x03b3 : out[k++]=0xe4; break;
627 case 0x0393 : out[k++]=0xc4; break;
628 case 0x03b4 : out[k++]=0xe5; break;
629 case 0x0394 : out[k++]=0xc5; break;
630 case 0x03ad : out[k++]=0xa2; out[k++]=0xe6; break;
631 case 0x03b5 : out[k++]=0xe6; break;
632 case 0x0388 : out[k++]=0xa2; out[k++]=0xc6; break;
633 case 0x0395 : out[k++]=0xc6; break;
634 case 0x03b6 : out[k++]=0xe9; break;
635 case 0x0396 : out[k++]=0xc9; break;
636 case 0x03ae : out[k++]=0xa2; out[k++]=0xea; break;
637 case 0x03b7 : out[k++]=0xea; break;
638 case 0x0389 : out[k++]=0xa2; out[k++]=0xca; break;
639 case 0x0397 : out[k++]=0xca; break;
640 case 0x03b8 : out[k++]=0xeb; break;
641 case 0x0398 : out[k++]=0xcb; break;
642 case 0x0390 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xec; break;
643 case 0x03af : out[k++]=0xa2; out[k++]=0xec; break;
644 case 0x03ca : out[k++]=0xa3; out[k++]=0xec; break;
645 case 0x03b9 : out[k++]=0xec; break;
646 case 0x038a : out[k++]=0xa2; out[k++]=0xcc; break;
647 case 0x03aa : out[k++]=0xa3; out[k++]=0xcc; break;
648 case 0x0399 : out[k++]=0xcc; break;
649 case 0x03ba : out[k++]=0xed; break;
650 case 0x039a : out[k++]=0xcd; break;
651 case 0x03bb : out[k++]=0xee; break;
652 case 0x039b : out[k++]=0xce; break;
653 case 0x03bc : out[k++]=0xef; break;
654 case 0x039c : out[k++]=0xcf; break;
655 case 0x03bd : out[k++]=0xf0; break;
656 case 0x039d : out[k++]=0xd0; break;
657 case 0x03be : out[k++]=0xf1; break;
658 case 0x039e : out[k++]=0xd1; break;
659 case 0x03cc : out[k++]=0xa2; out[k++]=0xf2; break;
660 case 0x03bf : out[k++]=0xf2; break;
661 case 0x038c : out[k++]=0xa2; out[k++]=0xd2; break;
662 case 0x039f : out[k++]=0xd2; break;
663 case 0x03c0 : out[k++]=0xf3; break;
664 case 0x03a0 : out[k++]=0xd3; break;
665 case 0x03c1 : out[k++]=0xf5; break;
666 case 0x03a1 : out[k++]=0xd5; break;
667 case 0x03c2 : out[k++]=0xf7; break;
668 case 0x03c3 : out[k++]=0xf6; break;
669 case 0x03a3 : out[k++]=0xd6; break;
670 case 0x03c4 : out[k++]=0xf8; break;
671 case 0x03a4 : out[k++]=0xd8; break;
672 case 0x03b0 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xf9; break;
673 case 0x03cd : out[k++]=0xa2; out[k++]=0xf9; break;
674 case 0x03cb : out[k++]=0xa3; out[k++]=0xf9; break;
675 case 0x03c5 : out[k++]=0xf9; break;
676 case 0x038e : out[k++]=0xa2; out[k++]=0xd9; break;
677 case 0x03ab : out[k++]=0xa3; out[k++]=0xd9; break;
678 case 0x03a5 : out[k++]=0xd9; break;
679 case 0x03c6 : out[k++]=0xfa; break;
680 case 0x03a6 : out[k++]=0xda; break;
681 case 0x03c7 : out[k++]=0xfb; break;
682 case 0x03a7 : out[k++]=0xdb; break;
683 case 0x03c8 : out[k++]=0xfc; break;
684 case 0x03a8 : out[k++]=0xdc; break;
685 case 0x03ce : out[k++]=0xa2; out[k++]=0xfd; break;
686 case 0x03c9 : out[k++]=0xfd; break;
687 case 0x038f : out[k++]=0xa2; out[k++]=0xdd; break;
688 case 0x03a9 : out[k++]=0xdd; break;
692 cd->my_errno = YAZ_ICONV_EILSEQ;
/* Read handler for the "advancegreek" encoding.
   A loop walks the input while bytes remain and treats the bytes 0x9e and
   0x9f specially (the matching writer emits 0x9d, 0x9e and 0x9f as prefix
   bytes ahead of the letter byte).  Running out of input is reported as
   YAZ_ICONV_EINVAL.
   NOTE(review): most of this function, including the letter decoding, is
   on lines missing from this listing. */
703 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
704 size_t inbytesleft, size_t *no_read)
712 while (inbytesleft > 0)
718 else if (*inp == 0x9e)
722 else if (*inp == 0x9f)
732 if (inbytesleft == 0)
734 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* Write handler for the "advancegreek" encoding.
   Maps one Unicode Greek code point x to 1-3 output bytes in out[].
   Lowercase letters are a single byte 0x81..0x99; the other code points
   are written as one or two of the prefix bytes 0x9d, 0x9e, 0x9f followed
   by the lowercase letter byte (every plain capital is 0x9f + that byte).
   At least 3 free output bytes are required up front (the longest
   sequence), else YAZ_ICONV_E2BIG.  YAZ_ICONV_EILSEQ is set on a path that
   is presumably the switch default (not visible in this listing). */
962 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
963 char **outbuf, size_t *outbytesleft)
966 unsigned char *out = (unsigned char*) *outbuf;
967 if (*outbytesleft < 3)
969 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
974 case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
975 case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
976 case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
977 case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
978 case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
979 case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
980 case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
981 case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
982 case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
983 case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
984 case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
985 case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
986 case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
987 case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
988 case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
989 case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
990 case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
991 case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
992 case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
993 case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
994 case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
995 case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
996 case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
997 case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
998 case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
999 case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
1000 case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
1001 case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
1002 case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
1003 case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
1004 case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
1005 case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
1006 case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
1007 case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
1008 case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
1009 case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
1010 case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
1011 case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
1012 case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
1013 case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
1014 case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
1015 case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
1016 case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
1017 case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
1018 case 0x03b1 : out[k++]=0x81; break;
1019 case 0x03b2 : out[k++]=0x82; break;
1020 case 0x03b3 : out[k++]=0x83; break;
1021 case 0x03b4 : out[k++]=0x84; break;
1022 case 0x03b5 : out[k++]=0x85; break;
1023 case 0x03b6 : out[k++]=0x86; break;
1024 case 0x03b7 : out[k++]=0x87; break;
1025 case 0x03b8 : out[k++]=0x88; break;
1026 case 0x03b9 : out[k++]=0x89; break;
1027 case 0x03ba : out[k++]=0x8a; break;
1028 case 0x03bb : out[k++]=0x8b; break;
1029 case 0x03bc : out[k++]=0x8c; break;
1030 case 0x03bd : out[k++]=0x8d; break;
1031 case 0x03be : out[k++]=0x8e; break;
1032 case 0x03bf : out[k++]=0x8f; break;
1033 case 0x03c0 : out[k++]=0x90; break;
1034 case 0x03c1 : out[k++]=0x91; break;
1035 case 0x03c2 : out[k++]=0x92; break;
1036 case 0x03c3 : out[k++]=0x93; break;
1037 case 0x03c4 : out[k++]=0x94; break;
1038 case 0x03c5 : out[k++]=0x95; break;
1039 case 0x03c6 : out[k++]=0x96; break;
/* small chi is 0x97 (capital chi above is 0x9f 0x97); it was previously
   written as 0x96, colliding with small phi */
1040 case 0x03c7 : out[k++]=0x97; break;
1041 case 0x03c8 : out[k++]=0x98; break;
1042 case 0x03c9 : out[k++]=0x99; break;
1046 cd->my_errno = YAZ_ICONV_EILSEQ;
1058 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
1059 size_t inbytesleft, size_t *no_read,
1062 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
1063 size_t inbytesleft, size_t *no_read)
1066 if (cd->comb_offset < cd->comb_size)
1068 *no_read = cd->comb_no_read[cd->comb_offset];
1069 x = cd->comb_x[cd->comb_offset];
1071 /* special case for double-diacritic combining characters,
1072 INVERTED BREVE and DOUBLE TILDE.
1073 We'll increment the no_read counter by 1, since we want to skip over
1074 the processing of the closing ligature character
1076 /* this code is no longer necessary.. our handler code in
1077 yaz_marc8_?_conv (generated by charconv.tcl) now returns
1078 0 and no_read=1 when a sequence does not match the input.
1079 The SECOND HALFs in codetables.xml produce a non-existent
1080 entry in the conversion trie.. Hence when met, the input byte is
1081 skipped as it should be (in yaz_iconv)
1084 if (x == 0x0361 || x == 0x0360)
1091 cd->comb_offset = 0;
1092 for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
1096 if (inbytesleft == 0 && cd->comb_size)
1098 cd->my_errno = YAZ_ICONV_EINVAL;
1103 x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
1106 cd->comb_x[cd->comb_size] = x;
1107 cd->comb_no_read[cd->comb_size] = *no_read;
1109 inbytesleft = inbytesleft - *no_read;
/* Read handler for "MARC8s" input.
   Reads a character via yaz_read_marc8.  When it is non-zero and exactly
   one combining character is queued (cd->comb_size == 1), latin1_comb is
   searched for an entry whose x1 equals the character just read and whose
   x2 equals the queued character cd->comb_x[0].  On a match the queued
   character's byte count is added to *no_read and x becomes the entry's
   single precomposed value y. */
1114 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
1115 size_t inbytesleft, size_t *no_read)
1117 unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
1118 if (x && cd->comb_size == 1)
1120 /* For MARC8s we try to get a Latin-1 page code out of it */
1122 for (i = 0; latin1_comb[i].x1; i++)
1123 if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
1125 *no_read += cd->comb_no_read[0];
1127 x = latin1_comb[i].y;
/* Reads one MARC-8 character, handling escape sequences first.
   While the input starts with ESC (27) the escape sequence is parsed: any
   of the intermediate bytes "(,$!)-" are skipped, the following byte is
   stored as the active character set in cd->marc8_esc_mode, and the bytes
   consumed are added to *no_read.  An escape sequence cut short by the end
   of input yields YAZ_ICONV_EINVAL.  The remaining input is then decoded
   with the yaz_marc8_N_conv table selected by cd->marc8_esc_mode; the
   table's own byte count (no_read_sub) is added to *no_read.
   YAZ_ICONV_EILSEQ is set on a path that is presumably the switch default
   (not visible in this listing). */
1134 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
1135 size_t inbytesleft, size_t *no_read,
1139 while(inbytesleft >= 1 && inp[0] == 27)
1141 size_t inbytesleft0 = inbytesleft;
1144 while(inbytesleft > 0 && strchr("(,$!)-", *inp))
1149 if (inbytesleft <= 0)
1152 cd->my_errno = YAZ_ICONV_EINVAL;
1155 cd->marc8_esc_mode = *inp++;
1157 (*no_read) += inbytesleft0 - inbytesleft;
1159 if (inbytesleft <= 0)
1164 size_t no_read_sub = 0;
1167 switch(cd->marc8_esc_mode)
1169 case 'B': /* Basic ASCII */
1170 case 'E': /* ANSEL */
1171 case 's': /* ASCII */
1172 x = yaz_marc8_1_conv(inp, inbytesleft, &no_read_sub, comb);
1174 case 'g': /* Greek */
1175 x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub, comb);
1177 case 'b': /* Subscripts */
1178 x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub, comb);
1180 case 'p': /* Superscripts */
1181 x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub, comb);
1183 case '2': /* Basic Hebrew */
1184 x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub, comb);
1186 case 'N': /* Basic Cyrillic */
1187 case 'Q': /* Extended Cyrillic */
1188 x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub, comb);
1190 case '3': /* Basic Arabic */
1191 case '4': /* Extended Arabic */
1192 x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub, comb);
1194 case 'S': /* Greek */
1195 x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub, comb);
1197 case '1': /* Chinese, Japanese, Korean (EACC) */
1198 x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub, comb);
1202 cd->my_errno = YAZ_ICONV_EILSEQ;
1205 *no_read += no_read_sub;
/* Write handler for UTF-8 output: forwards to yaz_write_UTF8_char and lets
   it store any error code directly in cd->my_errno. */
1210 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
1211 char **outbuf, size_t *outbytesleft)
1213 return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
/* Encodes the code point x as UTF-8 at *outbuf.
   The branch is chosen by the magnitude of x and by the space left:
   1 byte (x <= 0x7f), 2 bytes (<= 0x7ff), 3 bytes (<= 0xffff), 4 bytes
   (<= 0x1fffff), 5 bytes (<= 0x3ffffff), otherwise 6 bytes.  Because each
   test also requires enough room, a value that does not fit its natural
   length falls through to the later tests and, when none has room, to the
   error path: *error = YAZ_ICONV_E2BIG and (size_t)(-1) is returned.
   On success *outbuf is advanced past the written bytes and *outbytesleft
   is reduced accordingly.
   NOTE(review): the last line of the signature and the success return are
   not visible in this listing. */
1216 size_t yaz_write_UTF8_char(unsigned long x,
1217 char **outbuf, size_t *outbytesleft,
1220 unsigned char *outp = (unsigned char *) *outbuf;
1222 if (x <= 0x7f && *outbytesleft >= 1)
1224 *outp++ = (unsigned char) x;
1227 else if (x <= 0x7ff && *outbytesleft >= 2)
1229 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
1230 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1231 (*outbytesleft) -= 2;
1233 else if (x <= 0xffff && *outbytesleft >= 3)
1235 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
1236 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1237 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1238 (*outbytesleft) -= 3;
1240 else if (x <= 0x1fffff && *outbytesleft >= 4)
1242 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
1243 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1244 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1245 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1246 (*outbytesleft) -= 4;
1248 else if (x <= 0x3ffffff && *outbytesleft >= 5)
1250 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
1251 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1252 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1253 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1254 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1255 (*outbytesleft) -= 5;
1257 else if (*outbytesleft >= 6)
1259 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
1260 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
1261 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1262 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1263 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1264 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1265 (*outbytesleft) -= 6;
1269 *error = YAZ_ICONV_E2BIG; /* not room for output */
1270 return (size_t)(-1);
1272 *outbuf = (char *) outp;
1276 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
1277 char **outbuf, size_t *outbytesleft)
1279 /* list of two char unicode sequences that, when combined, are
1280 equivalent to single unicode chars that can be represented in
1282 Regular iconv on Linux at least does not seem to convert these,
1283 but since MARC-8 to UTF-8 generates these composed sequences
1284 we get a better chance of a successful MARC-8 -> ISO-8859-1
1286 unsigned char *outp = (unsigned char *) *outbuf;
1288 if (cd->compose_char)
1291 for (i = 0; latin1_comb[i].x1; i++)
1292 if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
1294 x = latin1_comb[i].y;
1297 if (*outbytesleft < 1)
1298 { /* no room. Retain compose_char and bail out */
1299 cd->my_errno = YAZ_ICONV_E2BIG;
1300 return (size_t)(-1);
1302 if (!latin1_comb[i].x1)
1303 { /* not found. Just write compose_char */
1304 *outp++ = (unsigned char) cd->compose_char;
1306 *outbuf = (char *) outp;
1308 /* compose_char used so reset it. x now holds current char */
1309 cd->compose_char = 0;
1312 if (x > 32 && x < 127 && cd->compose_char == 0)
1314 cd->compose_char = x;
1317 else if (x > 255 || x < 1)
1319 cd->my_errno = YAZ_ICONV_EILSEQ;
1322 else if (*outbytesleft < 1)
1324 cd->my_errno = YAZ_ICONV_E2BIG;
1325 return (size_t)(-1);
1327 *outp++ = (unsigned char) x;
1329 *outbuf = (char *) outp;
/* Flush handler for ISO-8859-1 output.
   If a character is still held in cd->compose_char (waiting for a possible
   combining character), it is written out as a single byte and the field
   is cleared.  With no room for that byte, YAZ_ICONV_E2BIG is set and
   (size_t)(-1) is returned, leaving compose_char in place. */
1333 static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
1334 char **outbuf, size_t *outbytesleft)
1336 if (cd->compose_char)
1338 unsigned char *outp = (unsigned char *) *outbuf;
1339 if (*outbytesleft < 1)
1341 cd->my_errno = YAZ_ICONV_E2BIG;
1342 return (size_t)(-1);
1344 *outp++ = (unsigned char) cd->compose_char;
1346 *outbuf = (char *) outp;
1347 cd->compose_char = 0;
/* Write handler for big-endian UCS-4 output: stores x as four bytes, most
   significant first, advances *outbuf and reduces *outbytesleft by 4.
   With fewer than 4 bytes of room, YAZ_ICONV_E2BIG is set and (size_t)(-1)
   is returned. */
1352 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
1353 char **outbuf, size_t *outbytesleft)
1355 unsigned char *outp = (unsigned char *) *outbuf;
1356 if (*outbytesleft >= 4)
1358 *outp++ = (unsigned char) (x>>24);
1359 *outp++ = (unsigned char) (x>>16);
1360 *outp++ = (unsigned char) (x>>8);
1361 *outp++ = (unsigned char) x;
1362 (*outbytesleft) -= 4;
1366 cd->my_errno = YAZ_ICONV_E2BIG;
1367 return (size_t)(-1);
1369 *outbuf = (char *) outp;
/* Write handler for little-endian UCS-4 output: stores x as four bytes,
   least significant first, advances *outbuf and reduces *outbytesleft by 4.
   With fewer than 4 bytes of room, YAZ_ICONV_E2BIG is set and (size_t)(-1)
   is returned. */
1373 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1374 char **outbuf, size_t *outbytesleft)
1376 unsigned char *outp = (unsigned char *) *outbuf;
1377 if (*outbytesleft >= 4)
1379 *outp++ = (unsigned char) x;
1380 *outp++ = (unsigned char) (x>>8);
1381 *outp++ = (unsigned char) (x>>16);
1382 *outp++ = (unsigned char) (x>>24);
1383 (*outbytesleft) -= 4;
1387 cd->my_errno = YAZ_ICONV_E2BIG;
1388 return (size_t)(-1);
1390 *outbuf = (char *) outp;
/* Finds the MARC-8 representation of the Unicode code point x.
   x is first encoded as UTF-8 into a local, NUL-terminated buffer (a
   failure of that step is reported as YAZ_ICONV_EILSEQ).  That buffer is
   then offered to the reverse tables yaz_marc8r_1_conv ..
   yaz_marc8r_9_conv in order; next to each call, *page_chr is set to the
   escape sequence of the corresponding character set.  The combining flag
   is passed back through comb.
   NOTE(review): the tests between the table calls are not visible in this
   listing; presumably the first table that yields a non-zero value wins and
   YAZ_ICONV_EILSEQ is set when none does - confirm. */
1394 static unsigned long lookup_marc8(yaz_iconv_t cd,
1395 unsigned long x, int *comb,
1396 const char **page_chr)
1399 char *utf8_outbuf = utf8_buf;
1400 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1402 r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
1403 if (r == (size_t)(-1))
1405 cd->my_errno = YAZ_ICONV_EILSEQ;
1411 size_t inbytesleft, no_read_sub = 0;
1414 *utf8_outbuf = '\0';
1415 inp = (unsigned char *) utf8_buf;
1416 inbytesleft = strlen(utf8_buf);
1418 x = yaz_marc8r_1_conv(inp, inbytesleft, &no_read_sub, comb);
1421 *page_chr = "\033(B";
1424 x = yaz_marc8r_2_conv(inp, inbytesleft, &no_read_sub, comb);
1427 *page_chr = "\033g";
1430 x = yaz_marc8r_3_conv(inp, inbytesleft, &no_read_sub, comb);
1433 *page_chr = "\033b";
1436 x = yaz_marc8r_4_conv(inp, inbytesleft, &no_read_sub, comb);
1439 *page_chr = "\033p";
1442 x = yaz_marc8r_5_conv(inp, inbytesleft, &no_read_sub, comb);
1445 *page_chr = "\033(2";
1448 x = yaz_marc8r_6_conv(inp, inbytesleft, &no_read_sub, comb);
1451 *page_chr = "\033(N";
1454 x = yaz_marc8r_7_conv(inp, inbytesleft, &no_read_sub, comb);
1457 *page_chr = "\033(3";
1460 x = yaz_marc8r_8_conv(inp, inbytesleft, &no_read_sub, comb);
1463 *page_chr = "\033(S";
1466 x = yaz_marc8r_9_conv(inp, inbytesleft, &no_read_sub, comb);
1469 *page_chr = "\033$1";
1472 cd->my_errno = YAZ_ICONV_EILSEQ;
/* Writes out the pending MARC-8 base character together with its queued
   combining characters.
   The pending character cd->write_marc8_last is split into up to three
   bytes (bits 16-23, 8-15, 0-7) in a local buffer.  If those bytes, the
   queued combining bytes and one extra byte do not fit, YAZ_ICONV_E2BIG is
   set and (size_t)(-1) is returned.  Otherwise the combining bytes are
   written first, then the base character bytes, then the optional
   second-half byte; afterwards all pending state is cleared.
   NOTE(review): the conditions deciding which of the three bytes are
   stored are not visible in this listing. */
1477 static size_t flush_combos(yaz_iconv_t cd,
1478 char **outbuf, size_t *outbytesleft)
1480 unsigned long y = cd->write_marc8_last;
1483 size_t i, out_no = 0;
1488 byte = (unsigned char )((y>>16) & 0xff);
1490 out_buf[out_no++] = byte;
1491 byte = (unsigned char)((y>>8) & 0xff);
1493 out_buf[out_no++] = byte;
1494 byte = (unsigned char )(y & 0xff);
1496 out_buf[out_no++] = byte;
1498 if (out_no + cd->write_marc8_comb_no + 1 > *outbytesleft)
1500 cd->my_errno = YAZ_ICONV_E2BIG;
1501 return (size_t) (-1);
1504 for (i = 0; i < cd->write_marc8_comb_no; i++)
1506 /* all MARC-8 combined characters are simple bytes */
1507 byte = (unsigned char )(cd->write_marc8_comb_ch[i]);
1508 *(*outbuf)++ = byte;
1511 memcpy(*outbuf, out_buf, out_no);
1513 (*outbytesleft) -= out_no;
1514 if (cd->write_marc8_second_half_char)
1516 *(*outbuf)++ = cd->write_marc8_second_half_char;
1520 cd->write_marc8_last = 0;
1521 cd->write_marc8_comb_no = 0;
1522 cd->write_marc8_second_half_char = 0;
/* Switches the MARC-8 output character set to page_chr when it differs
   from the current one (cd->write_marc8_page_chr).
   At least 8 free output bytes are required, else YAZ_ICONV_E2BIG and
   (size_t)(-1).  When the old set is one of "\033p", "\033g" or "\033b"
   and the new set is not the ASCII page "\033(B", a leave sequence is
   written before the new page's own escape sequence; otherwise a single
   sequence is written.
   NOTE(review): the assignment of the leave sequence to page_out is on a
   line missing from this listing. */
1526 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
1527 char **outbuf, size_t *outbytesleft,
1528 const char *page_chr)
1530 const char *old_page_chr = cd->write_marc8_page_chr;
1531 if (strcmp(page_chr, old_page_chr))
1534 const char *page_out = page_chr;
1536 if (*outbytesleft < 8)
1538 cd->my_errno = YAZ_ICONV_E2BIG;
1540 return (size_t) (-1);
1542 cd->write_marc8_page_chr = page_chr;
1544 if (!strcmp(old_page_chr, "\033p")
1545 || !strcmp(old_page_chr, "\033g")
1546 || !strcmp(old_page_chr, "\033b"))
1548 /* Technique 1 leave */
1550 if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */
1552 /* Must leave script + enter new page */
1553 plen = strlen(page_out);
1554 memcpy(*outbuf, page_out, plen);
1556 (*outbytesleft) -= plen;
1557 page_out = page_chr;
1560 plen = strlen(page_out);
1561 memcpy(*outbuf, page_out, plen);
1563 (*outbytesleft) -= plen;
/* Queues one Unicode character for MARC-8 output.
   The character is looked up with lookup_marc8, which yields the MARC-8
   value y, the combining flag and the page escape sequence.  A combining
   character is appended to cd->write_marc8_comb_ch (only while fewer than 6
   are queued); for the double-width diacritics a second-half byte is
   remembered (0xFB for U+0360; 0xEC for the case tested on a line not
   visible here).  A non-combining character first flushes what is pending
   via flush_combos, switches page if needed and then becomes the new
   pending character cd->write_marc8_last. */
1569 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1570 char **outbuf, size_t *outbytesleft)
1573 const char *page_chr = 0;
1574 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1577 return (size_t) (-1);
1582 cd->write_marc8_second_half_char = 0xEC;
1583 else if (x == 0x0360)
1584 cd->write_marc8_second_half_char = 0xFB;
1586 if (cd->write_marc8_comb_no < 6)
1587 cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y;
1591 size_t r = flush_combos(cd, outbuf, outbytesleft);
1595 r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr);
1598 cd->write_marc8_last = y;
/* Flush handler for MARC-8 output: writes out any pending character and
   combining characters via flush_combos, then switches the output back to
   the ASCII page "\033(B".
   NOTE(review): the check of r between the two calls is not visible in
   this listing. */
1603 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1604 char **outbuf, size_t *outbytesleft)
1606 size_t r = flush_combos(cd, outbuf, outbytesleft);
1609 return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, "\033(B");
/* Write handler for MARC-8 output.
   If x is one of the precomposed Latin-1 characters listed in latin1_comb
   (field y), it is written as its two parts instead: x1 followed by x2,
   each through yaz_write_marc8_2.  The output position, space left and
   pending character are saved beforehand so that a YAZ_ICONV_E2BIG failure
   can be rolled back.  Any other character goes straight to
   yaz_write_marc8_2.
   NOTE(review): last_ch is an int while cd->write_marc8_last is an
   unsigned long; flush_combos only uses the low 24 bits, so the saved
   value presumably always fits - confirm. */
1612 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1613 char **outbuf, size_t *outbytesleft)
1616 for (i = 0; latin1_comb[i].x1; i++)
1618 if (x == latin1_comb[i].y)
1621 /* save the output pointers .. */
1622 char *outbuf0 = *outbuf;
1623 size_t outbytesleft0 = *outbytesleft;
1624 int last_ch = cd->write_marc8_last;
1626 r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1627 outbuf, outbytesleft);
1630 r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1631 outbuf, outbytesleft);
1632 if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1634 /* not enough room. reset output to original values */
1636 *outbytesleft = outbytesleft0;
1637 cd->write_marc8_last = last_ch;
1642 return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
/* Write handler for output stored as native wchar_t values.
   With at least sizeof(wchar_t) bytes of room, the character wch is copied
   out with memcpy, and the output pointer and remaining space are adjusted
   by sizeof(wch).  Otherwise YAZ_ICONV_E2BIG is set and (size_t)(-1) is
   returned.
   NOTE(review): the declaration and assignment of wch are not visible in
   this listing; presumably it holds x converted to wchar_t. */
1647 static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
1648 char **outbuf, size_t *outbytesleft)
1650 unsigned char *outp = (unsigned char *) *outbuf;
1652 if (*outbytesleft >= sizeof(wchar_t))
1655 memcpy(outp, &wch, sizeof(wch));
1656 outp += sizeof(wch);
1657 (*outbytesleft) -= sizeof(wch);
1661 cd->my_errno = YAZ_ICONV_E2BIG;
1662 return (size_t)(-1);
1664 *outbuf = (char *) outp;
/* Tell whether cd has both a read handler and a write handler set.
 *
 * Returns non-zero when cd->read_handle and cd->write_handle are both
 * non-null, 0 otherwise.
 */
1669 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1671 return cd->read_handle && cd->write_handle;
/* Create a conversion descriptor for converting fromcode to tocode.
 *
 * The descriptor is allocated with xmalloc() and its handlers start out
 * as 0. Both encoding names are compared with yaz_matchstr(); for a
 * recognised name the corresponding yaz_read_* or yaz_write_* routine is
 * installed, together with an init handler (UTF8 input) or a flush
 * handler (ISO88591, MARC8 and MARC8s output). When either the read or
 * the write handler is still unset afterwards, iconv_open(tocode,
 * fromcode) is called.
 *
 * NOTE(review): braces, preprocessor lines, the body of the leading-'@'
 * branch, the failure paths and the return statement are missing from
 * this listing.
 */
1674 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1676 yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
1678 cd->write_handle = 0;
1679 cd->read_handle = 0;
1680 cd->init_handle = 0;
1681 cd->flush_handle = 0;
1682 cd->my_errno = YAZ_ICONV_UNKNOWN;
1684 /* a useful hack: if fromcode has a leading @,
1685 the library will not use YAZ's own conversions .. */
1686 if (fromcode[0] == '@')
/* choose the reader from the source encoding name */
1690 if (!yaz_matchstr(fromcode, "UTF8"))
1692 cd->read_handle = yaz_read_UTF8;
1693 cd->init_handle = yaz_init_UTF8;
1695 else if (!yaz_matchstr(fromcode, "ISO88591"))
1696 cd->read_handle = yaz_read_ISO8859_1;
1697 else if (!yaz_matchstr(fromcode, "UCS4"))
1698 cd->read_handle = yaz_read_UCS4;
1699 else if (!yaz_matchstr(fromcode, "UCS4LE"))
1700 cd->read_handle = yaz_read_UCS4LE;
1701 else if (!yaz_matchstr(fromcode, "MARC8"))
1702 cd->read_handle = yaz_read_marc8;
1703 else if (!yaz_matchstr(fromcode, "MARC8s"))
1704 cd->read_handle = yaz_read_marc8s;
1705 else if (!yaz_matchstr(fromcode, "advancegreek"))
1706 cd->read_handle = yaz_read_advancegreek;
1707 else if (!yaz_matchstr(fromcode, "iso54281984"))
1708 cd->read_handle = yaz_read_iso5428_1984;
1710 else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1711 cd->read_handle = yaz_read_wchar_t;
/* choose the writer (and flush handler, where one exists) from the
   target encoding name */
1714 if (!yaz_matchstr(tocode, "UTF8"))
1715 cd->write_handle = yaz_write_UTF8;
1716 else if (!yaz_matchstr(tocode, "ISO88591"))
1718 cd->write_handle = yaz_write_ISO8859_1;
1719 cd->flush_handle = yaz_flush_ISO8859_1;
1721 else if (!yaz_matchstr (tocode, "UCS4"))
1722 cd->write_handle = yaz_write_UCS4;
1723 else if (!yaz_matchstr(tocode, "UCS4LE"))
1724 cd->write_handle = yaz_write_UCS4LE;
/* MARC8 and MARC8s install the same writer and flush handler; only the
   readers above differ between the two names */
1725 else if (!yaz_matchstr(tocode, "MARC8"))
1727 cd->write_handle = yaz_write_marc8;
1728 cd->flush_handle = yaz_flush_marc8;
1730 else if (!yaz_matchstr(tocode, "MARC8s"))
1732 cd->write_handle = yaz_write_marc8;
1733 cd->flush_handle = yaz_flush_marc8;
1735 else if (!yaz_matchstr(tocode, "advancegreek"))
1737 cd->write_handle = yaz_write_advancegreek;
1739 else if (!yaz_matchstr(tocode, "iso54281984"))
1741 cd->write_handle = yaz_write_iso5428_1984;
1744 else if (!yaz_matchstr(tocode, "WCHAR_T"))
1745 cd->write_handle = yaz_write_wchar_t;
/* no complete reader/writer pair was found: try the system iconv */
1750 if (!cd->read_handle || !cd->write_handle)
1752 cd->iconv_cd = iconv_open (tocode, fromcode);
/* (iconv_t)(-1) is the failure value of iconv_open() */
1753 if (cd->iconv_cd == (iconv_t) (-1))
/* same test repeated; its body is not visible in this listing */
1760 if (!cd->read_handle || !cd->write_handle)
/* Convert characters from *inbuf to *outbuf; the argument list mirrors
 * iconv().
 *
 * The visible statements show the following pieces:
 *  - a call to iconv() on cd->iconv_cd, where a (size_t)(-1) result is
 *    turned into a YAZ_ICONV_* code in cd->my_errno based on yaz_errno();
 *  - a reset of the per-descriptor conversion state (error code, MARC-8
 *    escape mode, combining/compose buffers and the MARC-8 writer state);
 *  - a call to cd->init_handle, when set and input is supplied, after
 *    which *inbytesleft is reduced by the number of bytes it consumed;
 *  - when inbuf or *inbuf is null and an output buffer is supplied, a
 *    write of cd->unget_x through cd->write_handle and a call to
 *    cd->flush_handle when one is set;
 *  - reading a character x with cd->read_handle, writing it with
 *    cd->write_handle, and advancing *inbuf / *inbytesleft by no_read;
 *    on YAZ_ICONV_E2BIG no_read is saved in cd->no_read_x.
 *
 * NOTE(review): braces, declarations, case labels, loop constructs,
 * preprocessor lines and the return statements are missing from this
 * listing, so the guards around the sections above cannot be confirmed
 * from here. The comment that starts at original line 1886 is also cut
 * off in this listing.
 */
1770 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1771 char **outbuf, size_t *outbytesleft)
/* system iconv call; the guard selecting this path is not visible */
1780 iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1781 if (r == (size_t)(-1))
/* translate the system error into a YAZ_ICONV_* code (case labels are
   not visible in this listing) */
1783 switch (yaz_errno())
1786 cd->my_errno = YAZ_ICONV_E2BIG;
1789 cd->my_errno = YAZ_ICONV_EINVAL;
1792 cd->my_errno = YAZ_ICONV_EILSEQ;
1795 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* reset of the conversion state; the guard for this section is not
   visible in this listing */
1807 cd->my_errno = YAZ_ICONV_UNKNOWN;
1808 cd->marc8_esc_mode = 'B';
1810 cd->comb_offset = cd->comb_size = 0;
1811 cd->compose_char = 0;
1813 cd->write_marc8_comb_no = 0;
1814 cd->write_marc8_second_half_char = 0;
1815 cd->write_marc8_last = 0;
1816 cd->write_marc8_page_chr = "\033(B";
/* let the reader's init handler look at the start of the input */
1824 if (cd->init_handle && inbuf && *inbuf)
1827 size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1828 *inbytesleft, &no_read);
1831 if (cd->my_errno == YAZ_ICONV_EINVAL)
1836 *inbytesleft -= no_read;
/* no input supplied */
1842 if (!inbuf || !*inbuf)
1844 if (outbuf && *outbuf)
/* write the saved character unget_x, then run the flush handler */
1847 r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
1848 if (cd->flush_handle)
1849 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
/* pick up the byte count saved by an earlier call (see no_read_x below) */
1864 no_read = cd->no_read_x;
1868 if (*inbytesleft == 0)
/* result is the number of input bytes consumed since inbuf0 */
1870 r = *inbuf - inbuf0;
1873 x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1883 r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
1886 /* unable to write it. save it because read_handle cannot
1888 if (cd->my_errno == YAZ_ICONV_E2BIG)
1891 cd->no_read_x = no_read;
1897 *inbytesleft -= no_read;
1898 (*inbuf) += no_read;
/* Return the error code stored in cd->my_errno. Elsewhere in this file
 * that field is assigned YAZ_ICONV_UNKNOWN, YAZ_ICONV_E2BIG,
 * YAZ_ICONV_EINVAL or YAZ_ICONV_EILSEQ.
 */
1903 int yaz_iconv_error (yaz_iconv_t cd)
1905 return cd->my_errno;
/* Close a conversion descriptor.
 *
 * The only visible statement closes the system iconv descriptor held in
 * cd->iconv_cd.
 *
 * NOTE(review): the guard around iconv_close(), the release of cd itself
 * and the return value are missing from this listing - presumably cd is
 * freed here, since yaz_iconv_open() allocates it with xmalloc(); confirm.
 */
1908 int yaz_iconv_close (yaz_iconv_t cd)
1912 iconv_close (cd->iconv_cd);
1921 * indent-tabs-mode: nil
1923 * vim: shiftwidth=4 tabstop=8 expandtab