2 * Copyright (C) 1995-2007, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.48 2007-10-15 20:45:05 adam Exp $
9 * \brief Implements simple ICONV
11 * This implements an interface similar to that of iconv and
12 * is used by YAZ to interface with iconv (if present).
13 * For systems where iconv is not present, this layer
14 * provides a few important conversions: UTF-8, MARC-8, Latin-1.
17 * http://www.loc.gov/marc/specifications/speccharmarc8.html
37 #include <yaz/yaz-util.h>
/* MARC-8 decoding handlers, one per character set page.
   The hex pair in each name appears to be the ASCII code of the MARC-8
   escape final byte of the page (for example 67 is g, 31 is 1); compare
   the switch in yaz_read_marc8_comb, which dispatches to them.
   Each handler reads from inp (inbytesleft bytes available), returns the
   decoded character and stores the number of bytes consumed in *no_read.
   According to the note in yaz_read_marc8 they are generated by
   charconv.tcl and return 0 with no_read=1 when nothing matches.
   NOTE(review): *combining presumably flags a combining character -
   confirm against the generated code. */
39 unsigned long yaz_marc8_42_conv(unsigned char *inp, size_t inbytesleft,
40 size_t *no_read, int *combining);
41 unsigned long yaz_marc8_45_conv(unsigned char *inp, size_t inbytesleft,
42 size_t *no_read, int *combining);
43 unsigned long yaz_marc8_67_conv(unsigned char *inp, size_t inbytesleft,
44 size_t *no_read, int *combining);
45 unsigned long yaz_marc8_62_conv(unsigned char *inp, size_t inbytesleft,
46 size_t *no_read, int *combining);
47 unsigned long yaz_marc8_70_conv(unsigned char *inp, size_t inbytesleft,
48 size_t *no_read, int *combining);
49 unsigned long yaz_marc8_32_conv(unsigned char *inp, size_t inbytesleft,
50 size_t *no_read, int *combining);
51 unsigned long yaz_marc8_4E_conv(unsigned char *inp, size_t inbytesleft,
52 size_t *no_read, int *combining);
53 unsigned long yaz_marc8_51_conv(unsigned char *inp, size_t inbytesleft,
54 size_t *no_read, int *combining);
55 unsigned long yaz_marc8_33_conv(unsigned char *inp, size_t inbytesleft,
56 size_t *no_read, int *combining);
57 unsigned long yaz_marc8_34_conv(unsigned char *inp, size_t inbytesleft,
58 size_t *no_read, int *combining);
59 unsigned long yaz_marc8_53_conv(unsigned char *inp, size_t inbytesleft,
60 size_t *no_read, int *combining);
61 unsigned long yaz_marc8_31_conv(unsigned char *inp, size_t inbytesleft,
62 size_t *no_read, int *combining);
/* Reverse (encoding) handlers with the same signature as the
   yaz_marc8_*_conv family.  lookup_marc8 feeds them the UTF-8 encoding of
   a character and tries them in the order 42, 45, 67, 62, 70, 32, 4E, 51,
   33, 34, 53, 31 to find the MARC-8 code and page for that character. */
65 unsigned long yaz_marc8r_42_conv(unsigned char *inp, size_t inbytesleft,
66 size_t *no_read, int *combining);
67 unsigned long yaz_marc8r_45_conv(unsigned char *inp, size_t inbytesleft,
68 size_t *no_read, int *combining);
69 unsigned long yaz_marc8r_67_conv(unsigned char *inp, size_t inbytesleft,
70 size_t *no_read, int *combining);
71 unsigned long yaz_marc8r_62_conv(unsigned char *inp, size_t inbytesleft,
72 size_t *no_read, int *combining);
73 unsigned long yaz_marc8r_70_conv(unsigned char *inp, size_t inbytesleft,
74 size_t *no_read, int *combining);
75 unsigned long yaz_marc8r_32_conv(unsigned char *inp, size_t inbytesleft,
76 size_t *no_read, int *combining);
77 unsigned long yaz_marc8r_4E_conv(unsigned char *inp, size_t inbytesleft,
78 size_t *no_read, int *combining);
79 unsigned long yaz_marc8r_51_conv(unsigned char *inp, size_t inbytesleft,
80 size_t *no_read, int *combining);
81 unsigned long yaz_marc8r_33_conv(unsigned char *inp, size_t inbytesleft,
82 size_t *no_read, int *combining);
83 unsigned long yaz_marc8r_34_conv(unsigned char *inp, size_t inbytesleft,
84 size_t *no_read, int *combining);
85 unsigned long yaz_marc8r_53_conv(unsigned char *inp, size_t inbytesleft,
86 size_t *no_read, int *combining);
87 unsigned long yaz_marc8r_31_conv(unsigned char *inp, size_t inbytesleft,
88 size_t *no_read, int *combining);
/* State of one conversion descriptor (yaz_iconv_t).
   The four handlers are the pluggable stages of a conversion: optional
   input initialisation, reading one character, writing one character and
   flushing pending output. */
92 struct yaz_iconv_struct {
95 size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
96 size_t inbytesleft, size_t *no_read);
97 unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
98 size_t inbytesleft, size_t *no_read);
99 size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
100 char **outbuf, size_t *outbytesleft);
101 size_t (*flush_handle)(yaz_iconv_t cd,
102 char **outbuf, size_t *outbytesleft);
/* Characters buffered by yaz_read_marc8 (at most 8) and the number of
   input bytes each of them consumed. */
107 unsigned long comb_x[8];
108 size_t comb_no_read[8];
110 unsigned long unget_x;
/* Character held back by yaz_write_ISO8859_1 until the next character
   shows whether the pair can be composed; 0 when nothing is pending. */
114 unsigned long compose_char;
/* Pending output state of the MARC-8 writer, emitted by flush_combos:
   the last character, its page escape and an optional trailing byte. */
116 unsigned write_marc8_second_half_char;
117 unsigned long write_marc8_last;
118 const char *write_marc8_lpage;
/* Page escapes currently in effect for the G0 and G1 sets, as tracked by
   yaz_write_marc8_page_chr. */
119 const char *write_marc8_g0;
120 const char *write_marc8_g1;
/* Latin-1 composition table (latin1_comb).
   Each entry lists a base character (x1), a Unicode combining mark (x2)
   and, as third value, the precomposed ISO-8859-1 code that the users of
   the table read as member y.
   The users loop while x1 is non-zero, so the table presumably ends with
   an all-zero entry - TODO confirm. */
124 unsigned long x1, x2;
127 { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
128 { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
129 { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
130 { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
131 { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
132 { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
133 /* no need for 0xc6 LATIN CAPITAL LETTER AE */
134 { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
135 { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
136 { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
137 { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
138 { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
139 { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
140 { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
141 { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
142 { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
143 { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
144 { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
145 { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
146 { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
147 { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
148 { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
149 /* omitted: 0xd7 MULTIPLICATION SIGN */
150 /* omitted: 0xd8 LATIN CAPITAL LETTER O WITH STROKE */
151 { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
152 { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
153 { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
154 { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
155 { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
156 /* omitted: 0xde LATIN CAPITAL LETTER THORN */
157 /* omitted: 0xdf LATIN SMALL LETTER SHARP S */
158 { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
159 { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
160 { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
161 { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
162 { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
163 { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
164 /* omitted: 0xe6 LATIN SMALL LETTER AE */
165 { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
166 { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
167 { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
168 { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
169 { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
170 { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
171 { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
172 { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
173 { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
174 /* omitted: 0xf0 LATIN SMALL LETTER ETH */
175 { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
176 { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
177 { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
178 { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
179 { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
180 { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
181 /* omitted: 0xf7 DIVISION SIGN */
182 /* omitted: 0xf8 LATIN SMALL LETTER O WITH STROKE */
183 { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
184 { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
185 { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
186 { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
187 { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
188 /* omitted: 0xfe LATIN SMALL LETTER THORN */
189 { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
/* Forward declaration: emits the escape sequence that selects the MARC-8
   page page_chr.  Declared here because flush_combos calls it before its
   definition. */
194 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
195 char **outbuf, size_t *outbytesleft,
196 const char *page_chr);
/* Read handler for ISO-8859-1 input: the character value is simply the
   first input byte. */
198 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
199 size_t inbytesleft, size_t *no_read)
201 unsigned long x = inp[0];
/* Init handler for UTF-8 input: looks for a leading byte order mark.
   Sets my_errno to YAZ_ICONV_EINVAL on its error path.
   Fix: the UTF-8 BOM is the byte sequence EF BB BF, so both trailing bytes
   must match.  The previous test (inp[1] != 0xbb && inp[2] == 0xbf) was
   true only for EF xx BF with xx different from BB, that is never for a
   real BOM.
   NOTE(review): this assumes the statement guarded by the test is the
   branch that skips the 3 BOM bytes - confirm. */
207 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
208 size_t inbytesleft, size_t *no_read)
217 cd->my_errno = YAZ_ICONV_EINVAL;
220 if (inp[1] == 0xbb && inp[2] == 0xbf)
/* Decodes one UTF-8 sequence (1 to 6 bytes) starting at inp.
   inbytesleft is the number of bytes available.  *no_read is set to 0 by
   default.  *error is set to YAZ_ICONV_EILSEQ for an invalid lead byte or
   an invalid sequence, and to YAZ_ICONV_EINVAL when the available input is
   too short for the sequence announced by the lead byte.
   NOTE(review): each multi-byte branch sets EILSEQ in two places; one is
   for bad continuation bytes, the other presumably rejects overlong
   encodings - confirm. */
227 unsigned long yaz_read_UTF8_char(unsigned char *inp,
228 size_t inbytesleft, size_t *no_read,
233 *no_read = 0; /* by default */
/* bytes up to 0xbf cannot start a multi-byte sequence; 0xfe and 0xff are
   never valid */
239 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
241 *error = YAZ_ICONV_EILSEQ;
/* 2-byte sequence: 5 payload bits in the lead byte */
243 else if (inp[0] <= 0xdf && inbytesleft >= 2)
245 if ((inp[1] & 0xc0) == 0x80)
247 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
251 *error = YAZ_ICONV_EILSEQ;
254 *error = YAZ_ICONV_EILSEQ;
/* 3-byte sequence: 4 payload bits in the lead byte */
256 else if (inp[0] <= 0xef && inbytesleft >= 3)
258 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
260 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
265 *error = YAZ_ICONV_EILSEQ;
268 *error = YAZ_ICONV_EILSEQ;
/* 4-byte sequence: 3 payload bits in the lead byte */
270 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
272 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
273 && (inp[3] & 0xc0) == 0x80)
275 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
276 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
280 *error = YAZ_ICONV_EILSEQ;
283 *error = YAZ_ICONV_EILSEQ;
/* 5-byte sequence: 2 payload bits in the lead byte */
285 else if (inp[0] <= 0xfb && inbytesleft >= 5)
287 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
288 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
290 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
291 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
296 *error = YAZ_ICONV_EILSEQ;
299 *error = YAZ_ICONV_EILSEQ;
/* 6-byte sequence: 1 payload bit in the lead byte */
301 else if (inp[0] <= 0xfd && inbytesleft >= 6)
303 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
304 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
305 && (inp[5] & 0xc0) == 0x80)
307 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
308 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
309 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
313 *error = YAZ_ICONV_EILSEQ;
316 *error = YAZ_ICONV_EILSEQ;
319 *error = YAZ_ICONV_EINVAL; /* incomplete sequence */
/* Read handler for UTF-8 input: delegates to yaz_read_UTF8_char and lets
   it report errors through cd->my_errno. */
324 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
325 size_t inbytesleft, size_t *no_read)
327 return yaz_read_UTF8_char(inp, inbytesleft, no_read, &cd->my_errno);
/* Read handler for big-endian UCS-4: combines 4 input bytes, most
   significant byte first.  Sets my_errno to YAZ_ICONV_EINVAL when the
   input is incomplete.
   Fix: the top byte is widened to unsigned long before shifting.  inp[0]
   is otherwise promoted to int, and shifting a value of 0x80 or more left
   by 24 overflows a 32-bit int (undefined behaviour) and sign-extends
   into the result where unsigned long is 64 bits wide. */
330 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
331 size_t inbytesleft, size_t *no_read)
337 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
342 x = ((unsigned long) inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
/* Read handler for little-endian UCS-4: combines 4 input bytes, least
   significant byte first.  Sets my_errno to YAZ_ICONV_EINVAL when the
   input is incomplete.
   Fix: the top byte is widened to unsigned long before shifting.  inp[3]
   is otherwise promoted to int, and shifting a value of 0x80 or more left
   by 24 overflows a 32-bit int (undefined behaviour) and sign-extends
   into the result where unsigned long is 64 bits wide. */
348 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
349 size_t inbytesleft, size_t *no_read)
355 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
360 x = ((unsigned long) inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
/* Read handler for input that is an array of native wchar_t values.
   Needs sizeof(wchar_t) bytes of input, otherwise my_errno is set to
   YAZ_ICONV_EINVAL.  The value is copied out with memcpy, so inp does not
   have to be aligned, and sizeof(wch) bytes are consumed. */
367 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
368 size_t inbytesleft, size_t *no_read)
372 if (inbytesleft < sizeof(wchar_t))
374 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
380 memcpy (&wch, inp, sizeof(wch));
382 *no_read = sizeof(wch);
/* Read handler for ISO 5428:1984 (Greek) input.
   A loop first consumes prefix bytes (0xa3 is one of them), then the
   letter byte is mapped by the switch below.  my_errno is set to
   YAZ_ICONV_EINVAL when the input ends after the prefixes.
   NOTE(review): judging by yaz_write_iso5428_1984 the prefixes are 0xa2
   (tonos) and 0xa3 (dialytika) - confirm. */
388 static unsigned long yaz_read_iso5428_1984(yaz_iconv_t cd, unsigned char *inp,
389 size_t inbytesleft, size_t *no_read)
396 while (inbytesleft > 0)
402 else if (*inp == 0xa3)
412 if (inbytesleft == 0)
414 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
419 case 0xe1: /* alpha small */
425 case 0xc1: /* alpha capital */
432 case 0xe2: /* Beta small */
435 case 0xc2: /* Beta capital */
439 case 0xe4: /* Gamma small */
442 case 0xc4: /* Gamma capital */
446 case 0xe5: /* Delta small */
449 case 0xc5: /* Delta capital */
452 case 0xe6: /* epsilon small */
458 case 0xc6: /* epsilon capital */
464 case 0xe9: /* Zeta small */
467 case 0xc9: /* Zeta capital */
470 case 0xea: /* Eta small */
476 case 0xca: /* Eta capital */
482 case 0xeb: /* Theta small */
485 case 0xcb: /* Theta capital */
488 case 0xec: /* Iota small */
500 case 0xcc: /* Iota capital */
509 case 0xed: /* Kappa small */
512 case 0xcd: /* Kappa capital */
515 case 0xee: /* Lambda small */
518 case 0xce: /* Lambda capital */
521 case 0xef: /* Mu small */
524 case 0xcf: /* Mu capital */
527 case 0xf0: /* Nu small */
530 case 0xd0: /* Nu capital */
533 case 0xf1: /* Xi small */
536 case 0xd1: /* Xi capital */
539 case 0xf2: /* Omicron small */
545 case 0xd2: /* Omicron capital */
551 case 0xf3: /* Pi small */
554 case 0xd3: /* Pi capital */
557 case 0xf5: /* Rho small */
560 case 0xd5: /* Rho capital */
563 case 0xf7: /* Sigma small (end of words) */
566 case 0xf6: /* Sigma small */
569 case 0xd6: /* Sigma capital */
572 case 0xf8: /* Tau small */
575 case 0xd8: /* Tau capital */
578 case 0xf9: /* Upsilon small */
590 case 0xd9: /* Upsilon capital */
599 case 0xfa: /* Phi small */
602 case 0xda: /* Phi capital */
605 case 0xfb: /* Chi small */
608 case 0xdb: /* Chi capital */
611 case 0xfc: /* Psi small */
614 case 0xdc: /* Psi capital */
617 case 0xfd: /* Omega small */
623 case 0xdd: /* Omega capital */
/* Write handler for ISO 5428:1984 (Greek) output.
   Maps a Unicode Greek code point x to one, two or three bytes.  Small
   letters use 0xe1..0xfd and capitals 0xc1..0xdd; the prefix 0xa2 is
   emitted for letters with tonos and 0xa3 for letters with dialytika
   (both for U+0390 and U+03B0).
   At least 3 free output bytes are required up front, otherwise my_errno
   is set to YAZ_ICONV_E2BIG.
   NOTE(review): my_errno is set to YAZ_ICONV_EILSEQ after the listed
   cases, presumably for characters that cannot be encoded - confirm the
   exact condition. */
638 static size_t yaz_write_iso5428_1984(yaz_iconv_t cd, unsigned long x,
639 char **outbuf, size_t *outbytesleft)
642 unsigned char *out = (unsigned char*) *outbuf;
643 if (*outbytesleft < 3)
645 cd->my_errno = YAZ_ICONV_E2BIG; /* no room for output */
650 case 0x03ac : out[k++]=0xa2; out[k++]=0xe1; break;
651 case 0x03b1 : out[k++]=0xe1; break;
652 case 0x0386 : out[k++]=0xa2; out[k++]=0xc1; break;
653 case 0x0391 : out[k++]=0xc1; break;
654 case 0x03b2 : out[k++]=0xe2; break;
655 case 0x0392 : out[k++]=0xc2; break;
656 case 0x03b3 : out[k++]=0xe4; break;
657 case 0x0393 : out[k++]=0xc4; break;
658 case 0x03b4 : out[k++]=0xe5; break;
659 case 0x0394 : out[k++]=0xc5; break;
660 case 0x03ad : out[k++]=0xa2; out[k++]=0xe6; break;
661 case 0x03b5 : out[k++]=0xe6; break;
662 case 0x0388 : out[k++]=0xa2; out[k++]=0xc6; break;
663 case 0x0395 : out[k++]=0xc6; break;
664 case 0x03b6 : out[k++]=0xe9; break;
665 case 0x0396 : out[k++]=0xc9; break;
666 case 0x03ae : out[k++]=0xa2; out[k++]=0xea; break;
667 case 0x03b7 : out[k++]=0xea; break;
668 case 0x0389 : out[k++]=0xa2; out[k++]=0xca; break;
669 case 0x0397 : out[k++]=0xca; break;
670 case 0x03b8 : out[k++]=0xeb; break;
671 case 0x0398 : out[k++]=0xcb; break;
672 case 0x0390 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xec; break;
673 case 0x03af : out[k++]=0xa2; out[k++]=0xec; break;
674 case 0x03ca : out[k++]=0xa3; out[k++]=0xec; break;
675 case 0x03b9 : out[k++]=0xec; break;
676 case 0x038a : out[k++]=0xa2; out[k++]=0xcc; break;
677 case 0x03aa : out[k++]=0xa3; out[k++]=0xcc; break;
678 case 0x0399 : out[k++]=0xcc; break;
679 case 0x03ba : out[k++]=0xed; break;
680 case 0x039a : out[k++]=0xcd; break;
681 case 0x03bb : out[k++]=0xee; break;
682 case 0x039b : out[k++]=0xce; break;
683 case 0x03bc : out[k++]=0xef; break;
684 case 0x039c : out[k++]=0xcf; break;
685 case 0x03bd : out[k++]=0xf0; break;
686 case 0x039d : out[k++]=0xd0; break;
687 case 0x03be : out[k++]=0xf1; break;
688 case 0x039e : out[k++]=0xd1; break;
689 case 0x03cc : out[k++]=0xa2; out[k++]=0xf2; break;
690 case 0x03bf : out[k++]=0xf2; break;
691 case 0x038c : out[k++]=0xa2; out[k++]=0xd2; break;
692 case 0x039f : out[k++]=0xd2; break;
693 case 0x03c0 : out[k++]=0xf3; break;
694 case 0x03a0 : out[k++]=0xd3; break;
695 case 0x03c1 : out[k++]=0xf5; break;
696 case 0x03a1 : out[k++]=0xd5; break;
697 case 0x03c2 : out[k++]=0xf7; break;
698 case 0x03c3 : out[k++]=0xf6; break;
699 case 0x03a3 : out[k++]=0xd6; break;
700 case 0x03c4 : out[k++]=0xf8; break;
701 case 0x03a4 : out[k++]=0xd8; break;
702 case 0x03b0 : out[k++]=0xa2; out[k++]=0xa3; out[k++]=0xf9; break;
703 case 0x03cd : out[k++]=0xa2; out[k++]=0xf9; break;
704 case 0x03cb : out[k++]=0xa3; out[k++]=0xf9; break;
705 case 0x03c5 : out[k++]=0xf9; break;
706 case 0x038e : out[k++]=0xa2; out[k++]=0xd9; break;
707 case 0x03ab : out[k++]=0xa3; out[k++]=0xd9; break;
708 case 0x03a5 : out[k++]=0xd9; break;
709 case 0x03c6 : out[k++]=0xfa; break;
710 case 0x03a6 : out[k++]=0xda; break;
711 case 0x03c7 : out[k++]=0xfb; break;
712 case 0x03a7 : out[k++]=0xdb; break;
713 case 0x03c8 : out[k++]=0xfc; break;
714 case 0x03a8 : out[k++]=0xdc; break;
715 case 0x03ce : out[k++]=0xa2; out[k++]=0xfd; break;
716 case 0x03c9 : out[k++]=0xfd; break;
717 case 0x038f : out[k++]=0xa2; out[k++]=0xdd; break;
718 case 0x03a9 : out[k++]=0xdd; break;
722 cd->my_errno = YAZ_ICONV_EILSEQ;
/* Read handler for Advance Greek input.
   A loop first consumes prefix bytes (0x9e and 0x9f are among them);
   my_errno is set to YAZ_ICONV_EINVAL when the input ends after the
   prefixes.
   NOTE(review): judging by yaz_write_advancegreek, 0x9e marks dialytika
   and 0x9f a capital letter - confirm. */
733 static unsigned long yaz_read_advancegreek(yaz_iconv_t cd, unsigned char *inp,
734 size_t inbytesleft, size_t *no_read)
742 while (inbytesleft > 0)
748 else if (*inp == 0x9e)
752 else if (*inp == 0x9f)
762 if (inbytesleft == 0)
764 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
/* Write handler for Advance Greek output.
   Maps a Unicode Greek code point x to one, two or three bytes: plain
   small letters are 0x81..0x99, the prefix 0x9f marks a capital, 0x9d a
   letter with tonos and 0x9e a letter with dialytika (prefixes combine).
   At least 3 free output bytes are required up front, otherwise my_errno
   is set to YAZ_ICONV_E2BIG and (size_t)(-1) is returned.
   Fix: small chi (U+03C7) is now written as 0x97.  It used to share 0x96
   with small phi (U+03C6), which made the two letters indistinguishable
   in the output; 0x97 matches capital chi (0x9f 0x97) and fills the gap
   in the 0x81..0x99 sequence.
   NOTE(review): my_errno is set to YAZ_ICONV_EILSEQ after the listed
   cases, presumably for characters that cannot be encoded - confirm the
   exact condition. */
992 static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
993 char **outbuf, size_t *outbytesleft)
996 unsigned char *out = (unsigned char*) *outbuf;
997 if (*outbytesleft < 3)
999 cd->my_errno = YAZ_ICONV_E2BIG; /* no room for output */
1000 return (size_t)(-1);
1004 case 0x03ac : out[k++]=0x9d; out[k++]=0x81; break;
1005 case 0x03ad : out[k++]=0x9d; out[k++]=0x85; break;
1006 case 0x03ae : out[k++]=0x9d; out[k++]=0x87; break;
1007 case 0x03af : out[k++]=0x9d; out[k++]=0x89; break;
1008 case 0x03cc : out[k++]=0x9d; out[k++]=0x8f; break;
1009 case 0x03cd : out[k++]=0x9d; out[k++]=0x95; break;
1010 case 0x03ce : out[k++]=0x9d; out[k++]=0x99; break;
1011 case 0x0390 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x89; break;
1012 case 0x03b0 : out[k++]=0x9d; out[k++]=0x9e; out[k++]=0x95; break;
1013 case 0x0386 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x81; break;
1014 case 0x0388 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x85; break;
1015 case 0x0389 : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x87; break;
1016 case 0x038a : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x89; break;
1017 case 0x038c : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x8f; break;
1018 case 0x038e : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x95; break;
1019 case 0x038f : out[k++]=0x9d; out[k++]=0x9f; out[k++]=0x99; break;
1020 case 0x03ca : out[k++]=0x9e; out[k++]=0x89; break;
1021 case 0x03cb : out[k++]=0x9e; out[k++]=0x95; break;
1022 case 0x03aa : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x89; break;
1023 case 0x03ab : out[k++]=0x9e; out[k++]=0x9f; out[k++]=0x95; break;
1024 case 0x0391 : out[k++]=0x9f; out[k++]=0x81; break;
1025 case 0x0392 : out[k++]=0x9f; out[k++]=0x82; break;
1026 case 0x0393 : out[k++]=0x9f; out[k++]=0x83; break;
1027 case 0x0394 : out[k++]=0x9f; out[k++]=0x84; break;
1028 case 0x0395 : out[k++]=0x9f; out[k++]=0x85; break;
1029 case 0x0396 : out[k++]=0x9f; out[k++]=0x86; break;
1030 case 0x0397 : out[k++]=0x9f; out[k++]=0x87; break;
1031 case 0x0398 : out[k++]=0x9f; out[k++]=0x88; break;
1032 case 0x0399 : out[k++]=0x9f; out[k++]=0x89; break;
1033 case 0x039a : out[k++]=0x9f; out[k++]=0x8a; break;
1034 case 0x039b : out[k++]=0x9f; out[k++]=0x8b; break;
1035 case 0x039c : out[k++]=0x9f; out[k++]=0x8c; break;
1036 case 0x039d : out[k++]=0x9f; out[k++]=0x8d; break;
1037 case 0x039e : out[k++]=0x9f; out[k++]=0x8e; break;
1038 case 0x039f : out[k++]=0x9f; out[k++]=0x8f; break;
1039 case 0x03a0 : out[k++]=0x9f; out[k++]=0x90; break;
1040 case 0x03a1 : out[k++]=0x9f; out[k++]=0x91; break;
1041 case 0x03a3 : out[k++]=0x9f; out[k++]=0x93; break;
1042 case 0x03a4 : out[k++]=0x9f; out[k++]=0x94; break;
1043 case 0x03a5 : out[k++]=0x9f; out[k++]=0x95; break;
1044 case 0x03a6 : out[k++]=0x9f; out[k++]=0x96; break;
1045 case 0x03a7 : out[k++]=0x9f; out[k++]=0x97; break;
1046 case 0x03a8 : out[k++]=0x9f; out[k++]=0x98; break;
1047 case 0x03a9 : out[k++]=0x9f; out[k++]=0x99; break;
1048 case 0x03b1 : out[k++]=0x81; break;
1049 case 0x03b2 : out[k++]=0x82; break;
1050 case 0x03b3 : out[k++]=0x83; break;
1051 case 0x03b4 : out[k++]=0x84; break;
1052 case 0x03b5 : out[k++]=0x85; break;
1053 case 0x03b6 : out[k++]=0x86; break;
1054 case 0x03b7 : out[k++]=0x87; break;
1055 case 0x03b8 : out[k++]=0x88; break;
1056 case 0x03b9 : out[k++]=0x89; break;
1057 case 0x03ba : out[k++]=0x8a; break;
1058 case 0x03bb : out[k++]=0x8b; break;
1059 case 0x03bc : out[k++]=0x8c; break;
1060 case 0x03bd : out[k++]=0x8d; break;
1061 case 0x03be : out[k++]=0x8e; break;
1062 case 0x03bf : out[k++]=0x8f; break;
1063 case 0x03c0 : out[k++]=0x90; break;
1064 case 0x03c1 : out[k++]=0x91; break;
1065 case 0x03c2 : out[k++]=0x92; break;
1066 case 0x03c3 : out[k++]=0x93; break;
1067 case 0x03c4 : out[k++]=0x94; break;
1068 case 0x03c5 : out[k++]=0x95; break;
1069 case 0x03c6 : out[k++]=0x96; break;
1070 case 0x03c7 : out[k++]=0x97; break;
1071 case 0x03c8 : out[k++]=0x98; break;
1072 case 0x03c9 : out[k++]=0x99; break;
1076 cd->my_errno = YAZ_ICONV_EILSEQ;
/* Forward declaration of the single-character MARC-8 reader that
   yaz_read_marc8 calls to fill its buffer. */
1088 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
1089 size_t inbytesleft, size_t *no_read,
/* Read handler for MARC-8 input.
   Characters are buffered in cd->comb_x and cd->comb_no_read (at most 8
   entries).  While buffered entries remain (comb_offset < comb_size) the
   next one is handed out together with its byte count.  Otherwise the
   buffer is refilled by calling yaz_read_marc8_comb repeatedly, advancing
   through the input by the bytes each call consumed; my_errno is set to
   YAZ_ICONV_EINVAL when the input runs out after at least one character
   has been buffered.
   NOTE(review): presumably the loop collects combining characters that
   precede their base character so that the base can be returned first -
   confirm. */
1092 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
1093 size_t inbytesleft, size_t *no_read)
1096 if (cd->comb_offset < cd->comb_size)
1098 *no_read = cd->comb_no_read[cd->comb_offset];
1099 x = cd->comb_x[cd->comb_offset];
1101 /* special case for double-diacritic combining characters,
1102 INVERTED BREVE and DOUBLE TILDE.
1103 We'll increment the no_read counter by 1, since we want to skip over
1104 the processing of the closing ligature character
1106 /* this code is no longer necessary.. our handlers code in
1107 yaz_marc8_?_conv (generated by charconv.tcl) now returns
1108 0 and no_read=1 when a sequence does not match the input.
1109 The SECOND HALFs in codetables.xml produces a non-existant
1110 entry in the conversion trie.. Hence when met, the input byte is
1111 skipped as it should (in yaz_iconv)
1114 if (x == 0x0361 || x == 0x0360)
1121 cd->comb_offset = 0;
1122 for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
1126 if (inbytesleft == 0 && cd->comb_size)
1128 cd->my_errno = YAZ_ICONV_EINVAL;
1133 x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
1136 cd->comb_x[cd->comb_size] = x;
1137 cd->comb_no_read[cd->comb_size] = *no_read;
1139 inbytesleft = inbytesleft - *no_read;
// Read handler for the MARC8s variant.
// Works like yaz_read_marc8, but when exactly one character is buffered
// (comb_size == 1) and the returned character x together with comb_x[0]
// matches the (x1, x2) pair of a latin1_comb entry, x is replaced by the
// precomposed Latin-1 character and the bytes of the buffered character
// are added to *no_read.
1144 static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
1145 size_t inbytesleft, size_t *no_read)
1147 unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
1148 if (x && cd->comb_size == 1)
1150 /* For MARC8s we try to get a Latin-1 page code out of it */
1152 for (i = 0; latin1_comb[i].x1; i++)
1153 if (cd->comb_x[0] == latin1_comb[i].x2 && x == latin1_comb[i].x1)
1155 *no_read += cd->comb_no_read[0];
1157 x = latin1_comb[i].y;
/* Reads one MARC-8 character, handling page escapes.
   Leading escape sequences (byte 27, then any of the intermediate bytes
   listed in the strchr call, then a final byte) are consumed first; the
   final byte is stored in cd->marc8_esc_mode and the consumed bytes are
   added to *no_read.  my_errno is set to YAZ_ICONV_EINVAL when the input
   ends inside an escape sequence.
   The character itself is decoded by the page handler selected by
   cd->marc8_esc_mode; the bytes it consumed (no_read_sub) are added to
   *no_read.  my_errno is set to YAZ_ICONV_EILSEQ on the remaining path of
   the switch, presumably for an unknown page - confirm. */
1164 static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
1165 size_t inbytesleft, size_t *no_read,
1169 while(inbytesleft >= 1 && inp[0] == 27)
1171 size_t inbytesleft0 = inbytesleft;
1174 while(inbytesleft > 0 && strchr("(,$!)-", *inp))
1179 if (inbytesleft <= 0)
1182 cd->my_errno = YAZ_ICONV_EINVAL;
1185 cd->marc8_esc_mode = *inp++;
1187 (*no_read) += inbytesleft0 - inbytesleft;
1189 if (inbytesleft <= 0)
1191 else if (*inp == ' ')
1199 size_t no_read_sub = 0;
1202 switch(cd->marc8_esc_mode)
1204 case 'B': /* Basic ASCII */
1205 case 's': /* ASCII */
1206 case 'E': /* ANSEL */
1207 x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb);
1211 x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb);
1214 case 'g': /* Greek */
1215 x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb);
1217 case 'b': /* Subscripts */
1218 x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb);
1220 case 'p': /* Superscripts */
1221 x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb);
1223 case '2': /* Basic Hebrew */
1224 x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb);
1226 case 'N': /* Basic Cyrillic */
1227 x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1229 case 'Q': /* Extended Cyrillic */
1230 x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb);
1232 case '3': /* Basic Arabic */
1233 x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb);
1235 case '4': /* Extended Arabic */
1236 x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb);
1238 case 'S': /* Greek */
1239 x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb);
1241 case '1': /* Chinese, Japanese, Korean (EACC) */
1242 x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb);
1246 cd->my_errno = YAZ_ICONV_EILSEQ;
1249 *no_read += no_read_sub;
/* Write handler for UTF-8 output: delegates to yaz_write_UTF8_char and
   lets it report errors through cd->my_errno. */
1254 static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
1255 char **outbuf, size_t *outbytesleft)
1257 return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
/* Encodes the character x as UTF-8 at *outbuf.
   The sequence length (1 to 6 bytes) is chosen from the magnitude of x.
   On success *outbuf is advanced past the written bytes and
   *outbytesleft is reduced accordingly.  When the output buffer is too
   small for the sequence, *error is set to YAZ_ICONV_E2BIG and
   (size_t)(-1) is returned. */
1260 size_t yaz_write_UTF8_char(unsigned long x,
1261 char **outbuf, size_t *outbytesleft,
1264 unsigned char *outp = (unsigned char *) *outbuf;
/* 1 byte: up to 7 bits */
1266 if (x <= 0x7f && *outbytesleft >= 1)
1268 *outp++ = (unsigned char) x;
/* 2 bytes: up to 11 bits */
1271 else if (x <= 0x7ff && *outbytesleft >= 2)
1273 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
1274 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1275 (*outbytesleft) -= 2;
/* 3 bytes: up to 16 bits */
1277 else if (x <= 0xffff && *outbytesleft >= 3)
1279 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
1280 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1281 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1282 (*outbytesleft) -= 3;
/* 4 bytes: up to 21 bits */
1284 else if (x <= 0x1fffff && *outbytesleft >= 4)
1286 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
1287 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1288 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1289 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1290 (*outbytesleft) -= 4;
/* 5 bytes: up to 26 bits */
1292 else if (x <= 0x3ffffff && *outbytesleft >= 5)
1294 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
1295 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1296 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1297 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1298 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1299 (*outbytesleft) -= 5;
/* 6 bytes: everything larger */
1301 else if (*outbytesleft >= 6)
1303 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
1304 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
1305 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
1306 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
1307 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
1308 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
1309 (*outbytesleft) -= 6;
1313 *error = YAZ_ICONV_E2BIG; /* no room for output */
1314 return (size_t)(-1);
1316 *outbuf = (char *) outp;
/* Write handler for ISO-8859-1 output with composition of base character
   plus combining mark.
   If a character is pending in cd->compose_char, the pair (compose_char,
   x) is looked up in latin1_comb: on a match x becomes the precomposed
   character, otherwise the pending character is written as it is.  One
   free output byte is needed for this step; without it my_errno is set to
   YAZ_ICONV_E2BIG and the pending character is kept.
   Afterwards a character in the range 33..126 is not written but stored
   in cd->compose_char, because it may be the base of a following
   combining mark.  Any other character is written as one byte; values
   above 255 or equal to 0 set my_errno to YAZ_ICONV_EILSEQ. */
1320 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
1321 char **outbuf, size_t *outbytesleft)
1323 /* list of two char unicode sequence that, when combined, are
1324 equivalent to single unicode chars that can be represented in
1326 Regular iconv on Linux at least does not seem to convert these,
1327 but since MARC-8 to UTF-8 generates these composed sequence
1328 we get a better chance of a successful MARC-8 -> ISO-8859-1
1330 unsigned char *outp = (unsigned char *) *outbuf;
1332 if (cd->compose_char)
1335 for (i = 0; latin1_comb[i].x1; i++)
1336 if (cd->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
1338 x = latin1_comb[i].y;
1341 if (*outbytesleft < 1)
1342 { /* no room. Retain compose_char and bail out */
1343 cd->my_errno = YAZ_ICONV_E2BIG;
1344 return (size_t)(-1);
1346 if (!latin1_comb[i].x1)
1347 { /* not found. Just write compose_char */
1348 *outp++ = (unsigned char) cd->compose_char;
1350 *outbuf = (char *) outp;
1352 /* compose_char used so reset it. x now holds current char */
1353 cd->compose_char = 0;
/* hold back a possible base character until the next character is known */
1356 if (x > 32 && x < 127 && cd->compose_char == 0)
1358 cd->compose_char = x;
1361 else if (x > 255 || x < 1)
1363 cd->my_errno = YAZ_ICONV_EILSEQ;
1366 else if (*outbytesleft < 1)
1368 cd->my_errno = YAZ_ICONV_E2BIG;
1369 return (size_t)(-1);
1371 *outp++ = (unsigned char) x;
1373 *outbuf = (char *) outp;
/* Flush handler for ISO-8859-1 output: writes the character still held in
   cd->compose_char, if any, and clears it.  When no output byte is free,
   my_errno is set to YAZ_ICONV_E2BIG and (size_t)(-1) is returned. */
1377 static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
1378 char **outbuf, size_t *outbytesleft)
1380 if (cd->compose_char)
1382 unsigned char *outp = (unsigned char *) *outbuf;
1383 if (*outbytesleft < 1)
1385 cd->my_errno = YAZ_ICONV_E2BIG;
1386 return (size_t)(-1);
1388 *outp++ = (unsigned char) cd->compose_char;
1390 *outbuf = (char *) outp;
1391 cd->compose_char = 0;
/* Write handler for big-endian UCS-4 output: stores x as 4 bytes, most
   significant byte first, and advances *outbuf.  With fewer than 4 free
   bytes my_errno is set to YAZ_ICONV_E2BIG and (size_t)(-1) is
   returned. */
1396 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
1397 char **outbuf, size_t *outbytesleft)
1399 unsigned char *outp = (unsigned char *) *outbuf;
1400 if (*outbytesleft >= 4)
1402 *outp++ = (unsigned char) (x>>24);
1403 *outp++ = (unsigned char) (x>>16);
1404 *outp++ = (unsigned char) (x>>8);
1405 *outp++ = (unsigned char) x;
1406 (*outbytesleft) -= 4;
1410 cd->my_errno = YAZ_ICONV_E2BIG;
1411 return (size_t)(-1);
1413 *outbuf = (char *) outp;
/* Write handler for little-endian UCS-4 output: stores x as 4 bytes,
   least significant byte first, and advances *outbuf.  With fewer than 4
   free bytes my_errno is set to YAZ_ICONV_E2BIG and (size_t)(-1) is
   returned. */
1417 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
1418 char **outbuf, size_t *outbytesleft)
1420 unsigned char *outp = (unsigned char *) *outbuf;
1421 if (*outbytesleft >= 4)
1423 *outp++ = (unsigned char) x;
1424 *outp++ = (unsigned char) (x>>8);
1425 *outp++ = (unsigned char) (x>>16);
1426 *outp++ = (unsigned char) (x>>24);
1427 (*outbytesleft) -= 4;
1431 cd->my_errno = YAZ_ICONV_E2BIG;
1432 return (size_t)(-1);
1434 *outbuf = (char *) outp;
/* Finds the MARC-8 code and page for the character x.
   x is first encoded as UTF-8 into a local buffer (my_errno is set to
   YAZ_ICONV_EILSEQ if that fails).  The NUL-terminated UTF-8 string is
   then offered to the yaz_marc8r_*_conv handlers one after another; the
   page escape belonging to the handler is stored in *page_chr and *comb
   is filled in by the handler.  my_errno is set to YAZ_ICONV_EILSEQ at
   the end of the chain.
   NOTE(review): presumably the first handler that returns non-zero wins
   and its result is returned - confirm. */
1438 static unsigned long lookup_marc8(yaz_iconv_t cd,
1439 unsigned long x, int *comb,
1440 const char **page_chr)
1443 char *utf8_outbuf = utf8_buf;
1444 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
1446 r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
1447 if (r == (size_t)(-1))
1449 cd->my_errno = YAZ_ICONV_EILSEQ;
1455 size_t inbytesleft, no_read_sub = 0;
1458 *utf8_outbuf = '\0';
1459 inp = (unsigned char *) utf8_buf;
1460 inbytesleft = strlen(utf8_buf);
1462 x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb);
1465 *page_chr = ESC "(B";
1468 x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb);
1471 *page_chr = ESC "(B";
1474 x = yaz_marc8r_67_conv(inp, inbytesleft, &no_read_sub, comb);
1477 *page_chr = ESC "g";
1480 x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb);
1483 *page_chr = ESC "b";
1486 x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb);
1489 *page_chr = ESC "p";
1492 x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb);
1495 *page_chr = ESC "(2";
1498 x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb);
1501 *page_chr = ESC "(N";
1504 x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb);
1507 *page_chr = ESC "(Q";
1510 x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb);
1513 *page_chr = ESC "(3";
1516 x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb);
1519 *page_chr = ESC "(4";
1522 x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb);
1525 *page_chr = ESC "(S";
1528 x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb);
1531 *page_chr = ESC "$1";
1534 cd->my_errno = YAZ_ICONV_EILSEQ;
/* Emits the character that the MARC-8 writer is holding back
   (cd->write_marc8_last).
   The page recorded in cd->write_marc8_lpage is selected through
   yaz_write_marc8_page_chr, then the pending value is split into bytes
   taken from bits 16-23, 8-15 and 0-7 and copied to the output.  If
   cd->write_marc8_second_half_char is set it is appended as well.
   my_errno is set to YAZ_ICONV_E2BIG and (size_t)(-1) is returned when
   out_no + 2 is not smaller than the free output space.  Finally the
   pending state is cleared.
   NOTE(review): presumably zero bytes of the split value are skipped -
   confirm. */
1539 static size_t flush_combos(yaz_iconv_t cd,
1540 char **outbuf, size_t *outbytesleft)
1542 unsigned long y = cd->write_marc8_last;
1550 assert(cd->write_marc8_lpage);
1551 if (cd->write_marc8_lpage)
1553 size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
1554 cd->write_marc8_lpage);
1559 byte = (unsigned char )((y>>16) & 0xff);
1561 out_buf[out_no++] = byte;
1562 byte = (unsigned char)((y>>8) & 0xff);
1564 out_buf[out_no++] = byte;
1565 byte = (unsigned char )(y & 0xff);
1567 out_buf[out_no++] = byte;
1569 if (out_no + 2 >= *outbytesleft)
1571 cd->my_errno = YAZ_ICONV_E2BIG;
1572 return (size_t) (-1);
1575 memcpy(*outbuf, out_buf, out_no);
1577 (*outbytesleft) -= out_no;
1578 if (cd->write_marc8_second_half_char)
1580 *(*outbuf)++ = cd->write_marc8_second_half_char;
1584 cd->write_marc8_last = 0;
1585 cd->write_marc8_lpage = 0;
1586 cd->write_marc8_second_half_char = 0;
/* Writes the escape sequence that switches the MARC-8 output to the page
   page_chr, unless that page is already active.
   The active page is tracked separately for the G0 set
   (cd->write_marc8_g0) and the G1 set (cd->write_marc8_g1); an escape
   whose second byte is a closing parenthesis addresses G1.  At least 8
   free output bytes are required, otherwise my_errno is set to
   YAZ_ICONV_E2BIG and (size_t)(-1) is returned.  Leaving one of the pages
   ESC p, ESC g or ESC b gets special treatment (see the comments below).
   NOTE(review): page_chr is tested for NULL before the G1 check but is
   passed to strcmp unconditionally afterwards - confirm that callers
   never pass NULL. */
1590 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
1591 char **outbuf, size_t *outbytesleft,
1592 const char *page_chr)
1594 const char **old_page_chr = &cd->write_marc8_g0;
1596 /* are we going to a G1-set (such as ESC ")!E") */
1597 if (page_chr && page_chr[1] == ')')
1598 old_page_chr = &cd->write_marc8_g1;
1600 if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
1603 const char *page_out = page_chr;
1605 if (*outbytesleft < 8)
1607 cd->my_errno = YAZ_ICONV_E2BIG;
1609 return (size_t) (-1);
1614 if (!strcmp(*old_page_chr, ESC "p")
1615 || !strcmp(*old_page_chr, ESC "g")
1616 || !strcmp(*old_page_chr, ESC "b"))
1619 /* Technique 1 leave */
1620 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
1622 /* Must leave script + enter new page */
1623 plen = strlen(page_out);
1624 memcpy(*outbuf, page_out, plen);
1626 (*outbytesleft) -= plen;
1627 page_out = ESC "(B";
1631 *old_page_chr = page_chr;
1632 plen = strlen(page_out);
1633 memcpy(*outbuf, page_out, plen);
1635 (*outbytesleft) -= plen;
/* Convert one character x to MARC-8 and queue it for output.
 *
 * Visible behaviour: x is looked up with lookup_marc8, which yields the
 * MARC-8 value y, a combining indicator (comb) and the page escape
 * (page_chr).  Depending on conditions not fully visible here the function
 * either handles the character immediately (page switch through
 * yaz_write_marc8_page_chr, optional second-half byte 0xEC / 0xFB, the
 * latter for x == 0x0360) or flushes the previously pending character with
 * flush_combos and stores y / page_chr as the new pending character.
 *
 * Sets cd->my_errno to YAZ_ICONV_E2BIG and returns (size_t) -1 when
 * *outbytesleft <= 1 on the path that checks it.
 *
 * NOTE(review): the branch conditions, the byte writes and the return
 * statements are not visible in this excerpt - confirm against the full
 * file.
 */
1641 static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
1642 char **outbuf, size_t *outbytesleft)
1645 const char *page_chr = 0;
1646 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
1649 return (size_t) (-1);
1655 size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
1661 cd->write_marc8_second_half_char = 0xEC;
1662 else if (x == 0x0360)
1663 cd->write_marc8_second_half_char = 0xFB;
1665 if (*outbytesleft <= 1)
1667 cd->my_errno = YAZ_ICONV_E2BIG;
1668 return (size_t) (-1);
/* write out whatever was pending before this character takes its place */
1675 size_t r = flush_combos(cd, outbuf, outbytesleft);
1679 cd->write_marc8_last = y;
1680 cd->write_marc8_lpage = page_chr;
/* Flush handler for MARC-8 output (installed as cd->flush_handle by
 * yaz_iconv_open).
 *
 * Writes out any pending character with flush_combos, forgets the
 * recorded G1 page and returns the result of switching to the page
 * ESC "(B".
 *
 * NOTE(review): the handling of a non-zero result from flush_combos is not
 * visible in this excerpt.
 */
1685 static size_t yaz_flush_marc8(yaz_iconv_t cd,
1686 char **outbuf, size_t *outbytesleft)
1688 size_t r = flush_combos(cd, outbuf, outbytesleft);
1691 cd->write_marc8_g1 = 0;
1692 return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, ESC "(B");
/* Write handler for MARC-8 output (installed as cd->write_handle by
 * yaz_iconv_open).
 *
 * The table latin1_comb is scanned up to its first entry with x1 == 0.
 * When x equals an entry's y, the entry's x1 and x2 are written instead,
 * each through yaz_write_marc8_2.  If that fails with YAZ_ICONV_E2BIG the
 * saved output state is restored.  Characters with no table entry are
 * passed to yaz_write_marc8_2 unchanged.
 *
 * NOTE(review): the restore of *outbuf and the return statements inside
 * the loop are not visible in this excerpt.
 * NOTE(review): write_marc8_last is copied into an int here, while
 * flush_combos reads the same field into an unsigned long - confirm that
 * no truncation can occur.
 * NOTE(review): write_marc8_second_half_char is not among the fields
 * visibly saved and restored - confirm whether it needs to be.
 */
1695 static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
1696 char **outbuf, size_t *outbytesleft)
1699 for (i = 0; latin1_comb[i].x1; i++)
1701 if (x == latin1_comb[i].y)
1704 /* save the output pointers .. */
1705 char *outbuf0 = *outbuf;
1706 size_t outbytesleft0 = *outbytesleft;
1707 int last_ch = cd->write_marc8_last;
1708 const char *lpage = cd->write_marc8_lpage;
/* emit the two components of the table entry, x1 first */
1710 r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
1711 outbuf, outbytesleft);
1714 r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
1715 outbuf, outbytesleft);
1716 if (r && cd->my_errno == YAZ_ICONV_E2BIG)
1718 /* not enough room. reset output to original values */
1720 *outbytesleft = outbytesleft0;
1721 cd->write_marc8_last = last_ch;
1722 cd->write_marc8_lpage = lpage;
/* no table entry matched: write x as is */
1727 return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
/* Write handler for WCHAR_T output (installed as cd->write_handle by
 * yaz_iconv_open).
 *
 * When at least sizeof(wchar_t) bytes of output space remain, the bytes of
 * the local wch are copied to the output with memcpy and *outbytesleft is
 * reduced accordingly.  Otherwise cd->my_errno is set to YAZ_ICONV_E2BIG
 * and (size_t) -1 is returned.  *outbuf is updated to the advanced write
 * position.
 *
 * NOTE(review): the declaration of wch (presumably initialised from x) and
 * the final return are not visible in this excerpt.
 */
1732 static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
1733 char **outbuf, size_t *outbytesleft)
1735 unsigned char *outp = (unsigned char *) *outbuf;
1737 if (*outbytesleft >= sizeof(wchar_t))
/* memcpy avoids any alignment requirement on the output buffer */
1740 memcpy(outp, &wch, sizeof(wch));
1741 outp += sizeof(wch);
1742 (*outbytesleft) -= sizeof(wch);
1746 cd->my_errno = YAZ_ICONV_E2BIG;
1747 return (size_t)(-1);
1749 *outbuf = (char *) outp;
/* Returns non-zero when both a read handler and a write handler are set on
 * cd.  yaz_iconv_open opens the system iconv only when one of these
 * handlers is missing.
 */
1754 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
1756 return cd->read_handle && cd->write_handle;
/* Create a conversion descriptor that converts from fromcode to tocode.
 *
 * The descriptor is allocated with xmalloc and all handlers start out as
 * 0.  fromcode is matched (yaz_matchstr) against the encodings with a
 * built-in reader, tocode against those with a built-in writer.  If either
 * handler is still unset afterwards, the system iconv is opened with
 * iconv_open(tocode, fromcode).
 *
 * NOTE(review): the body of the leading-'@' branch, the handling of an
 * iconv_open failure, any preprocessor conditionals and the return
 * statements are not visible in this excerpt - confirm against the full
 * file.
 */
1759 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
1761 yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
1763 cd->write_handle = 0;
1764 cd->read_handle = 0;
1765 cd->init_handle = 0;
1766 cd->flush_handle = 0;
1767 cd->my_errno = YAZ_ICONV_UNKNOWN;
1769 /* a useful hack: if fromcode has leading @,
1770 the library will not use YAZ's own conversions .. */
1771 if (fromcode[0] == '@')
/* built-in readers; UTF8 is the only one that also sets an init handler */
1775 if (!yaz_matchstr(fromcode, "UTF8"))
1777 cd->read_handle = yaz_read_UTF8;
1778 cd->init_handle = yaz_init_UTF8;
1780 else if (!yaz_matchstr(fromcode, "ISO88591"))
1781 cd->read_handle = yaz_read_ISO8859_1;
1782 else if (!yaz_matchstr(fromcode, "UCS4"))
1783 cd->read_handle = yaz_read_UCS4;
1784 else if (!yaz_matchstr(fromcode, "UCS4LE"))
1785 cd->read_handle = yaz_read_UCS4LE;
1786 else if (!yaz_matchstr(fromcode, "MARC8"))
1787 cd->read_handle = yaz_read_marc8;
1788 else if (!yaz_matchstr(fromcode, "MARC8s"))
1789 cd->read_handle = yaz_read_marc8s;
1790 else if (!yaz_matchstr(fromcode, "advancegreek"))
1791 cd->read_handle = yaz_read_advancegreek;
1792 else if (!yaz_matchstr(fromcode, "iso54281984"))
1793 cd->read_handle = yaz_read_iso5428_1984;
1794 else if (!yaz_matchstr(fromcode, "iso5428:1984"))
1795 cd->read_handle = yaz_read_iso5428_1984;
1797 else if (!yaz_matchstr(fromcode, "WCHAR_T"))
1798 cd->read_handle = yaz_read_wchar_t;
/* built-in writers; ISO88591, MARC8 and MARC8s also set a flush handler */
1801 if (!yaz_matchstr(tocode, "UTF8"))
1802 cd->write_handle = yaz_write_UTF8;
1803 else if (!yaz_matchstr(tocode, "ISO88591"))
1805 cd->write_handle = yaz_write_ISO8859_1;
1806 cd->flush_handle = yaz_flush_ISO8859_1;
1808 else if (!yaz_matchstr (tocode, "UCS4"))
1809 cd->write_handle = yaz_write_UCS4;
1810 else if (!yaz_matchstr(tocode, "UCS4LE"))
1811 cd->write_handle = yaz_write_UCS4LE;
1812 else if (!yaz_matchstr(tocode, "MARC8"))
1814 cd->write_handle = yaz_write_marc8;
1815 cd->flush_handle = yaz_flush_marc8;
1817 else if (!yaz_matchstr(tocode, "MARC8s"))
1819 cd->write_handle = yaz_write_marc8;
1820 cd->flush_handle = yaz_flush_marc8;
1822 else if (!yaz_matchstr(tocode, "advancegreek"))
1824 cd->write_handle = yaz_write_advancegreek;
1826 else if (!yaz_matchstr(tocode, "iso54281984"))
1828 cd->write_handle = yaz_write_iso5428_1984;
1830 else if (!yaz_matchstr(tocode, "iso5428:1984"))
1832 cd->write_handle = yaz_write_iso5428_1984;
1835 else if (!yaz_matchstr(tocode, "WCHAR_T"))
1836 cd->write_handle = yaz_write_wchar_t;
/* one side has no built-in handler: fall back to the system iconv */
1841 if (!cd->read_handle || !cd->write_handle)
1843 cd->iconv_cd = iconv_open (tocode, fromcode);
1844 if (cd->iconv_cd == (iconv_t) (-1))
1851 if (!cd->read_handle || !cd->write_handle)
/* Convert input from *inbuf to *outbuf using the descriptor cd; the
 * parameter list mirrors that of iconv().
 *
 * Visible behaviour:
 *  - one path forwards the call to the system iconv() and, on a
 *    (size_t) -1 result, translates yaz_errno() into one of
 *    YAZ_ICONV_E2BIG, YAZ_ICONV_EINVAL, YAZ_ICONV_EILSEQ or
 *    YAZ_ICONV_UNKNOWN in cd->my_errno;
 *  - the conversion state (error code, MARC-8 escape mode, combining
 *    buffer, compose character, pending MARC-8 output and G0/G1 pages) is
 *    reset, and cd->init_handle, if set, is run on the start of the input;
 *  - when inbuf or *inbuf is null and an output buffer is given, the saved
 *    character cd->unget_x is written and cd->flush_handle, if set, is
 *    called;
 *  - otherwise characters are read with cd->read_handle and written with
 *    cd->write_handle until the input is used up; a write that fails with
 *    YAZ_ICONV_E2BIG records the read length in cd->no_read_x.
 *
 * NOTE(review): the conditions selecting these paths, the loop construct,
 * the local declarations and the return statements are not visible in this
 * excerpt - confirm against the full file.
 */
1861 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
1862 char **outbuf, size_t *outbytesleft)
1871 iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
1872 if (r == (size_t)(-1))
1874 switch (yaz_errno())
1877 cd->my_errno = YAZ_ICONV_E2BIG;
1880 cd->my_errno = YAZ_ICONV_EINVAL;
1883 cd->my_errno = YAZ_ICONV_EILSEQ;
1886 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* reset of the built-in conversion state; the ASCII page ESC "(B" is
   recorded as the current G0 page */
1898 cd->my_errno = YAZ_ICONV_UNKNOWN;
1899 cd->marc8_esc_mode = 'B';
1901 cd->comb_offset = cd->comb_size = 0;
1902 cd->compose_char = 0;
1904 cd->write_marc8_second_half_char = 0;
1905 cd->write_marc8_last = 0;
1906 cd->write_marc8_lpage = 0;
1907 cd->write_marc8_g0 = ESC "(B";
1908 cd->write_marc8_g1 = 0;
1916 if (cd->init_handle && inbuf && *inbuf)
1919 size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
1920 *inbytesleft, &no_read);
1923 if (cd->my_errno == YAZ_ICONV_EINVAL)
1928 *inbytesleft -= no_read;
/* no input given: only pending output is written and flushed */
1934 if (!inbuf || !*inbuf)
1936 if (outbuf && *outbuf)
1939 r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
1940 if (cd->flush_handle)
1941 r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
1956 no_read = cd->no_read_x;
1960 if (*inbytesleft == 0)
1962 r = *inbuf - inbuf0;
1965 x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
1975 r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
1978 /* unable to write it. save it because read_handle cannot
1980 if (cd->my_errno == YAZ_ICONV_E2BIG)
1983 cd->no_read_x = no_read;
1989 *inbytesleft -= no_read;
1990 (*inbuf) += no_read;
/* Returns the error code stored in cd->my_errno; within this file that
 * field is assigned YAZ_ICONV_UNKNOWN, YAZ_ICONV_E2BIG, YAZ_ICONV_EINVAL
 * or YAZ_ICONV_EILSEQ.
 */
1995 int yaz_iconv_error (yaz_iconv_t cd)
1997 return cd->my_errno;
/* Release a conversion descriptor.  The visible code closes the system
 * iconv handle cd->iconv_cd with iconv_close.
 *
 * NOTE(review): the condition guarding iconv_close, the release of cd
 * itself and the return value are not visible in this excerpt - confirm
 * against the full file.
 */
2000 int yaz_iconv_close (yaz_iconv_t cd)
2004 iconv_close (cd->iconv_cd);
2013 * indent-tabs-mode: nil
2015 * vim: shiftwidth=4 tabstop=8 expandtab