2 * Copyright (C) 1995-2005, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.13 2005-06-25 15:46:05 adam Exp $
9 * \brief Implements simple ICONV
11 * This implements an interface similar to that of iconv and
12 * is used by YAZ to interface with iconv (if present).
13 * For systems where iconv is not present, this layer
14 * provides a few important conversions: UTF-8, MARC-8, Latin-1.
32 #include <yaz/yaz-util.h>
/* Prototypes for the nine MARC-8 conversion routines yaz_marc8_1_conv ..
   yaz_marc8_9_conv; their definitions are not part of this listing.
   All share one signature (input bytes, bytes available, and the two
   out-parameters no_read and combining) and return an unsigned long.
   The MARC-8 readers in this file pick one of them according to
   cd->marc8_esc_mode.
   NOTE(review): presumably the return value is the decoded character,
   *no_read the number of bytes consumed and *combining a flag for
   combining characters -- confirm against the implementation. */
34 unsigned long yaz_marc8_1_conv (unsigned char *inp, size_t inbytesleft,
35 size_t *no_read, int *combining);
36 unsigned long yaz_marc8_2_conv (unsigned char *inp, size_t inbytesleft,
37 size_t *no_read, int *combining);
38 unsigned long yaz_marc8_3_conv (unsigned char *inp, size_t inbytesleft,
39 size_t *no_read, int *combining);
40 unsigned long yaz_marc8_4_conv (unsigned char *inp, size_t inbytesleft,
41 size_t *no_read, int *combining);
42 unsigned long yaz_marc8_5_conv (unsigned char *inp, size_t inbytesleft,
43 size_t *no_read, int *combining);
44 unsigned long yaz_marc8_6_conv (unsigned char *inp, size_t inbytesleft,
45 size_t *no_read, int *combining);
46 unsigned long yaz_marc8_7_conv (unsigned char *inp, size_t inbytesleft,
47 size_t *no_read, int *combining);
48 unsigned long yaz_marc8_8_conv (unsigned char *inp, size_t inbytesleft,
49 size_t *no_read, int *combining);
50 unsigned long yaz_marc8_9_conv (unsigned char *inp, size_t inbytesleft,
51 size_t *no_read, int *combining);
/* Conversion state reached through a yaz_iconv_t handle (cd->...).
   NOTE(review): only some of the members appear in this listing; the code
   below also uses my_errno, marc8_esc_mode, comb_offset, comb_size,
   marc8_comb_x, no_read_x and iconv_cd, whose declarations are not
   visible here. */
55 struct yaz_iconv_struct {
/* start-up hook run by yaz_iconv() on the input; in the visible code it
   is only installed for UTF-8 input */
58 size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
59 size_t inbytesleft, size_t *no_read);
/* decodes one character from the input and reports the bytes used */
60 unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
61 size_t inbytesleft, size_t *no_read);
/* encodes character x into the output buffer (tail of this declaration
   is not visible in this listing) */
62 size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
63 char **outbuf, size_t *outbytesleft,
/* characters and their byte counts buffered by yaz_read_marc8(); at most
   8 entries */
69 unsigned long comb_x[8];
70 size_t comb_no_read[8];
/* byte count stored together with marc8_comb_x by the swapping variant
   of yaz_read_marc8() */
73 int marc8_comb_no_read;
/* NOTE(review): presumably a character kept for a retry after a failed
   write in yaz_iconv() -- its use is not visible in this listing */
76 unsigned long unget_x;
/* character held back by yaz_write_ISO8859_1() while it waits to see
   whether a combining mark follows; 0 when nothing is pending */
80 unsigned long compose_char;
/* read_handle for ISO-8859-1 input (installed for "ISO88591" in
   yaz_iconv_open).  The character value is taken directly from the first
   input byte.  The rest of the body is not visible in this listing. */
83 static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
84 size_t inbytesleft, size_t *no_read)
86 unsigned long x = inp[0];
/* init_handle installed together with yaz_read_UTF8 for UTF-8 input.
   Visible behaviour: my_errno is set to YAZ_ICONV_EINVAL on one path and
   to YAZ_ICONV_EILSEQ when bytes 1 and 2 of the input are not 0xBB 0xBF.
   NOTE(review): 0xBB 0xBF are the last two bytes of the UTF-8 byte order
   mark (EF BB BF), so this is presumably a BOM check; the test on inp[0]
   and the condition guarding the EINVAL assignment are not visible. */
91 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
92 size_t inbytesleft, size_t *no_read)
101 cd->my_errno = YAZ_ICONV_EINVAL;
104 if (inp[1] != 0xbb || inp[2] != 0xbf)
106 cd->my_errno = YAZ_ICONV_EILSEQ;
/* read_handle for UTF-8 input.
   Visible logic: the lead byte inp[0] selects the sequence length, each
   branch additionally requiring that many bytes of input, and the payload
   bits of all bytes are assembled into x:
     <= 0xdf : 2 bytes    <= 0xef : 3 bytes    <= 0xf7 : 4 bytes
     <= 0xfb : 5 bytes    <= 0xfd : 6 bytes
   A lead byte that reaches the first visible test and is <= 0xbf or
   >= 0xfe gives YAZ_ICONV_EILSEQ.  When none of the branches applies
   (acceptable lead byte but not enough input) my_errno becomes
   YAZ_ICONV_EINVAL.
   NOTE(review): the chain begins with "else if", so an earlier branch
   (presumably the single-byte case) is missing from this listing, as are
   the conditions guarding the EILSEQ assignment inside each multi-byte
   branch.  The visible expressions mask continuation bytes with 0x3f; no
   check of their top two bits is visible here. */
113 static unsigned long yaz_read_UTF8 (yaz_iconv_t cd, unsigned char *inp,
114 size_t inbytesleft, size_t *no_read)
/* not a valid lead byte */
123 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
126 cd->my_errno = YAZ_ICONV_EILSEQ;
/* 2-byte sequence: 5 + 6 payload bits */
128 else if (inp[0] <= 0xdf && inbytesleft >= 2)
130 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
136 cd->my_errno = YAZ_ICONV_EILSEQ;
/* 3-byte sequence: 4 + 6 + 6 payload bits */
139 else if (inp[0] <= 0xef && inbytesleft >= 3)
141 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
148 cd->my_errno = YAZ_ICONV_EILSEQ;
/* 4-byte sequence: 3 + 6 + 6 + 6 payload bits */
151 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
153 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
154 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
160 cd->my_errno = YAZ_ICONV_EILSEQ;
/* 5-byte sequence */
163 else if (inp[0] <= 0xfb && inbytesleft >= 5)
165 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
166 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
173 cd->my_errno = YAZ_ICONV_EILSEQ;
/* 6-byte sequence */
176 else if (inp[0] <= 0xfd && inbytesleft >= 6)
178 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
179 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
180 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
186 cd->my_errno = YAZ_ICONV_EILSEQ;
/* no branch matched: the sequence is incomplete */
192 cd->my_errno = YAZ_ICONV_EINVAL;
/* read_handle for UCS-4 input in big-endian byte order (installed for
   "UCS4").  Incomplete input is reported with YAZ_ICONV_EINVAL; otherwise
   four bytes are combined with inp[0] as the most significant byte.
   NOTE(review): inp[0] is promoted to int before the shift, so a first
   byte >= 0x80 is shifted into the sign bit of a 32-bit int; casting the
   operand to unsigned long first would avoid that. */
197 static unsigned long yaz_read_UCS4 (yaz_iconv_t cd, unsigned char *inp,
198 size_t inbytesleft, size_t *no_read)
204 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
209 x = (inp[0]<<24) | (inp[1]<<16) | (inp[2]<<8) | inp[3];
/* read_handle for UCS-4 input in little-endian byte order (installed for
   "UCS4LE").  Incomplete input is reported with YAZ_ICONV_EINVAL;
   otherwise four bytes are combined with inp[3] as the most significant
   byte.
   NOTE(review): inp[3] is promoted to int before the shift, so a byte
   >= 0x80 is shifted into the sign bit of a 32-bit int; casting the
   operand to unsigned long first would avoid that. */
215 static unsigned long yaz_read_UCS4LE (yaz_iconv_t cd, unsigned char *inp,
216 size_t inbytesleft, size_t *no_read)
222 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
227 x = (inp[3]<<24) | (inp[2]<<16) | (inp[1]<<8) | inp[0];
/* read_handle for input made of native wchar_t values (installed for
   "WCHAR_T").  Fewer than sizeof(wchar_t) bytes of input is reported with
   YAZ_ICONV_EINVAL.  Otherwise one wchar_t is copied out of the byte
   buffer with memcpy into the local wch, and *no_read is set to its
   size. */
234 static unsigned long yaz_read_wchar_t (yaz_iconv_t cd, unsigned char *inp,
235 size_t inbytesleft, size_t *no_read)
239 if (inbytesleft < sizeof(wchar_t))
241 cd->my_errno = YAZ_ICONV_EINVAL; /* incomplete input */
247 memcpy (&wch, inp, sizeof(wch));
249 *no_read = sizeof(wch);
/* Forward declaration of yaz_read_marc8_comb(), which is defined after
   yaz_read_marc8() but called from it.  The final line of the parameter
   list is not visible in this listing. */
257 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
258 size_t inbytesleft, size_t *no_read,
/* read_handle for MARC-8 input (buffering variant).
   While buffered entries remain (comb_offset < comb_size) the next entry
   of comb_x / comb_no_read supplies the character and its byte count.
   Otherwise the loop calls yaz_read_marc8_comb() repeatedly, storing each
   character and byte count in comb_x[] / comb_no_read[] (at most 8
   entries) and reducing inbytesleft by the bytes consumed.
   NOTE(review): the loop exit condition that uses the "comb" flag, the
   advance of comb_offset and the return statements are not visible in
   this listing.  A second function with the same name appears later in
   the file; presumably one of the two is disabled by preprocessor lines
   that are also not visible. */
261 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
262 size_t inbytesleft, size_t *no_read)
/* replay a previously buffered character */
265 if (cd->comb_offset < cd->comb_size)
267 *no_read = cd->comb_no_read[cd->comb_offset];
268 x = cd->comb_x[cd->comb_offset];
/* refill the buffer */
274 for (cd->comb_size = 0; cd->comb_size < 8; cd->comb_size++)
277 x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &comb);
280 cd->comb_x[cd->comb_size] = x;
281 cd->comb_no_read[cd->comb_size] = *no_read;
283 inbytesleft = inbytesleft - *no_read;
/* Decodes one MARC-8 character and passes the caller's "comb" pointer on
   to the table routine.
   Visible steps:
     1. While the input starts with ESC (27), skip any of the bytes
        ( , $ ! and take the byte after them as the new
        cd->marc8_esc_mode.  The bytes used by the escape sequence are
        added to *no_read.  Running out of input inside the sequence sets
        YAZ_ICONV_EINVAL.
     2. Dispatch on cd->marc8_esc_mode to one of yaz_marc8_1_conv ..
        yaz_marc8_9_conv; an unknown mode sets YAZ_ICONV_EILSEQ.
     3. Add the bytes consumed by the table routine to *no_read.
   NOTE(review): strchr() also matches the terminating NUL of its string
   argument, so a 0 byte after ESC is treated like one of the skipped
   bytes.  inbytesleft is a size_t, so "<= 0" is the same as "== 0".
   The pointer/counter updates inside the loops, the break statements and
   the returns are not visible in this listing. */
288 static unsigned long yaz_read_marc8_comb (yaz_iconv_t cd, unsigned char *inp,
289 size_t inbytesleft, size_t *no_read,
/* consume escape sequences that switch the active character set */
293 while(inbytesleft >= 1 && inp[0] == 27)
295 size_t inbytesleft0 = inbytesleft;
298 while(inbytesleft > 0 && strchr("(,$!", *inp))
303 if (inbytesleft <= 0)
306 cd->my_errno = YAZ_ICONV_EINVAL;
309 cd->marc8_esc_mode = *inp++;
311 (*no_read) += inbytesleft0 - inbytesleft;
313 if (inbytesleft <= 0)
318 size_t no_read_sub = 0;
/* choose the conversion table from the current escape mode */
321 switch(cd->marc8_esc_mode)
323 case 'B': /* Basic ASCII */
324 case 'E': /* ANSEL */
325 case 's': /* ASCII */
326 x = yaz_marc8_1_conv(inp, inbytesleft, &no_read_sub, comb);
328 case 'g': /* Greek */
329 x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub, comb);
331 case 'b': /* Subscripts */
332 x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub, comb);
334 case 'p': /* Superscripts */
335 x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub, comb);
337 case '2': /* Basic Hebrew */
338 x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub, comb);
340 case 'N': /* Basic Cyrillic */
341 case 'Q': /* Extended Cyrillic */
342 x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub, comb);
344 case '3': /* Basic Arabic */
345 case '4': /* Extended Arabic */
346 x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub, comb);
348 case 'S': /* Greek */
349 x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub, comb);
351 case '1': /* Chinese, Japanese, Korean (EACC) */
352 x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub, comb);
356 cd->my_errno = YAZ_ICONV_EILSEQ;
/* total = escape bytes + bytes used by the table routine */
359 *no_read += no_read_sub;
/* read_handle for MARC-8 input (swapping variant).
   Visible steps:
     1. If a character is pending in cd->marc8_comb_x, hand it out with
        the byte count saved in cd->marc8_comb_no_read and clear the
        pending slot.
     2. Handle ESC sequences and dispatch on cd->marc8_esc_mode exactly as
        yaz_read_marc8_comb() does, but with a local "comb" flag.
     3. If the decoded character is flagged as combining and nothing is
        pending, read the following character by a recursive call, keep
        the combining character in marc8_comb_x for the next call and
        hand out the following character first, i.e. the two are swapped.
   NOTE(review): this is the second definition named yaz_read_marc8 in the
   file; presumably one of the two is disabled by preprocessor lines not
   visible in this listing.  The printf below writes a debug trace to
   stdout; whether it is compiled in is likewise not visible. */
364 static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
365 size_t inbytesleft, size_t *no_read)
/* deliver the character saved by the previous call */
367 if (cd->marc8_comb_x)
369 unsigned long x = cd->marc8_comb_x;
370 *no_read = cd->marc8_comb_no_read;
371 cd->marc8_comb_x = 0;
/* consume escape sequences that switch the active character set */
375 while(inbytesleft >= 1 && inp[0] == 27)
377 size_t inbytesleft0 = inbytesleft;
380 while(inbytesleft > 0 && strchr("(,$!", *inp))
385 if (inbytesleft <= 0)
388 cd->my_errno = YAZ_ICONV_EINVAL;
391 cd->marc8_esc_mode = *inp++;
393 (*no_read) += inbytesleft0 - inbytesleft;
395 if (inbytesleft <= 0)
401 size_t no_read_sub = 0;
/* choose the conversion table from the current escape mode */
403 switch(cd->marc8_esc_mode)
405 case 'B': /* Basic ASCII */
406 case 'E': /* ANSEL */
407 case 's': /* ASCII */
408 x = yaz_marc8_1_conv(inp, inbytesleft, &no_read_sub, &comb);
410 case 'g': /* Greek */
411 x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub, &comb);
413 case 'b': /* Subscripts */
414 x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub, &comb);
416 case 'p': /* Superscripts */
417 x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub, &comb);
419 case '2': /* Basic Hebrew */
420 x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub, &comb);
422 case 'N': /* Basic Cyrillic */
423 case 'Q': /* Extended Cyrillic */
424 x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub, &comb);
426 case '3': /* Basic Arabic */
427 case '4': /* Extended Arabic */
428 x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub, &comb);
430 case 'S': /* Greek */
431 x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub, &comb);
433 case '1': /* Chinese, Japanese, Korean (EACC) */
434 x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub, &comb);
438 cd->my_errno = YAZ_ICONV_EILSEQ;
/* debug trace */
442 printf ("esc mode=%c x=%04lX comb=%d\n", cd->marc8_esc_mode, x, comb);
444 *no_read += no_read_sub;
/* combining character with nothing pending: swap it with its successor */
446 if (comb && cd->marc8_comb_x == 0)
449 unsigned long next_x;
451 /* read next char .. */
452 next_x = yaz_read_marc8(cd, inp + *no_read,
453 inbytesleft - *no_read, &tmp_read);
454 /* save this x for later .. */
455 cd->marc8_comb_x = x;
456 /* save next read for later .. */
457 cd->marc8_comb_no_read = tmp_read;
458 /* return next x - thereby swap */
/* write_handle for UTF-8 output: encodes x in 1 to 6 bytes.
   The encoded length is chosen from the value of x (<= 0x7f, <= 0x7ff,
   <= 0xffff, <= 0x1fffff, <= 0x3ffffff, anything larger) and each branch
   also requires that many bytes of room in *outbytesleft.  Multi-byte
   branches emit a lead byte carrying the length marker (0xc0, 0xe0, 0xf0,
   0xf8, 0xfc) followed by continuation bytes of the form 0x80 | 6 bits,
   and reduce *outbytesleft by the number of bytes written.  If no branch
   fits, my_errno is set to YAZ_ICONV_E2BIG.  The advanced write pointer
   is stored back into *outbuf.
   NOTE(review): the last line of the parameter list, the counter update
   of the 1-byte branch and the return statements are not visible in this
   listing. */
466 static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
467 char **outbuf, size_t *outbytesleft,
470 unsigned char *outp = (unsigned char *) *outbuf;
/* 1 byte */
471 if (x <= 0x7f && *outbytesleft >= 1)
473 *outp++ = (unsigned char) x;
/* 2 bytes */
476 else if (x <= 0x7ff && *outbytesleft >= 2)
478 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
479 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
480 (*outbytesleft) -= 2;
/* 3 bytes */
482 else if (x <= 0xffff && *outbytesleft >= 3)
484 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
485 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
486 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
487 (*outbytesleft) -= 3;
/* 4 bytes */
489 else if (x <= 0x1fffff && *outbytesleft >= 4)
491 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
492 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
493 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
494 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
495 (*outbytesleft) -= 4;
/* 5 bytes */
497 else if (x <= 0x3ffffff && *outbytesleft >= 5)
499 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
500 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
501 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
502 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
503 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
504 (*outbytesleft) -= 5;
/* 6 bytes: every remaining value, provided there is room */
506 else if (*outbytesleft >= 6)
508 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
509 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
510 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
511 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
512 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
513 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
514 (*outbytesleft) -= 6;
518 cd->my_errno = YAZ_ICONV_E2BIG; /* not room for output */
521 *outbuf = (char *) outp;
/* write_handle for ISO-8859-1 output.
   Besides writing single bytes, it recombines a base letter followed by
   a combining mark into one Latin-1 character using the comb table below.
   Each table entry holds the base letter (x1), the combining mark (x2)
   and the resulting Latin-1 code; the search loop stops at an entry whose
   x1 is 0.
   Visible flow:
     - a character in the range 33..126 that is not flagged as the last
       one is held in cd->compose_char when nothing is pending yet;
     - with a character pending, the table is searched for the pair
       (compose_char, x).  The code after the loop writes the pending
       character unchanged, may hold x as the new pending character, or
       reports YAZ_ICONV_E2BIG when there is no room;
     - finally a value of x outside 1..255 gives YAZ_ICONV_EILSEQ,
       otherwise x is written if there is room, else YAZ_ICONV_E2BIG.
   NOTE(review): several lines of this function are missing from this
   listing (the end of the parameter list, the table's struct declaration
   and terminator, the branch taken on a table match, braces and returns),
   so the exact branch structure should be confirmed against the full
   source. */
526 static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
527 char **outbuf, size_t *outbytesleft,
530 /* list of two char unicode sequence that, when combined, are
531 equivalent to single unicode chars that can be represented in
533 Regular iconv on Linux at least does not seem to convert these,
534 but since MARC-8 to UTF-8 generates these composed sequence
535 we get a better chance of a successful MARC-8 -> ISO-8859-1
538 unsigned long x1, x2;
541 { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
542 { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
543 { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
544 { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
545 { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
546 { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
547 /* no need for 0xc6 LATIN CAPITAL LETTER AE */
548 { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
549 { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
550 { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
551 { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
552 { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
553 { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
554 { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
555 { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
556 { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
557 { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
558 { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
559 { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
560 { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
561 { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
562 { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
563 /* omitted: 0xd7 MULTIPLICATION SIGN */
564 /* omitted: 0xd8 LATIN CAPITAL LETTER O WITH STROKE */
565 { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
566 { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
567 { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
568 { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
569 { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
570 /* omitted: 0xde LATIN CAPITAL LETTER THORN */
571 /* omitted: 0xdf LATIN SMALL LETTER SHARP S */
572 { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
573 { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
574 { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
575 { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
576 { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
577 { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
578 /* omitted: 0xe6 LATIN SMALL LETTER AE */
579 { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
580 { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
581 { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
582 { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
583 { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
584 { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
585 { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
586 { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
587 { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
588 /* omitted: 0xf0 LATIN SMALL LETTER ETH */
589 { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
590 { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
591 { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
592 { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
593 { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
594 { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
595 /* omitted: 0xf7 DIVISION SIGN */
596 /* omitted: 0xf8 LATIN SMALL LETTER O WITH STROKE */
597 { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
598 { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
599 { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
600 { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
601 { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
602 /* omitted: 0xfe LATIN SMALL LETTER THORN */
603 { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
607 unsigned char *outp = (unsigned char *) *outbuf;
/* hold a printable ASCII character back: a combining mark may follow */
609 if (!last && x > 32 && x < 127 && cd->compose_char == 0)
611 cd->compose_char = x;
/* a character is pending: look for (pending, x) in the table */
614 else if (cd->compose_char)
617 for (i = 0; comb[i].x1; i++)
618 if (cd->compose_char == comb[i].x1 && x == comb[i].x2)
/* write the pending character unchanged */
625 if (*outbytesleft >= 1)
627 *outp++ = (unsigned char) cd->compose_char;
629 *outbuf = (char *) outp;
630 if (!last && x > 32 && x < 127)
632 cd->compose_char = x;
638 cd->my_errno = YAZ_ICONV_E2BIG;
642 /* compose_char and old x combined to one new char: x */
643 cd->compose_char = 0;
/* only 1..255 can be represented in the output */
645 if (x > 255 || x < 1)
647 cd->my_errno = YAZ_ICONV_EILSEQ;
650 else if (*outbytesleft >= 1)
652 *outp++ = (unsigned char) x;
657 cd->my_errno = YAZ_ICONV_E2BIG;
660 *outbuf = (char *) outp;
/* write_handle for UCS-4 output in big-endian byte order (installed for
   "UCS4").  With at least 4 bytes of room the four low-order bytes of x
   are written most significant first and *outbytesleft is reduced by 4;
   otherwise my_errno is set to YAZ_ICONV_E2BIG.  The advanced write
   pointer is stored back into *outbuf.
   NOTE(review): the end of the parameter list and the return statements
   are not visible in this listing. */
665 static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
666 char **outbuf, size_t *outbytesleft,
669 unsigned char *outp = (unsigned char *) *outbuf;
670 if (*outbytesleft >= 4)
672 *outp++ = (unsigned char) (x>>24);
673 *outp++ = (unsigned char) (x>>16);
674 *outp++ = (unsigned char) (x>>8);
675 *outp++ = (unsigned char) x;
676 (*outbytesleft) -= 4;
680 cd->my_errno = YAZ_ICONV_E2BIG;
683 *outbuf = (char *) outp;
/* write_handle for UCS-4 output in little-endian byte order (installed
   for "UCS4LE").  With at least 4 bytes of room the four low-order bytes
   of x are written least significant first and *outbytesleft is reduced
   by 4; otherwise my_errno is set to YAZ_ICONV_E2BIG.  The advanced write
   pointer is stored back into *outbuf.
   NOTE(review): the end of the parameter list and the return statements
   are not visible in this listing. */
687 static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
688 char **outbuf, size_t *outbytesleft,
691 unsigned char *outp = (unsigned char *) *outbuf;
692 if (*outbytesleft >= 4)
694 *outp++ = (unsigned char) x;
695 *outp++ = (unsigned char) (x>>8);
696 *outp++ = (unsigned char) (x>>16);
697 *outp++ = (unsigned char) (x>>24);
698 (*outbytesleft) -= 4;
702 cd->my_errno = YAZ_ICONV_E2BIG;
705 *outbuf = (char *) outp;
/* write_handle for output made of native wchar_t values (installed for
   "WCHAR_T").  With at least sizeof(wchar_t) bytes of room the local wch
   is copied into the output with memcpy and *outbytesleft is reduced by
   its size; otherwise my_errno is set to YAZ_ICONV_E2BIG.
   NOTE(review): the declaration and assignment of wch (presumably from
   x), the advance of the write pointer and the return statements are not
   visible in this listing. */
710 static size_t yaz_write_wchar_t (yaz_iconv_t cd, unsigned long x,
711 char **outbuf, size_t *outbytesleft,
714 unsigned char *outp = (unsigned char *) *outbuf;
716 if (*outbytesleft >= sizeof(wchar_t))
719 memcpy(outp, &wch, sizeof(wch));
721 (*outbytesleft) -= sizeof(wch);
725 cd->my_errno = YAZ_ICONV_E2BIG;
728 *outbuf = (char *) outp;
/* Returns non-zero when both a read handler and a write handler are set
   on cd, i.e. the conversion is carried out by the routines in this file
   rather than by the system iconv. */
733 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
735 return cd->read_handle && cd->write_handle;
/* Creates a conversion handle from character set fromcode to tocode.
   Visible steps:
     - allocate the handle with xmalloc and reset its state (no write
       handler, my_errno = YAZ_ICONV_UNKNOWN, MARC-8 escape mode 'B',
       empty combining buffer, no pending characters);
     - a leading '@' in fromcode is the documented hack for bypassing the
       built-in conversions;
     - otherwise select a read handler from fromcode (UTF8, ISO88591,
       UCS4, UCS4LE, MARC8, WCHAR_T) and a write handler from tocode
       (UTF8, ISO88591, UCS4, UCS4LE, WCHAR_T).  yaz_matchstr() is
       treated as a match when it returns 0;
     - if either handler is still missing, fall back to the system
       iconv_open(tocode, fromcode).
   NOTE(review): the handling of the '@' prefix, the action taken when
   iconv_open fails, the purpose of the second "handler missing" test
   (presumably the build without iconv) and the return statements are not
   visible in this listing. */
738 yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
740 yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
742 cd->write_handle = 0;
745 cd->my_errno = YAZ_ICONV_UNKNOWN;
746 cd->marc8_esc_mode = 'B';
748 cd->comb_offset = cd->comb_size = 0;
750 cd->marc8_comb_x = 0;
752 cd->compose_char = 0;
754 /* a useful hack: if fromcode has leading @,
755 the library not use YAZ's own conversions .. */
756 if (fromcode[0] == '@')
/* built-in decoders, selected by source character set */
760 if (!yaz_matchstr(fromcode, "UTF8"))
762 cd->read_handle = yaz_read_UTF8;
763 cd->init_handle = yaz_init_UTF8;
765 else if (!yaz_matchstr(fromcode, "ISO88591"))
766 cd->read_handle = yaz_read_ISO8859_1;
767 else if (!yaz_matchstr(fromcode, "UCS4"))
768 cd->read_handle = yaz_read_UCS4;
769 else if (!yaz_matchstr(fromcode, "UCS4LE"))
770 cd->read_handle = yaz_read_UCS4LE;
771 else if (!yaz_matchstr(fromcode, "MARC8"))
772 cd->read_handle = yaz_read_marc8;
774 else if (!yaz_matchstr(fromcode, "WCHAR_T"))
775 cd->read_handle = yaz_read_wchar_t;
/* built-in encoders, selected by target character set */
778 if (!yaz_matchstr(tocode, "UTF8"))
779 cd->write_handle = yaz_write_UTF8;
780 else if (!yaz_matchstr(tocode, "ISO88591"))
781 cd->write_handle = yaz_write_ISO8859_1;
782 else if (!yaz_matchstr (tocode, "UCS4"))
783 cd->write_handle = yaz_write_UCS4;
784 else if (!yaz_matchstr(tocode, "UCS4LE"))
785 cd->write_handle = yaz_write_UCS4LE;
787 else if (!yaz_matchstr(tocode, "WCHAR_T"))
788 cd->write_handle = yaz_write_wchar_t;
/* no complete built-in pair: use the system iconv */
793 if (!cd->read_handle || !cd->write_handle)
795 cd->iconv_cd = iconv_open (tocode, fromcode);
796 if (cd->iconv_cd == (iconv_t) (-1))
803 if (!cd->read_handle || !cd->write_handle)
/* Converts characters from *inbuf to *outbuf, with an interface
   resembling iconv().
   Visible steps:
     - on the system-iconv path the call is forwarded to iconv(); a
       result of (size_t)(-1) is translated into one of
       YAZ_ICONV_E2BIG, YAZ_ICONV_EINVAL, YAZ_ICONV_EILSEQ or
       YAZ_ICONV_UNKNOWN in cd->my_errno;
     - a null inbuf or *inbuf sets my_errno to YAZ_ICONV_UNKNOWN;
     - the init handler is run on the input and the bytes it reports in
       no_read are deducted from *inbytesleft;
     - in the conversion loop the read handler decodes one character x,
       and the write handler encodes it; its last argument is 1 when the
       character uses up the remaining input.  On a failed write the byte
       count is kept in cd->no_read_x, and on success *inbytesleft is
       reduced by no_read.
   NOTE(review): the conditions that select each of these paths (for
   instance when the init handler runs and which errno value maps to
   which code), the loop construct, the pointer updates and the return
   values are not visible in this listing. */
813 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
814 char **outbuf, size_t *outbytesleft)
822 iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
823 if (r == (size_t)(-1))
828 cd->my_errno = YAZ_ICONV_E2BIG;
831 cd->my_errno = YAZ_ICONV_EINVAL;
834 cd->my_errno = YAZ_ICONV_EILSEQ;
837 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* call without input */
843 if (inbuf == 0 || *inbuf == 0)
846 cd->my_errno = YAZ_ICONV_UNKNOWN;
/* give the init handler a look at the start of the input */
856 size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
857 *inbytesleft, &no_read);
860 if (cd->my_errno == YAZ_ICONV_EINVAL)
865 *inbytesleft -= no_read;
877 if (*inbytesleft == 0)
/* decode one character */
884 x = (cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
895 no_read = cd->no_read_x;
/* encode it; the final argument flags the last character of the input */
899 r = (cd->write_handle)(cd, x, outbuf, outbytesleft,
900 (*inbytesleft - no_read) == 0 ? 1 : 0);
903 /* unable to write it. save it because read_handle cannot
906 cd->no_read_x = no_read;
911 *inbytesleft -= no_read;
/* Error query for a conversion handle.
   NOTE(review): the body is not visible in this listing; presumably it
   returns cd->my_errno, the YAZ_ICONV_* code recorded by the conversion
   routines -- confirm against the full source. */
917 int yaz_iconv_error (yaz_iconv_t cd)
/* Releases a conversion handle.  The visible code closes the system
   iconv descriptor cd->iconv_cd with iconv_close().
   NOTE(review): the condition guarding that call, the release of cd
   itself and the return value are not visible in this listing. */
922 int yaz_iconv_close (yaz_iconv_t cd)
926 iconv_close (cd->iconv_cd);
936 * indent-tabs-mode: nil
938 * vim: shiftwidth=4 tabstop=8 expandtab