2 * Copyright (C) 1995-2008, Index Data ApS
3 * See the file LICENSE for details.
5 * $Id: siconv.c,v 1.50 2008-03-12 08:53:28 adam Exp $
9 * \brief UTF-8 encoding and decoding (iconv)
/**
 * \brief Examines the start of a UTF-8 input buffer for a byte-order mark.
 * \param cd conversion handle; YAZ_ICONV_EINVAL is recorded on it through
 *           yaz_iconv_set_errno()
 * \param inp input bytes; inp[1] and inp[2] are compared against the
 *            trailing bytes of the mark
 * \param inbytesleft number of bytes available at inp
 * \param no_read presumably receives the number of leading bytes to skip;
 *                the assignments are not visible in this listing
 *
 * NOTE(review): several lines of this function are absent from the listing
 * (the embedded numbers jump 24 -> 33 -> 36 -> 43). The condition guarding
 * the EINVAL report, the stores to *no_read and the return statements could
 * not be inspected - confirm against the complete file.
 */
23 size_t yaz_init_UTF8(yaz_iconv_t cd, unsigned char *inp,
24 size_t inbytesleft, size_t *no_read)
33 yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
/* The UTF-8 byte-order mark is EF BB BF, so BOTH trailing bytes have to
 * match. Testing inp[1] for inequality would never recognise a real mark and
 * would instead accept unrelated sequences such as EF BF BF. */
36 if (inp[1] == 0xbb && inp[2] == 0xbf)
/**
 * \brief Decodes one UTF-8 sequence from inp; the visible branches cover the
 *        two- to six-byte forms.
 * \param inp start of the sequence; inp[0] selects the sequence length
 * \param inbytesleft number of bytes available at inp; each branch requires
 *                    enough bytes for its sequence length
 * \param no_read set to 0 on entry. NOTE(review): the lines that store the
 *                consumed length on success are missing from this listing.
 *
 * A further out-parameter is written as *error with YAZ_ICONV_EILSEQ or
 * YAZ_ICONV_EINVAL; its declaration line is not visible here.
 *
 * NOTE(review): the listing has lost lines throughout this function (the
 * embedded numbers have gaps): the first branch of the if/else chain, one
 * test in every multi-byte branch, the tails of two assignments to x and
 * the return statement. Presumably the decoded value x is returned - confirm
 * against the complete file.
 */
43 unsigned long yaz_read_UTF8_char(unsigned char *inp,
44 size_t inbytesleft, size_t *no_read,
49 *no_read = 0; /* by default */
/* Lead bytes up to 0xBF that reach this branch, and 0xFE/0xFF, are rejected
 * as illegal. NOTE(review): this is an `else if`; the preceding branch -
 * presumably the single-byte case - is not visible. */
55 else if (inp[0] <= 0xbf || inp[0] >= 0xfe)
57 *error = YAZ_ICONV_EILSEQ;
/* Two-byte form: lead byte up to 0xDF plus one continuation byte. */
59 else if (inp[0] <= 0xdf && inbytesleft >= 2)
/* A continuation byte must look like 10xxxxxx. */
61 if ((inp[1] & 0xc0) == 0x80)
/* 5 payload bits from the lead byte, 6 from the continuation byte. */
63 x = ((inp[0] & 0x1f) << 6) | (inp[1] & 0x3f);
/* NOTE(review): every multi-byte branch reports EILSEQ twice. One report
 * pairs with the continuation-byte test above; the condition for the other
 * is on a missing line (possibly an overlong-encoding check) - confirm. */
67 *error = YAZ_ICONV_EILSEQ;
70 *error = YAZ_ICONV_EILSEQ;
/* Three-byte form: lead byte up to 0xEF plus two continuation bytes. */
72 else if (inp[0] <= 0xef && inbytesleft >= 3)
74 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80)
/* 4 payload bits from the lead byte; the expression continues on a line
 * that is absent from this listing. */
76 x = ((inp[0] & 0x0f) << 12) | ((inp[1] & 0x3f) << 6) |
81 *error = YAZ_ICONV_EILSEQ;
84 *error = YAZ_ICONV_EILSEQ;
/* Four-byte form: lead byte up to 0xF7 plus three continuation bytes. */
86 else if (inp[0] <= 0xf7 && inbytesleft >= 4)
88 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
89 && (inp[3] & 0xc0) == 0x80)
/* 3 payload bits from the lead byte, 6 from each continuation byte. */
91 x = ((inp[0] & 0x07) << 18) | ((inp[1] & 0x3f) << 12) |
92 ((inp[2] & 0x3f) << 6) | (inp[3] & 0x3f);
96 *error = YAZ_ICONV_EILSEQ;
99 *error = YAZ_ICONV_EILSEQ;
/* Five-byte form: lead byte up to 0xFB plus four continuation bytes. */
101 else if (inp[0] <= 0xfb && inbytesleft >= 5)
103 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
104 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80)
/* 2 payload bits from the lead byte; the expression continues on a line
 * that is absent from this listing. */
106 x = ((inp[0] & 0x03) << 24) | ((inp[1] & 0x3f) << 18) |
107 ((inp[2] & 0x3f) << 12) | ((inp[3] & 0x3f) << 6) |
112 *error = YAZ_ICONV_EILSEQ;
115 *error = YAZ_ICONV_EILSEQ;
/* Six-byte form: lead byte up to 0xFD plus five continuation bytes. */
117 else if (inp[0] <= 0xfd && inbytesleft >= 6)
119 if ((inp[1] & 0xc0) == 0x80 && (inp[2] & 0xc0) == 0x80
120 && (inp[3] & 0xc0) == 0x80 && (inp[4] & 0xc0) == 0x80
121 && (inp[5] & 0xc0) == 0x80)
/* 1 payload bit from the lead byte, 6 from each continuation byte. */
123 x = ((inp[0] & 0x01) << 30) | ((inp[1] & 0x3f) << 24) |
124 ((inp[2] & 0x3f) << 18) | ((inp[3] & 0x3f) << 12) |
125 ((inp[4] & 0x3f) << 6) | (inp[5] & 0x3f);
129 *error = YAZ_ICONV_EILSEQ;
132 *error = YAZ_ICONV_EILSEQ;
/* Presumably the final `else` of the chain: the lead byte is acceptable but
 * inbytesleft is too small for the sequence it announces. */
135 *error = YAZ_ICONV_EINVAL; /* incomplete sequence */
/**
 * \brief Reads one UTF-8 character through yaz_read_UTF8_char() and records
 *        the resulting error code on the conversion handle.
 * \param cd conversion handle passed to yaz_iconv_set_errno()
 * \param inp input bytes, forwarded unchanged
 * \param inbytesleft number of bytes available at inp, forwarded unchanged
 * \param no_read forwarded unchanged to yaz_read_UTF8_char()
 *
 * NOTE(review): the declaration of err and the return statement are not
 * visible in this listing (the embedded numbers skip 142-143 and 146) -
 * confirm against the complete file.
 */
140 unsigned long yaz_read_UTF8(yaz_iconv_t cd, unsigned char *inp,
141 size_t inbytesleft, size_t *no_read)
/* Hold the result in the callee's own return type (unsigned long); storing
 * it in an int would narrow the decoded value. */
144 unsigned long r = yaz_read_UTF8_char(inp, inbytesleft, no_read, &err);
/* Forwarded unconditionally, whatever value err holds after the call. */
145 yaz_iconv_set_errno(cd, err);
/**
 * \brief Writes one character as UTF-8 through yaz_write_UTF8_char() and
 *        records the resulting error code on the conversion handle.
 * \param cd conversion handle passed to yaz_iconv_set_errno()
 * \param x character value to encode, forwarded unchanged
 * \param outbuf forwarded unchanged to yaz_write_UTF8_char()
 * \param outbytesleft forwarded unchanged to yaz_write_UTF8_char()
 *
 * NOTE(review): the declaration of err and the return statement are not
 * visible in this listing (the embedded numbers skip 152-153 and 156) -
 * confirm against the complete file.
 */
150 size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
151 char **outbuf, size_t *outbytesleft)
/* Hold the result in the callee's own return type (size_t) so a status
 * value never has to survive a round trip through int. */
154 size_t r = yaz_write_UTF8_char(x, outbuf, outbytesleft, &err);
/* Forwarded unconditionally, whatever value err holds after the call. */
155 yaz_iconv_set_errno(cd, err);
/**
 * \brief Encodes the value x as a UTF-8 sequence of one to six bytes at
 *        *outbuf.
 * \param x value to encode; its magnitude selects the sequence length
 * \param outbuf in/out: bytes are written starting at *outbuf, and the
 *               advanced write position is stored back into *outbuf
 * \param outbytesleft in/out: space available; the visible multi-byte
 *                     branches subtract the number of bytes written
 *
 * A further out-parameter is written as *error with YAZ_ICONV_E2BIG when no
 * branch fits; its declaration line is not visible in this listing.
 *
 * NOTE(review): lines are missing from this listing (the embedded numbers
 * have gaps): the last parameter line, all braces, the decrement for the
 * one-byte case and the return statements - confirm against the complete
 * file.
 */
159 size_t yaz_write_UTF8_char(unsigned long x,
160 char **outbuf, size_t *outbytesleft,
/* Work on an unsigned byte pointer so the bit patterns below are stored
 * without sign issues. */
163 unsigned char *outp = (unsigned char *) *outbuf;
/* One byte: values up to 0x7F are stored as-is. */
165 if (x <= 0x7f && *outbytesleft >= 1)
167 *outp++ = (unsigned char) x;
/* Two bytes: lead 110xxxxx followed by one 10xxxxxx continuation byte. */
170 else if (x <= 0x7ff && *outbytesleft >= 2)
172 *outp++ = (unsigned char) ((x >> 6) | 0xc0);
173 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
174 (*outbytesleft) -= 2;
/* Three bytes: lead 1110xxxx followed by two continuation bytes. */
176 else if (x <= 0xffff && *outbytesleft >= 3)
178 *outp++ = (unsigned char) ((x >> 12) | 0xe0);
179 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
180 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
181 (*outbytesleft) -= 3;
/* Four bytes: lead 11110xxx followed by three continuation bytes. */
183 else if (x <= 0x1fffff && *outbytesleft >= 4)
185 *outp++ = (unsigned char) ((x >> 18) | 0xf0);
186 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
187 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
188 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
189 (*outbytesleft) -= 4;
/* Five bytes: lead 111110xx followed by four continuation bytes. */
191 else if (x <= 0x3ffffff && *outbytesleft >= 5)
193 *outp++ = (unsigned char) ((x >> 24) | 0xf8);
194 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
195 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
196 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
197 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
198 (*outbytesleft) -= 5;
/* Six bytes: every remaining value, with no upper bound tested on x. */
200 else if (*outbytesleft >= 6)
202 *outp++ = (unsigned char) ((x >> 30) | 0xfc);
203 *outp++ = (unsigned char) (((x >> 24) & 0x3f) | 0x80);
204 *outp++ = (unsigned char) (((x >> 18) & 0x3f) | 0x80);
205 *outp++ = (unsigned char) (((x >> 12) & 0x3f) | 0x80);
206 *outp++ = (unsigned char) (((x >> 6) & 0x3f) | 0x80);
207 *outp++ = (unsigned char) ((x & 0x3f) | 0x80);
208 (*outbytesleft) -= 6;
/* Presumably the final `else`: the space left is too small for the sequence
 * length that x requires. */
212 *error = YAZ_ICONV_E2BIG; /* no room for output */
/* Publish the advanced write position to the caller. */
215 *outbuf = (char *) outp;
223 * indent-tabs-mode: nil
225 * vim: shiftwidth=4 tabstop=8 expandtab