Implement real ISO 5426 character set, first version.
[yaz-moved-to-github.git] / src / siconv.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2008 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file
7  * \brief Implements simple ICONV
8  *
9  * This implements an interface similar to that of iconv and
10  * is used by YAZ to interface with iconv (if present).
11  * For systems where iconv is not present, this layer
12  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
13  *
14  */
15
16 #if HAVE_CONFIG_H
17 #include <config.h>
18 #endif
19
20 #include <assert.h>
21 #include <errno.h>
22 #include <string.h>
23 #include <ctype.h>
24
25 #if HAVE_ICONV_H
26 #include <iconv.h>
27 #endif
28
29 #include <yaz/xmalloc.h>
30 #include <yaz/nmem.h>
31 #include "iconv-p.h"
32
33 struct yaz_iconv_struct {
34     int my_errno;
35     int init_flag;
36 #if 0
37     size_t (*init_handle)(yaz_iconv_t cd, unsigned char *inbuf,
38                             size_t inbytesleft, size_t *no_read);
39     unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
40                                  size_t inbytesleft, size_t *no_read);
41 #endif
42     size_t no_read_x;
43     unsigned long unget_x;
44 #if HAVE_ICONV_H
45     iconv_t iconv_cd;
46 #endif
47     struct yaz_iconv_encoder_s encoder;
48     struct yaz_iconv_decoder_s decoder;
49 };
50
51
52 int yaz_iconv_isbuiltin(yaz_iconv_t cd)
53 {
54     return cd->decoder.read_handle && cd->encoder.write_handle;
55 }
56
57
58 static int prepare_encoders(yaz_iconv_t cd, const char *tocode)
59 {
60     if (yaz_marc8_encoder(tocode, &cd->encoder))
61         return 1;
62     if (yaz_utf8_encoder(tocode, &cd->encoder))
63         return 1;
64     if (yaz_ucs4_encoder(tocode, &cd->encoder))
65         return 1;
66     if (yaz_iso_8859_1_encoder(tocode, &cd->encoder))
67         return 1;
68     if (yaz_iso_5428_encoder(tocode, &cd->encoder))
69         return 1;
70     if (yaz_advancegreek_encoder(tocode, &cd->encoder))
71         return 1;
72     if (yaz_wchar_encoder(tocode, &cd->encoder))
73         return 1;
74     return 0;
75 }
76
77 static int prepare_decoders(yaz_iconv_t cd, const char *tocode)
78 {
79     if (yaz_marc8_decoder(tocode, &cd->decoder))
80         return 1;
81     if (yaz_iso5426_decoder(tocode, &cd->decoder))
82         return 1;
83     if (yaz_utf8_decoder(tocode, &cd->decoder))
84         return 1;
85     if (yaz_ucs4_decoder(tocode, &cd->decoder))
86         return 1;
87     if (yaz_iso_8859_1_decoder(tocode, &cd->decoder))
88         return 1;
89     if (yaz_iso_5428_decoder(tocode, &cd->decoder))
90         return 1;
91     if (yaz_advancegreek_decoder(tocode, &cd->decoder))
92         return 1;
93     if (yaz_wchar_decoder(tocode, &cd->decoder))
94         return 1;
95     return 0;
96 }
97
98 yaz_iconv_t yaz_iconv_open(const char *tocode, const char *fromcode)
99 {
100     yaz_iconv_t cd = (yaz_iconv_t) xmalloc (sizeof(*cd));
101
102     cd->encoder.data = 0;
103     cd->encoder.write_handle = 0;
104     cd->encoder.flush_handle = 0;
105     cd->encoder.init_handle = 0;
106     cd->encoder.destroy_handle = 0;
107
108     cd->decoder.data = 0;
109     cd->decoder.read_handle = 0;
110     cd->decoder.init_handle = 0;
111     cd->decoder.destroy_handle = 0;
112
113     cd->my_errno = YAZ_ICONV_UNKNOWN;
114
115     /* a useful hack: if fromcode has leading @,
116        the library not use YAZ's own conversions .. */
117     if (fromcode[0] == '@')
118         fromcode++;
119     else
120     {
121         prepare_encoders(cd, tocode);
122         prepare_decoders(cd, fromcode);
123     }
124     if (cd->decoder.read_handle && cd->encoder.write_handle)
125     {
126 #if HAVE_ICONV_H
127         cd->iconv_cd = (iconv_t) (-1);
128 #endif
129         ;
130     }
131     else
132     {
133 #if HAVE_ICONV_H
134         cd->iconv_cd = iconv_open(tocode, fromcode);
135         if (cd->iconv_cd == (iconv_t) (-1))
136         {
137             yaz_iconv_close(cd);
138             return 0;
139         }
140 #else
141         yaz_iconv_close(cd);
142         return 0;
143 #endif
144     }
145     cd->init_flag = 1;
146     return cd;
147 }
148
149 size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
150                  char **outbuf, size_t *outbytesleft)
151 {
152     char *inbuf0 = 0;
153     size_t r = 0;
154
155 #if HAVE_ICONV_H
156     if (cd->iconv_cd != (iconv_t) (-1))
157     {
158         size_t r =
159             iconv(cd->iconv_cd, inbuf, inbytesleft, outbuf, outbytesleft);
160         if (r == (size_t)(-1))
161         {
162             switch (yaz_errno())
163             {
164             case E2BIG:
165                 cd->my_errno = YAZ_ICONV_E2BIG;
166                 break;
167             case EINVAL:
168                 cd->my_errno = YAZ_ICONV_EINVAL;
169                 break;
170             case EILSEQ:
171                 cd->my_errno = YAZ_ICONV_EILSEQ;
172                 break;
173             default:
174                 cd->my_errno = YAZ_ICONV_UNKNOWN;
175             }
176         }
177         return r;
178     }
179 #endif
180
181     if (inbuf)
182         inbuf0 = *inbuf;
183
184     if (cd->init_flag)
185     {
186         cd->my_errno = YAZ_ICONV_UNKNOWN;
187         
188         if (cd->encoder.init_handle)
189             (*cd->encoder.init_handle)(&cd->encoder);
190         
191         cd->unget_x = 0;
192         cd->no_read_x = 0;
193
194         if (cd->decoder.init_handle)
195         {
196             size_t no_read = 0;
197             size_t r = (cd->decoder.init_handle)(
198                 cd, &cd->decoder,
199                 inbuf ? (unsigned char *) *inbuf : 0,
200                 inbytesleft ? *inbytesleft : 0, 
201                 &no_read);
202             if (r)
203             {
204                 if (cd->my_errno == YAZ_ICONV_EINVAL)
205                     return r;
206                 cd->init_flag = 0;
207                 return r;
208             }
209             if (inbytesleft)
210                 *inbytesleft -= no_read;
211             if (inbuf)
212                 *inbuf += no_read;
213         }
214     }
215     cd->init_flag = 0;
216
217     if (!inbuf || !*inbuf)
218     {
219         if (outbuf && *outbuf)
220         {
221             if (cd->unget_x)
222                 r = (*cd->encoder.write_handle)(cd, &cd->encoder,
223                                                 cd->unget_x, outbuf, outbytesleft);
224             if (cd->encoder.flush_handle)
225                 r = (*cd->encoder.flush_handle)(cd, &cd->encoder,
226                                                 outbuf, outbytesleft);
227         }
228         if (r == 0)
229             cd->init_flag = 1;
230         cd->unget_x = 0;
231         return r;
232     }
233     while (1)
234     {
235         unsigned long x;
236         size_t no_read;
237
238         if (cd->unget_x)
239         {
240             x = cd->unget_x;
241             no_read = cd->no_read_x;
242         }
243         else
244         {
245             if (*inbytesleft == 0)
246             {
247                 r = *inbuf - inbuf0;
248                 break;
249             }
250             x = (*cd->decoder.read_handle)(
251                 cd, &cd->decoder, 
252                 (unsigned char *) *inbuf, *inbytesleft, &no_read);
253             if (no_read == 0)
254             {
255                 r = (size_t)(-1);
256                 break;
257             }
258         }
259         if (x)
260         {
261             r = (*cd->encoder.write_handle)(cd, &cd->encoder,
262                                             x, outbuf, outbytesleft);
263             if (r)
264             {
265                 /* unable to write it. save it because read_handle cannot
266                    rewind .. */
267                 if (cd->my_errno == YAZ_ICONV_E2BIG)
268                 {
269                     cd->unget_x = x;
270                     cd->no_read_x = no_read;
271                     break;
272                 }
273             }
274             cd->unget_x = 0;
275         }
276         *inbytesleft -= no_read;
277         (*inbuf) += no_read;
278     }
279     return r;
280 }
281
282 int yaz_iconv_error(yaz_iconv_t cd)
283 {
284     return cd->my_errno;
285 }
286
287 int yaz_iconv_close(yaz_iconv_t cd)
288 {
289 #if HAVE_ICONV_H
290     if (cd->iconv_cd != (iconv_t) (-1))
291         iconv_close(cd->iconv_cd);
292 #endif
293     if (cd->encoder.destroy_handle)
294         (*cd->encoder.destroy_handle)(&cd->encoder);
295     if (cd->decoder.destroy_handle)
296         (*cd->decoder.destroy_handle)(&cd->decoder);
297     xfree(cd);
298     return 0;
299 }
300
301 void yaz_iconv_set_errno(yaz_iconv_t cd, int no)
302 {
303     cd->my_errno = no;
304 }
305
306 /*
307  * Local variables:
308  * c-basic-offset: 4
309  * indent-tabs-mode: nil
310  * End:
311  * vim: shiftwidth=4 tabstop=8 expandtab
312  */