1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2009 Index Data.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Index Data nor the names of its contributors
13 * may be used to endorse or promote products derived from this
14 * software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 \brief Internal header for ICU utilities
31 These functions, while non-static, are considered unstable and internal
32 and may be renamed for each YAZ release.
38 #include <yaz/yconfig.h>
40 #include <unicode/utypes.h> /* Basic ICU data types */
41 #include <unicode/uchar.h> /* char names */
43 #include <unicode/ucol.h>
44 #include <unicode/ubrk.h>
45 #include <unicode/utrans.h>
49 /* declared structs and functions */
/** \brief Inspects an ICU status code.
 *  \param status ICU status/error code to examine
 *  \return an int result.
 *  NOTE(review): the meaning of the return value (success vs. failure
 *  polarity) is not visible in this header - confirm in the implementation.
 */
51 int icu_check_status (UErrorCode status);
/** \brief UTF-16 buffer construction and reset.
 *
 *  icu_buf_utf16_create takes a requested capacity and returns a pointer
 *  to a struct icu_buf_utf16.
 *  icu_buf_utf16_clear takes an existing buffer and returns a buffer pointer.
 *  NOTE(review): presumably the capacity is counted in UTF-16 code units and
 *  clear returns the same buffer it was given - confirm in the implementation.
 */
60 struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity);
62 struct icu_buf_utf16 * icu_buf_utf16_clear(struct icu_buf_utf16 * buf16);
64 struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
/** \brief UTF-16 buffer copy and teardown.
 *
 *  icu_buf_utf16_copy takes a destination and a source buffer and returns
 *  a buffer pointer.
 *  icu_buf_utf16_destroy takes a buffer and returns nothing.
 *  NOTE(review): presumably copy returns dest16 and destroy releases a
 *  buffer obtained from icu_buf_utf16_create - confirm in the implementation.
 */
67 struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
68 struct icu_buf_utf16 * src16);
70 void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16);
/** \brief UTF-8 buffer construction and reset.
 *
 *  icu_buf_utf8_create takes a requested capacity and returns a pointer
 *  to a struct icu_buf_utf8.
 *  icu_buf_utf8_clear takes an existing buffer and returns a buffer pointer.
 *  NOTE(review): presumably the capacity is counted in bytes and clear
 *  returns the same buffer it was given - confirm in the implementation.
 */
81 struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity);
83 struct icu_buf_utf8 * icu_buf_utf8_clear(struct icu_buf_utf8 * buf8);
85 struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
/** \brief Tears down a UTF-8 buffer; returns nothing.
 *  NOTE(review): presumably releases a buffer obtained from
 *  icu_buf_utf8_create - confirm in the implementation.
 */
88 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8);
91 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
92 const char * src8cstr,
96 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
97 struct icu_buf_utf16 * src16,
/** \brief Casemap object construction and teardown.
 *
 *  icu_casemap_create takes a one-character action code and a pointer to an
 *  ICU status variable, and returns a pointer to a struct icu_casemap.
 *  icu_casemap_destroy takes a casemap object and returns nothing.
 *  NOTE(review): the set of valid action characters and whether create can
 *  return NULL are not visible in this header - confirm in the implementation.
 */
105 struct icu_casemap * icu_casemap_create(char action, UErrorCode *status);
107 void icu_casemap_destroy(struct icu_casemap * casemap);
109 int icu_casemap_casemap(struct icu_casemap * casemap,
110 struct icu_buf_utf16 * dest16,
111 struct icu_buf_utf16 * src16,
115 int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
116 struct icu_buf_utf16 * src16,
117 const char *locale, char action,
/** \brief Sort key generation from a UTF-16 buffer into a UTF-8 buffer.
 *  \param coll   ICU collator
 *  \param dest8  UTF-8 (byte) buffer
 *  \param src16  UTF-16 buffer
 *  \param status pointer to an ICU status variable
 *
 *  The function returns void, so status is the only error channel visible
 *  in the signature.
 *  NOTE(review): presumably the key is computed with coll from src16 and
 *  stored in dest8 - confirm in the implementation.
 */
120 void icu_sortkey8_from_utf16(UCollator *coll,
121 struct icu_buf_utf8 * dest8,
122 struct icu_buf_utf16 * src16,
123 UErrorCode * status);
129 struct icu_buf_utf16 * buf16;
135 keep always invariant
140 0 <= token_id <= token_count
144 struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
/** \brief Tokenizer teardown and input attachment.
 *
 *  icu_tokenizer_destroy takes a tokenizer and returns nothing.
 *  icu_tokenizer_attach takes a tokenizer, a UTF-16 source buffer and a
 *  pointer to an ICU status variable, and returns an int.
 *  NOTE(review): the meaning of attach's int return and whether the
 *  tokenizer copies or merely references src16 are not visible in this
 *  header - confirm in the implementation.
 */
147 void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer);
149 int icu_tokenizer_attach(struct icu_tokenizer * tokenizer,
150 struct icu_buf_utf16 * src16, UErrorCode *status);
152 int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer,
153 struct icu_buf_utf16 * tkn16,
/** \brief Tokenizer state accessors.
 *
 *  Each function takes a tokenizer and returns an int32_t value.
 *  The invariant noted with the tokenizer struct is
 *  0 <= token_id <= token_count.
 *  NOTE(review): presumably start/end/length describe the most recently
 *  returned token as offsets into the attached UTF-16 buffer - confirm in
 *  the implementation.
 */
156 int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer);
157 int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer);
158 int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer);
159 int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer);
160 int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer);
167 UParseError parse_error;
168 UTransliterator * trans;
171 struct icu_transform * icu_transform_create(const char *id, char action,
/** \brief Tears down a transform object; returns nothing.
 *  NOTE(review): presumably releases an object obtained from
 *  icu_transform_create - confirm in the implementation.
 */
175 void icu_transform_destroy(struct icu_transform * transform);
177 int icu_transform_trans(struct icu_transform * transform,
178 struct icu_buf_utf16 * dest16,
179 struct icu_buf_utf16 * src16,
/* Kind of processing performed by a single step of an ICU chain. */
182 enum icu_chain_step_type {
183 ICU_chain_step_type_none,
184 ICU_chain_step_type_display, /* convert to utf8 display format */
185 ICU_chain_step_type_casemap, /* apply utf16 case mapping */
186 ICU_chain_step_type_transform, /* apply utf16 transform */
187 ICU_chain_step_type_tokenize, /* apply utf16 tokenization */
188 ICU_chain_step_type_transliterate /* apply utf16 transliteration */
193 struct icu_chain_step
195 /* type and action object */
196 enum icu_chain_step_type type;
198 struct icu_casemap * casemap;
199 struct icu_transform * transform;
200 struct icu_tokenizer * tokenizer;
202 /* temporary post-action utf16 buffer */
203 struct icu_buf_utf16 * buf16;
204 struct icu_chain_step * previous;
212 struct icu_chain_step * icu_chain_step_create(yaz_icu_chain_t chain,
213 enum icu_chain_step_type type,
214 const uint8_t * rule,
215 struct icu_buf_utf16 * buf16,
/** \brief Tears down a chain step; returns nothing.
 *  NOTE(review): struct icu_chain_step carries a 'previous' link; whether
 *  destroy also follows that link is not visible in this header - confirm
 *  in the implementation.
 */
219 void icu_chain_step_destroy(struct icu_chain_step * step);
227 const char * src8cstr;
231 /* number of tokens returned so far */
234 /* utf8 output buffers */
235 struct icu_buf_utf8 * display8;
236 struct icu_buf_utf8 * norm8;
237 struct icu_buf_utf8 * sort8;
239 /* utf16 source buffer */
240 struct icu_buf_utf16 * src16;
242 /* linked list of chain steps */
243 struct icu_chain_step * steps;
246 struct icu_chain_step * icu_chain_insert_step(yaz_icu_chain_t chain,
247 enum icu_chain_step_type type,
248 const uint8_t * rule,
251 int icu_chain_step_next_token(yaz_icu_chain_t chain,
252 struct icu_chain_step * step,
/** \brief Chain accessors.
 *
 *  icu_chain_token_number takes a chain and returns an int.
 *  icu_chain_get_coll takes a chain and returns a pointer to a const
 *  UCollator, so the collator is not meant to be modified through it.
 *  NOTE(review): presumably token_number is the count of tokens returned
 *  so far and the collator remains owned by the chain - confirm in the
 *  implementation.
 */
255 int icu_chain_token_number(yaz_icu_chain_t chain);
257 const UCollator * icu_chain_get_coll(yaz_icu_chain_t chain);
259 yaz_icu_chain_t icu_chain_create(const char * locale,
261 UErrorCode * status);
264 #endif /* ICU_I18NL_H */
269 * c-file-style: "Stroustrup"
270 * indent-tabs-mode: nil
272 * vim: shiftwidth=4 tabstop=8 expandtab