-/*
- * Copyright (C) 1995-2007, Index Data ApS
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2008 Index Data
* See the file LICENSE for details.
- *
- * $Id: icu_I18N.c,v 1.17 2007-11-08 17:20:32 adam Exp $
+ */
+
+/**
+ * \file icu_I18N.c
+ * \brief ICU utilities
*/
#if HAVE_CONFIG_H
#include <yaz/timing.h>
#endif
+#if YAZ_HAVE_ICU
#include <yaz/xmalloc.h>
-#if YAZ_HAVE_ICU
#include <yaz/icu_I18N.h>
#include <yaz/log.h>
int icu_check_status (UErrorCode status)
{
- if(U_FAILURE(status)){
- yaz_log(YLOG_WARN,
- "ICU: %d %s\n", status, u_errorName(status));
+ if (U_FAILURE(status))
+ {
+ yaz_log(YLOG_WARN, "ICU: %d %s\n", status, u_errorName(status));
return 0;
}
return 1;
else
buf8->utf8
= (uint8_t *) xrealloc(buf8->utf8, sizeof(uint8_t) * capacity);
-
- icu_buf_utf8_clear(buf8);
+
buf8->utf8_cap = capacity;
}
else {
}
-struct icu_buf_utf8 * icu_buf_utf8_copy(struct icu_buf_utf8 * dest8,
- struct icu_buf_utf8 * src8)
-{
- if(!dest8 || !src8
- || dest8 == src8)
- return 0;
-
-
- if (dest8->utf8_cap < src8->utf8_len)
- icu_buf_utf8_resize(dest8, src8->utf8_len * 2);
-
- strncpy((char*) dest8->utf8, (char*) src8->utf8, src8->utf8_len);
-
- return dest8;
-}
-
-
const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8)
{
if (!src8 || src8->utf8_len == 0)
int32_t dest16_len = 0;
- if (!src16->utf16_len){ //guarding for empty source string
+ if (!src16->utf16_len){ /* guarding for empty source string */
if (dest16->utf16)
dest16->utf16[0] = (UChar) 0;
dest16->utf16_len = 0;
-UErrorCode icu_sortkey8_from_utf16(UCollator *coll,
- struct icu_buf_utf8 * dest8,
- struct icu_buf_utf16 * src16,
- UErrorCode * status)
+void icu_sortkey8_from_utf16(UCollator *coll,
+ struct icu_buf_utf8 * dest8,
+ struct icu_buf_utf16 * src16,
+ UErrorCode * status)
{
int32_t sortkey_len = 0;
dest8->utf8_len = sortkey_len;
else
icu_buf_utf8_clear(dest8);
-
- return sortkey_len;
}
normalizer->rules16->utf16_len,
UTRANS_FORWARD,
0, 0,
- normalizer->parse_error, status);
+ &normalizer->parse_error, status);
break;
case 'r':
case 'R':
normalizer->rules16->utf16_len,
UTRANS_REVERSE ,
0, 0,
- normalizer->parse_error, status);
+ &normalizer->parse_error, status);
break;
default:
*status = U_UNSUPPORTED_ERROR;
|| !dest16)
return 0;
- if (!src16->utf16_len){ //guarding for empty source string
+ if (!src16->utf16_len){ /* guarding for empty source string */
icu_buf_utf16_clear(dest16);
return 0;
}
-struct icu_chain * icu_chain_create(const char *locale,
- int sort,
+struct icu_chain * icu_chain_create(const char *locale, int sort,
UErrorCode * status)
{
-
struct icu_chain * chain
= (struct icu_chain *) xmalloc(sizeof(struct icu_chain));
*status = U_ZERO_ERROR;
- strncpy((char *) chain->locale, (const char *) locale, 16);
- chain->locale[16 - 1] = '\0';
+ chain->locale = xstrdup(locale);
chain->sort = sort;
if (U_FAILURE(*status))
return 0;
-
chain->token_count = 0;
chain->src8cstr = 0;
void icu_chain_destroy(struct icu_chain * chain)
{
- if (chain){
-
+ if (chain)
+ {
if (chain->coll)
ucol_close(chain->coll);
icu_buf_utf16_destroy(chain->src16);
icu_chain_step_destroy(chain->steps);
+ xfree(chain->locale);
xfree(chain);
}
}
xml_rule = xmlGetProp(node, (xmlChar *) "rule");
- if (!strcmp((const char *) node->name,
- (const char *) "casemap")){
+ if (!strcmp((const char *) node->name, "casemap"))
step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap,
(const uint8_t *) xml_rule, status);
- }
- else if (!strcmp((const char *) node->name,
- (const char *) "normalize")){
+ else if (!strcmp((const char *) node->name, "transform"))
step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize,
(const uint8_t *) xml_rule, status);
- }
- else if (!strcmp((const char *) node->name,
- (const char *) "tokenize")){
+ else if (!strcmp((const char *) node->name, "tokenize"))
step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize,
(const uint8_t *) xml_rule, status);
- }
- else if (!strcmp((const char *) node->name,
- (const char *) "display")){
+ else if (!strcmp((const char *) node->name, "display"))
step = icu_chain_insert_step(chain, ICU_chain_step_type_display,
(const uint8_t *) "", status);
- }
xmlFree(xml_rule);
- if (!step || U_FAILURE(*status)){
+ if (!step || U_FAILURE(*status))
+ {
icu_chain_destroy(chain);
return 0;
}
}
-
return chain;
}
/* create utf16 destination buffers as needed, or reuse src16 (display step) */
- switch(type) {
+ switch(type)
+ {
case ICU_chain_step_type_display:
buf16 = src16;
break;
/* assign utf16 src buffers as needed, advance in previous steps
tokens until non-zero token met, and setting stop condition */
- if (step->previous){
+ if (step->previous)
+ {
src16 = step->previous->buf16;
/* tokens might be killed in previous steps, therefore looping */
got_new_token
= icu_chain_step_next_token(chain, step->previous, status);
}
- else { /* first step can only work once on chain->src16 input buffer */
+ else
+ { /* first step can only work once on chain->src16 input buffer */
src16 = chain->src16;
step->more_tokens = 0;
got_new_token = 1;
return 0;
/* stop if nothing to process */
- if (step->need_new_token && !got_new_token){
+ if (step->need_new_token && !got_new_token)
+ {
step->more_tokens = 0;
return 0;
}
perform the work, eventually put this step's output in
step->buf16 or the chain's UTF8 output buffers */
- switch(step->type) {
+ switch(step->type)
+ {
case ICU_chain_step_type_display:
icu_utf16_to_utf8(chain->display8, src16, status);
break;
break;
case ICU_chain_step_type_tokenize:
/* attach to new src16 token only first time during splitting */
- if (step->need_new_token){
+ if (step->need_new_token)
+ {
icu_tokenizer_attach(step->u.tokenizer, src16, status);
step->need_new_token = 0;
}
-
/* splitting one src16 token into multiple buf16 tokens */
step->more_tokens
= icu_tokenizer_next_token(step->u.tokenizer,
/* make sure to get new previous token if this one had been used up
by recursive call to _same_ step */
- if (!step->more_tokens){
+ if (!step->more_tokens)
+ {
step->more_tokens = icu_chain_step_next_token(chain, step, status);
- return step->more_tokens; // avoid one token count too much!
+ return step->more_tokens; /* avoid one token count too much! */
}
-
break;
default:
return 0;
chain->token_count = 0;
/* clear all steps' stop states */
- while (stp){
+ while (stp)
+ {
stp->more_tokens = 1;
stp->need_new_token = 1;
stp = stp->previous;
return 0;
/* special case with no steps - same as index type binary */
- if (!chain->steps){
+ if (!chain->steps)
+ {
if (chain->token_count)
return 0;
- else {
+ else
+ {
chain->token_count++;
if (chain->sort)
}
}
/* usual case, one or more icu chain steps existing */
- else {
-
+ else
+ {
while(!got_token && chain->steps && chain->steps->more_tokens)
got_token = icu_chain_step_next_token(chain, chain->steps, status);
- if (got_token){
+ if (got_token)
+ {
chain->token_count++;
icu_utf16_to_utf8(chain->norm8, chain->steps->buf16, status);
icu_sortkey8_from_utf16(chain->coll,
chain->sort8, chain->steps->buf16,
status);
-
+
return chain->token_count;
}
}
return chain->coll;
}
-
#endif /* YAZ_HAVE_ICU */
-
-
-
/*
* Local variables:
* c-basic-offset: 4