X-Git-Url: http://lists.indexdata.com/cgi-bin?a=blobdiff_plain;f=src%2Ficu_I18N.h;h=df6cd2dcb67c7797ac7623454cfd9812e1bbe9af;hb=27cfb6d89ca9b02f63f8334b6b8e666cf7db2ff7;hp=803d89b073c4d95b5c29ec6a0b22e8572a9615ae;hpb=4aa097d555372d370f2485df38ddf93ecd327c59;p=pazpar2-moved-to-github.git diff --git a/src/icu_I18N.h b/src/icu_I18N.h index 803d89b..df6cd2d 100644 --- a/src/icu_I18N.h +++ b/src/icu_I18N.h @@ -1,4 +1,4 @@ -/* $Id: icu_I18N.h,v 1.7 2007-05-07 12:52:04 marc Exp $ +/* $Id: icu_I18N.h,v 1.8 2007-05-09 14:01:21 marc Exp $ Copyright (c) 2006-2007, Index Data. This file is part of Pazpar2. @@ -35,10 +35,19 @@ //#include /* C Converter API */ //#include /* some more string fcns*/ //#include -//#include +#include //#include +// forward declarations +//struct UBreakIterator; + + + + +// declared structs and functions + + int icu_check_status (UErrorCode status); struct icu_buf_utf16 @@ -91,6 +100,44 @@ UErrorCode icu_sortkey8_from_utf16(UCollator *coll, struct icu_buf_utf16 * src16, UErrorCode * status); +struct icu_tokenizer +{ + char locale[16]; + char action; + UBreakIterator* bi; + struct icu_buf_utf16 * buf16; + int32_t token_count; + int32_t token_id; + int32_t token_start; + int32_t token_end; + // keep always invariant + // 0 <= token_start + // <= token_end + // <= buf16->utf16_len + // and invariant + // 0 <= token_id <= token_count +}; + +struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action, + UErrorCode *status); + +void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer); + +int icu_tokenizer_attach(struct icu_tokenizer * tokenizer, + struct icu_buf_utf16 * src16, UErrorCode *status); + +int32_t icu_tokenizer_next_token(struct icu_tokenizer * tokenizer, + struct icu_buf_utf16 * tkn16, + UErrorCode *status); + +int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer); +int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer); +int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer); +int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer); +int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer); + + + #endif // HAVE_ICU #endif // ICU_I18NL_H