X-Git-Url: http://lists.indexdata.com/cgi-bin?a=blobdiff_plain;ds=sidebyside;f=src%2Fcharsets.c;h=2d5dad3d36b202478cbd2e2a06a7815ca9ce6d84;hb=8e1a5dea882557048557f1e75a7b6bad75b21f2f;hp=3f45baa3286bbdef91a7ef9ba39f386608b19a91;hpb=27cfb6d89ca9b02f63f8334b6b8e666cf7db2ff7;p=pazpar2-moved-to-github.git diff --git a/src/charsets.c b/src/charsets.c index 3f45baa..2d5dad3 100644 --- a/src/charsets.c +++ b/src/charsets.c @@ -1,4 +1,4 @@ -/* $Id: charsets.c,v 1.1 2007-05-10 11:46:09 adam Exp $ +/* $Id: charsets.c,v 1.5 2007-05-25 10:32:55 marc Exp $ Copyright (c) 2006-2007, Index Data. This file is part of Pazpar2. @@ -29,19 +29,33 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include +#include #include #include + #include "charsets.h" +//#include "config.h" +//#include "parameters.h" + +#ifdef HAVE_ICU +#include "icu_I18N.h" +#endif // HAVE_ICU /* charset handle */ struct pp2_charset_s { const char *(*token_next_handler)(pp2_relevance_token_t prt); /* other handlers will come as we see fit */ +#ifdef HAVE_ICU + struct icu_chain * icu_chn; + UErrorCode icu_sts; +#endif // HAVE_ICU }; static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt); -/* in the future : */ -// static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt); + +#ifdef HAVE_ICU +static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt); +#endif // HAVE_ICU /* tokenzier handle */ struct pp2_relevance_token_s { @@ -50,11 +64,19 @@ struct pp2_relevance_token_s { WRBUF norm_str; /* normized string we return (temporarily) */ }; -pp2_charset_t pp2_charset_create(void) +pp2_charset_t pp2_charset_create(struct icu_chain * icu_chn) { pp2_charset_t pct = xmalloc(sizeof(*pct)); pct->token_next_handler = pp2_relevance_token_a_to_z; +#ifdef HAVE_ICU + pct->icu_chn = 0; + if (icu_chn){ + pct->icu_chn = icu_chn; + pct->icu_sts = U_ZERO_ERROR; + pct->token_next_handler = pp2_relevance_token_icu; + } + #endif // HAVE_ICU return pct; } @@ -69,16 +91,31 @@ pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct, pp2_relevance_token_t prt = xmalloc(sizeof(*prt)); assert(pct); + prt->norm_str = wrbuf_alloc(); prt->cp = buf; prt->pct = pct; + +#ifdef HAVE_ICU + if (pct->icu_chn) + { + pct->icu_sts = U_ZERO_ERROR; + int ok = 0; + ok = icu_chain_assign_cstr(pct->icu_chn, buf, &pct->icu_sts); + //printf("\nfield ok: %d '%s'\n", ok, buf); + prt->pct = pct; + prt->norm_str = 0; + } +#endif // HAVE_ICU return prt; } + void pp2_relevance_token_destroy(pp2_relevance_token_t prt) { assert(prt); - wrbuf_destroy(prt->norm_str); + if(prt->norm_str) + wrbuf_destroy(prt->norm_str); xfree(prt); } @@ -117,6 +154,27 @@ static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt) } +#ifdef HAVE_ICU +static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt) +{ + //&& U_SUCCESS(pct->icu_sts)) + if (icu_chain_next_token(prt->pct->icu_chn, &prt->pct->icu_sts)){ + //printf("'%s' ", icu_chain_get_norm(prt->pct->icu_chn)); + if (U_FAILURE(prt->pct->icu_sts)) + { + //printf("ICU status failure\n "); + return 0; + } + + return icu_chain_get_norm(prt->pct->icu_chn); + } + //printf ("EOF\n"); + return 0; +}; +#endif // HAVE_ICU + + + /* * Local variables: * c-basic-offset: 4