From: Adam Dickmeiss Date: Wed, 14 Sep 2011 07:37:54 +0000 (+0200) Subject: Rename some charset functions X-Git-Tag: v1.6.1~8 X-Git-Url: http://lists.indexdata.com/cgi-bin?a=commitdiff_plain;h=9e586c9793e3f4846f7307ac3a76537dec1aa43d;p=pazpar2-moved-to-github.git Rename some charset functions The _relevance_-name was relevant when charset system was only used to normalize relevance terms, but is inappropriate when it's used to normalize 4 different types of terms. --- diff --git a/src/charsets.c b/src/charsets.c index 397e73b..0199514 100644 --- a/src/charsets.c +++ b/src/charsets.c @@ -45,32 +45,32 @@ static pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node); static pp2_charset_t pp2_charset_create(struct icu_chain * icu_chn); static pp2_charset_t pp2_charset_create_a_to_z(void); static void pp2_charset_destroy(pp2_charset_t pct); -static pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct); +static pp2_charset_token_t pp2_charset_tokenize(pp2_charset_t pct); /* charset handle */ struct pp2_charset_s { - const char *(*token_next_handler)(pp2_relevance_token_t prt); - const char *(*get_sort_handler)(pp2_relevance_token_t prt); - const char *(*get_display_handler)(pp2_relevance_token_t prt); + const char *(*token_next_handler)(pp2_charset_token_t prt); + const char *(*get_sort_handler)(pp2_charset_token_t prt); + const char *(*get_display_handler)(pp2_charset_token_t prt); #if YAZ_HAVE_ICU struct icu_chain * icu_chn; UErrorCode icu_sts; #endif }; -static const char *pp2_relevance_token_null(pp2_relevance_token_t prt); -static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt); -static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt); -static const char *pp2_get_display_ascii(pp2_relevance_token_t prt); +static const char *pp2_charset_token_null(pp2_charset_token_t prt); +static const char *pp2_charset_token_a_to_z(pp2_charset_token_t prt); +static const char *pp2_get_sort_ascii(pp2_charset_token_t prt); +static const char *pp2_get_display_ascii(pp2_charset_token_t prt); #if YAZ_HAVE_ICU -static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt); -static const char *pp2_get_sort_icu(pp2_relevance_token_t prt); -static const char *pp2_get_display_icu(pp2_relevance_token_t prt); +static const char *pp2_charset_token_icu(pp2_charset_token_t prt); +static const char *pp2_get_sort_icu(pp2_charset_token_t prt); +static const char *pp2_get_display_icu(pp2_charset_token_t prt); #endif /* tokenzier handle */ -struct pp2_relevance_token_s { +struct pp2_charset_token_s { const char *cp; /* unnormalized buffer we're tokenizing */ const char *last_cp; /* pointer to last token we're dealing with */ pp2_charset_t pct; /* our main charset handle (type+config) */ @@ -218,7 +218,7 @@ pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node) pp2_charset_t pp2_charset_create_a_to_z(void) { pp2_charset_t pct = pp2_charset_create(0); - pct->token_next_handler = pp2_relevance_token_a_to_z; + pct->token_next_handler = pp2_charset_token_a_to_z; return pct; } @@ -226,7 +226,7 @@ pp2_charset_t pp2_charset_create(struct icu_chain *icu_chn) { pp2_charset_t pct = xmalloc(sizeof(*pct)); - pct->token_next_handler = pp2_relevance_token_null; + pct->token_next_handler = pp2_charset_token_null; pct->get_sort_handler = pp2_get_sort_ascii; pct->get_display_handler = pp2_get_display_ascii; #if YAZ_HAVE_ICU @@ -235,7 +235,7 @@ pp2_charset_t pp2_charset_create(struct icu_chain *icu_chn) { pct->icu_chn = icu_chn; pct->icu_sts = U_ZERO_ERROR; - pct->token_next_handler = pp2_relevance_token_icu; + pct->token_next_handler = pp2_charset_token_icu; pct->get_sort_handler = pp2_get_sort_icu; pct->get_display_handler = pp2_get_display_icu; } @@ -251,19 +251,19 @@ void pp2_charset_destroy(pp2_charset_t pct) xfree(pct); } -pp2_relevance_token_t pp2_relevance_create(pp2_charset_fact_t pft, - const char *id) +pp2_charset_token_t pp2_charset_token_create(pp2_charset_fact_t pft, + const char *id) { struct pp2_charset_entry *pce; for (pce = pft->list; pce; pce = pce->next) if (!strcmp(id, pce->name)) - return pp2_relevance_tokenize(pce->pct); + return pp2_charset_tokenize(pce->pct); return 0; } -pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct) +pp2_charset_token_t pp2_charset_tokenize(pp2_charset_t pct) { - pp2_relevance_token_t prt = xmalloc(sizeof(*prt)); + pp2_charset_token_t prt = xmalloc(sizeof(*prt)); assert(pct); @@ -281,9 +281,8 @@ pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct) return prt; } -void pp2_relevance_first(pp2_relevance_token_t prt, - const char *buf, - int skip_article) +void pp2_charset_token_first(pp2_charset_token_t prt, + const char *buf, int skip_article) { if (skip_article) { @@ -313,7 +312,7 @@ void pp2_relevance_first(pp2_relevance_token_t prt, #endif // YAZ_HAVE_ICU } -void pp2_relevance_token_destroy(pp2_relevance_token_t prt) +void pp2_charset_token_destroy(pp2_charset_token_t prt) { assert(prt); #if YAZ_HAVE_ICU @@ -327,18 +326,18 @@ void pp2_relevance_token_destroy(pp2_relevance_token_t prt) xfree(prt); } -const char *pp2_relevance_token_next(pp2_relevance_token_t prt) +const char *pp2_charset_token_next(pp2_charset_token_t prt) { assert(prt); return (prt->pct->token_next_handler)(prt); } -const char *pp2_get_sort(pp2_relevance_token_t prt) +const char *pp2_get_sort(pp2_charset_token_t prt) { return prt->pct->get_sort_handler(prt); } -const char *pp2_get_display(pp2_relevance_token_t prt) +const char *pp2_get_display(pp2_charset_token_t prt) { return prt->pct->get_display_handler(prt); } @@ -347,7 +346,7 @@ const char *pp2_get_display(pp2_relevance_token_t prt) /* original tokenizer with our tokenize interface, but we add +1 to ensure no '\0' are in our string (except for EOF) */ -static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt) +static const char *pp2_charset_token_a_to_z(pp2_charset_token_t prt) { const char *cp = prt->cp; int c; @@ -374,7 +373,7 @@ static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt) return wrbuf_cstr(prt->norm_str); } -static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt) +static const char *pp2_get_sort_ascii(pp2_charset_token_t prt) { if (prt->last_cp == 0) return 0; @@ -391,7 +390,7 @@ static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt) } } -static const char *pp2_get_display_ascii(pp2_relevance_token_t prt) +static const char *pp2_get_display_ascii(pp2_charset_token_t prt) { if (prt->last_cp == 0) return 0; @@ -401,7 +400,7 @@ static const char *pp2_get_display_ascii(pp2_relevance_token_t prt) } } -static const char *pp2_relevance_token_null(pp2_relevance_token_t prt) +static const char *pp2_charset_token_null(pp2_charset_token_t prt) { const char *cp = prt->cp; @@ -413,7 +412,7 @@ static const char *pp2_relevance_token_null(pp2_relevance_token_t prt) } #if YAZ_HAVE_ICU -static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt) +static const char *pp2_charset_token_icu(pp2_charset_token_t prt) { if (icu_iter_next(prt->iter)) { @@ -422,12 +421,12 @@ static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt) return 0; } -static const char *pp2_get_sort_icu(pp2_relevance_token_t prt) +static const char *pp2_get_sort_icu(pp2_charset_token_t prt) { return icu_iter_get_sortkey(prt->iter); } -static const char *pp2_get_display_icu(pp2_relevance_token_t prt) +static const char *pp2_get_display_icu(pp2_charset_token_t prt) { return icu_iter_get_display(prt->iter); } diff --git a/src/charsets.h b/src/charsets.h index cc9f269..1a7381f 100644 --- a/src/charsets.h +++ b/src/charsets.h @@ -27,25 +27,25 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include -typedef struct pp2_relevance_token_s *pp2_relevance_token_t; +typedef struct pp2_charset_token_s *pp2_charset_token_t; typedef struct pp2_charset_fact_s *pp2_charset_fact_t; -void pp2_relevance_first(pp2_relevance_token_t prt, - const char *buf, - int skip_article); - -void pp2_relevance_token_destroy(pp2_relevance_token_t prt); -const char *pp2_relevance_token_next(pp2_relevance_token_t prt); -const char *pp2_get_sort(pp2_relevance_token_t prt); -const char *pp2_get_display(pp2_relevance_token_t prt); - pp2_charset_fact_t pp2_charset_fact_create(void); void pp2_charset_fact_destroy(pp2_charset_fact_t pft); int pp2_charset_fact_define(pp2_charset_fact_t pft, xmlNode *xml_node, const char *default_id); -pp2_relevance_token_t pp2_relevance_create(pp2_charset_fact_t pft, - const char *id); void pp2_charset_fact_incref(pp2_charset_fact_t pft); +pp2_charset_token_t pp2_charset_token_create(pp2_charset_fact_t pft, + const char *id); + +void pp2_charset_token_first(pp2_charset_token_t prt, + const char *buf, + int skip_article); +void pp2_charset_token_destroy(pp2_charset_token_t prt); +const char *pp2_charset_token_next(pp2_charset_token_t prt); +const char *pp2_get_sort(pp2_charset_token_t prt); +const char *pp2_get_display(pp2_charset_token_t prt); + #endif /* diff --git a/src/relevance.c b/src/relevance.c index 4df7750..708f2ba 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -33,7 +33,7 @@ struct relevance int *doc_frequency_vec; int vec_len; struct word_entry *entries; - pp2_relevance_token_t prt; + pp2_charset_token_t prt; NMEM nmem; }; @@ -68,7 +68,7 @@ int word_entry_match(struct word_entry *entries, const char *norm_str) return 0; } -static struct word_entry *build_word_entries(pp2_relevance_token_t prt, +static struct word_entry *build_word_entries(pp2_charset_token_t prt, NMEM nmem, const char **terms) { @@ -80,8 +80,8 @@ static struct word_entry *build_word_entries(pp2_relevance_token_t prt, { const char *norm_str; - pp2_relevance_first(prt, *p, 0); - while ((norm_str = pp2_relevance_token_next(prt))) + pp2_charset_token_first(prt, *p, 0); + while ((norm_str = pp2_charset_token_next(prt))) add_word_entry(nmem, &entries, norm_str, termno); termno++; } @@ -95,11 +95,11 @@ void relevance_countwords(struct relevance *r, struct record_cluster *cluster, const char *norm_str; int i, length = 0; - pp2_relevance_first(r->prt, words, 0); + pp2_charset_token_first(r->prt, words, 0); for (i = 1; i < r->vec_len; i++) mult[i] = 0; - while ((norm_str = pp2_relevance_token_next(r->prt))) + while ((norm_str = pp2_charset_token_next(r->prt))) { int res = word_entry_match(r->entries, norm_str); if (res) @@ -133,7 +133,7 @@ struct relevance *relevance_create(pp2_charset_fact_t pft, res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int)); memset(res->doc_frequency_vec, 0, res->vec_len * sizeof(int)); res->nmem = nmem; - res->prt = pp2_relevance_create(pft, "relevance"); + res->prt = pp2_charset_token_create(pft, "relevance"); res->entries = build_word_entries(res->prt, nmem, terms); return res; } @@ -142,7 +142,7 @@ void relevance_destroy(struct relevance **rp) { if (*rp) { - pp2_relevance_token_destroy((*rp)->prt); + pp2_charset_token_destroy((*rp)->prt); *rp = 0; } } diff --git a/src/session.c b/src/session.c index d56b396..00b70bb 100644 --- a/src/session.c +++ b/src/session.c @@ -188,7 +188,7 @@ void pull_terms(NMEM nmem, struct ccl_rpn_node *n, char **termlist, int *num) void add_facet(struct session *s, const char *type, const char *value, int count) { struct conf_service *service = s->service; - pp2_relevance_token_t prt; + pp2_charset_token_t prt; const char *facet_component; WRBUF facet_wrbuf = wrbuf_alloc(); WRBUF display_wrbuf = wrbuf_alloc(); @@ -202,7 +202,7 @@ void add_facet(struct session *s, const char *type, const char *value, int count if (!icu_chain_id) icu_chain_id = "facet"; - prt = pp2_relevance_create(service->charsets, icu_chain_id); + prt = pp2_charset_token_create(service->charsets, icu_chain_id); if (!prt) { yaz_log(YLOG_FATAL, "Unknown ICU chain '%s' for facet of type '%s'", @@ -211,8 +211,8 @@ void add_facet(struct session *s, const char *type, const char *value, int count wrbuf_destroy(display_wrbuf); return; } - pp2_relevance_first(prt, value, 0); - while ((facet_component = pp2_relevance_token_next(prt))) + pp2_charset_token_first(prt, value, 0); + while ((facet_component = pp2_charset_token_next(prt))) { const char *display_component; if (*facet_component) @@ -229,7 +229,7 @@ void add_facet(struct session *s, const char *type, const char *value, int count wrbuf_puts(display_wrbuf, display_component); } } - pp2_relevance_token_destroy(prt); + pp2_charset_token_destroy(prt); yaz_log(YLOG_LOG, "facet norm=%s", wrbuf_cstr(facet_wrbuf)); yaz_log(YLOG_LOG, "facet display=%s", wrbuf_cstr(display_wrbuf)); @@ -1135,15 +1135,15 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name, if (value) { const char *norm_str; - pp2_relevance_token_t prt = - pp2_relevance_create(service->charsets, "mergekey"); + pp2_charset_token_t prt = + pp2_charset_token_create(service->charsets, "mergekey"); - pp2_relevance_first(prt, (const char *) value, 0); + pp2_charset_token_first(prt, (const char *) value, 0); if (wrbuf_len(norm_wr) > 0) wrbuf_puts(norm_wr, " "); wrbuf_puts(norm_wr, name); while ((norm_str = - pp2_relevance_token_next(prt))) + pp2_charset_token_next(prt))) { if (*norm_str) { @@ -1152,7 +1152,7 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name, } } xmlFree(value); - pp2_relevance_token_destroy(prt); + pp2_charset_token_destroy(prt); no_found++; } } @@ -1174,11 +1174,11 @@ static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no, if (mergekey) { const char *norm_str; - pp2_relevance_token_t prt = - pp2_relevance_create(service->charsets, "mergekey"); + pp2_charset_token_t prt = + pp2_charset_token_create(service->charsets, "mergekey"); - pp2_relevance_first(prt, (const char *) mergekey, 0); - while ((norm_str = pp2_relevance_token_next(prt))) + pp2_charset_token_first(prt, (const char *) mergekey, 0); + while ((norm_str = pp2_charset_token_next(prt))) { if (*norm_str) { @@ -1187,7 +1187,7 @@ static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no, wrbuf_puts(norm_wr, norm_str); } } - pp2_relevance_token_destroy(prt); + pp2_charset_token_destroy(prt); xmlFree(mergekey); } else @@ -1386,7 +1386,7 @@ static int ingest_to_cluster(struct client *cl, // now parsing XML record and adding data to cluster or record metadata for (n = root->children; n; n = n->next) { - pp2_relevance_token_t prt; + pp2_charset_token_t prt; if (type) xmlFree(type); if (value) @@ -1481,12 +1481,13 @@ static int ingest_to_cluster(struct client *cl, nmem_malloc(se->nmem, sizeof(union data_types)); - prt = pp2_relevance_create(service->charsets, "sort"); + prt = + pp2_charset_token_create(service->charsets, "sort"); - pp2_relevance_first(prt, rec_md->data.text.disp, - skip_article); + pp2_charset_token_first(prt, rec_md->data.text.disp, + skip_article); - pp2_relevance_token_next(prt); + pp2_charset_token_next(prt); sort_str = pp2_get_sort(prt); @@ -1500,7 +1501,7 @@ static int ingest_to_cluster(struct client *cl, } cluster->sortkeys[sk_field_id]->text.sort = nmem_strdup(se->nmem, sort_str); - pp2_relevance_token_destroy(prt); + pp2_charset_token_destroy(prt); } } }