From b94f122f7e74623aa67e1fa1d097f7627c087f5c Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 13 Sep 2011 15:25:52 +0200 Subject: [PATCH] Allow multiple ICU chains for facets The charsets ICU interface can keep any number of identified ICU chains where "relevance", "sort", "mergekey", and "facet" are just the existing ones. The elements in server/service for defining ICU chains relevance, sort, mergekey and facet are deprecated and may be replaced by <icu_chain id="..."> elements whose id is "relevance", "sort", "mergekey" or "facet". --- src/charsets.c | 140 +++++++++++++++++++++++++++++++++++++------ src/charsets.h | 19 +++--- src/client.c | 2 +- src/pazpar2_config.c | 160 ++++++++++++++------------------------------------ src/pazpar2_config.h | 10 +--- src/relevance.c | 4 +- src/relevance.h | 2 +- src/session.c | 20 +++++-- test/test_icu.cfg | 28 +++++---- test/test_icu_8.res | 6 +- 10 files changed, 211 insertions(+), 180 deletions(-) diff --git a/src/charsets.c b/src/charsets.c index 44ef5fc..ba5d426 100644 --- a/src/charsets.c +++ b/src/charsets.c @@ -40,12 +40,18 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #endif +typedef struct pp2_charset_s *pp2_charset_t; +static pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node); +static pp2_charset_t pp2_charset_create(struct icu_chain * icu_chn); +static pp2_charset_t pp2_charset_create_a_to_z(void); +static void pp2_charset_destroy(pp2_charset_t pct); +static pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct); + /* charset handle */ struct pp2_charset_s { const char *(*token_next_handler)(pp2_relevance_token_t prt); const char *(*get_sort_handler)(pp2_relevance_token_t prt); const char *(*get_display_handler)(pp2_relevance_token_t prt); - int ref_count; #if YAZ_HAVE_ICU struct icu_chain * icu_chn; UErrorCode icu_sts; @@ -75,14 +81,114 @@ struct pp2_relevance_token_s { #endif }; +struct pp2_charset_fact_s { + struct pp2_charset_entry *list; + int ref_count; +}; + +struct pp2_charset_entry { + struct pp2_charset_entry *next; + 
pp2_charset_t pct; + char *name; +}; + + +static int pp2_charset_fact_add(pp2_charset_fact_t pft, + pp2_charset_t pct, const char *default_id); + +pp2_charset_fact_t pp2_charset_fact_create(void) +{ + pp2_charset_fact_t pft = xmalloc(sizeof(*pft)); + pft->list = 0; + pft->ref_count = 1; + + pp2_charset_fact_add(pft, pp2_charset_create_a_to_z(), "relevance"); + pp2_charset_fact_add(pft, pp2_charset_create_a_to_z(), "sort"); + pp2_charset_fact_add(pft, pp2_charset_create_a_to_z(), "mergekey"); + pp2_charset_fact_add(pft, pp2_charset_create(0), "facet"); + return pft; +} + +void pp2_charset_fact_destroy(pp2_charset_fact_t pft) +{ + if (pft) + { + assert(pft->ref_count >= 1); + --(pft->ref_count); + if (pft->ref_count == 0) + { + struct pp2_charset_entry *pce = pft->list; + while (pce) + { + struct pp2_charset_entry *next = pce->next; + pp2_charset_destroy(pce->pct); + xfree(pce->name); + xfree(pce); + pce = next; + } + xfree(pft); + } + } +} + +int pp2_charset_fact_add(pp2_charset_fact_t pft, + pp2_charset_t pct, const char *default_id) +{ + struct pp2_charset_entry *pce; + + for (pce = pft->list; pce; pce = pce->next) + if (!strcmp(default_id, pce->name)) + break; + + if (!pce) + { + pce = xmalloc(sizeof(*pce)); + pce->name = xstrdup(default_id); + pce->next = pft->list; + pft->list = pce; + } + else + { + pp2_charset_destroy(pce->pct); + } + pce->pct = pct; + return 0; +} + +int pp2_charset_fact_define(pp2_charset_fact_t pft, + xmlNode *xml_node, const char *default_id) +{ + int r; + pp2_charset_t pct; + xmlChar *id; + + assert(xml_node); + pct = pp2_charset_create_xml(xml_node); + if (!pct) + return -1; + id = xmlGetProp(xml_node, (xmlChar*) "id"); + if (id) + default_id = (const char *) id; + if (!default_id) + { + pp2_charset_destroy(pct); + return -1; + } + r = pp2_charset_fact_add(pft, pct, default_id); + xmlFree(id); + return r; +} + +void pp2_charset_fact_incref(pp2_charset_fact_t pft) +{ + (pft->ref_count)++; +} pp2_charset_t pp2_charset_create_xml(xmlNode 
*xml_node) { #if YAZ_HAVE_ICU UErrorCode status = U_ZERO_ERROR; struct icu_chain *chain = 0; - if (xml_node) - xml_node = xml_node->children; while (xml_node && xml_node->type != XML_ELEMENT_NODE) xml_node = xml_node->next; chain = icu_chain_xml_config(xml_node, 1, &status); @@ -108,11 +214,6 @@ pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node) #endif // YAZ_HAVE_ICU } -void pp2_charset_incref(pp2_charset_t pct) -{ - (pct->ref_count)++; -} - pp2_charset_t pp2_charset_create_a_to_z(void) { pp2_charset_t pct = pp2_charset_create(0); @@ -127,7 +228,6 @@ pp2_charset_t pp2_charset_create(struct icu_chain *icu_chn) pct->token_next_handler = pp2_relevance_token_null; pct->get_sort_handler = pp2_get_sort_ascii; pct->get_display_handler = pp2_get_display_ascii; - pct->ref_count = 1; #if YAZ_HAVE_ICU pct->icu_chn = 0; if (icu_chn) @@ -144,18 +244,20 @@ pp2_charset_t pp2_charset_create(struct icu_chain *icu_chn) void pp2_charset_destroy(pp2_charset_t pct) { - if (pct) - { - assert(pct->ref_count >= 1); - --(pct->ref_count); - if (pct->ref_count == 0) - { #if YAZ_HAVE_ICU - icu_chain_destroy(pct->icu_chn); + icu_chain_destroy(pct->icu_chn); #endif - xfree(pct); - } - } + xfree(pct); +} + +pp2_relevance_token_t pp2_relevance_create(pp2_charset_fact_t pft, + const char *id) +{ + struct pp2_charset_entry *pce; + for (pce = pft->list; pce; pce = pce->next) + if (!strcmp(id, pce->name)) + return pp2_relevance_tokenize(pce->pct); + return 0; } pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct) diff --git a/src/charsets.h b/src/charsets.h index 4efb3f0..cc9f269 100644 --- a/src/charsets.h +++ b/src/charsets.h @@ -27,19 +27,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include -struct icu_chain; - -typedef struct pp2_charset_s *pp2_charset_t; typedef struct pp2_relevance_token_s *pp2_relevance_token_t; +typedef struct pp2_charset_fact_s *pp2_charset_fact_t; -pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node); 
-pp2_charset_t pp2_charset_create(struct icu_chain * icu_chn); -pp2_charset_t pp2_charset_create_a_to_z(void); - -void pp2_charset_destroy(pp2_charset_t pct); -void pp2_charset_incref(pp2_charset_t pct); - -pp2_relevance_token_t pp2_relevance_tokenize(pp2_charset_t pct); void pp2_relevance_first(pp2_relevance_token_t prt, const char *buf, int skip_article); @@ -49,6 +39,13 @@ const char *pp2_relevance_token_next(pp2_relevance_token_t prt); const char *pp2_get_sort(pp2_relevance_token_t prt); const char *pp2_get_display(pp2_relevance_token_t prt); +pp2_charset_fact_t pp2_charset_fact_create(void); +void pp2_charset_fact_destroy(pp2_charset_fact_t pft); +int pp2_charset_fact_define(pp2_charset_fact_t pft, + xmlNode *xml_node, const char *default_id); +pp2_relevance_token_t pp2_relevance_create(pp2_charset_fact_t pft, + const char *id); +void pp2_charset_fact_incref(pp2_charset_fact_t pft); #endif /* diff --git a/src/client.c b/src/client.c index c8945f8..6bc8e4f 100644 --- a/src/client.c +++ b/src/client.c @@ -1085,7 +1085,7 @@ int client_parse_query(struct client *cl, const char *query, char *p[512]; extract_terms(se->nmem, cn, p); se->relevance = relevance_create( - se->service->relevance_pct, + se->service->charsets, se->nmem, (const char **) p); } diff --git a/src/pazpar2_config.c b/src/pazpar2_config.c index f42cfae..df050c7 100644 --- a/src/pazpar2_config.c +++ b/src/pazpar2_config.c @@ -126,10 +126,7 @@ static struct conf_service *service_init(struct conf_server *server, service->z3950_session_timeout = 180; service->z3950_operation_timeout = 30; - service->relevance_pct = 0; - service->sort_pct = 0; - service->mergekey_pct = 0; - service->facet_pct = 0; + service->charsets = 0; service->id = service_id ? 
nmem_strdup(nmem, service_id) : 0; service->num_metadata = num_metadata; @@ -246,10 +243,7 @@ void service_destroy(struct conf_service *service) { if (!pazpar2_decref(&service->ref_count, service->mutex)) { - pp2_charset_destroy(service->relevance_pct); - pp2_charset_destroy(service->sort_pct); - pp2_charset_destroy(service->mergekey_pct); - pp2_charset_destroy(service->facet_pct); + pp2_charset_fact_destroy(service->charsets); yaz_mutex_destroy(&service->mutex); nmem_destroy(service->nmem); } @@ -531,61 +525,30 @@ static struct conf_service *service_create_static(struct conf_server *server, } else if (!strcmp((const char *) n->name, "settings")) got_settings++; - else if (!strcmp((const char *) n->name, "relevance")) + else if (!strcmp((const char *) n->name, "icu_chain")) { - if (service->relevance_pct) + if (!service->charsets) + service->charsets = pp2_charset_fact_create(); + if (pp2_charset_fact_define(service->charsets, n, 0)) { - yaz_log(YLOG_LOG, "relevance may not repeat in service"); + yaz_log(YLOG_FATAL, "ICU chain definition error"); return 0; } - else - { - service->relevance_pct = pp2_charset_create_xml(n); - if (!service->relevance_pct) - return 0; - } - } - else if (!strcmp((const char *) n->name, "sort")) - { - if (service->sort_pct) - { - yaz_log(YLOG_LOG, "sort may not repeat in service"); - return 0; - } - else - { - service->sort_pct = pp2_charset_create_xml(n); - if (!service->sort_pct) - return 0; - } } - else if (!strcmp((const char *) n->name, "mergekey")) - { - if (service->mergekey_pct) - { - yaz_log(YLOG_LOG, "mergekey may not repeat in service"); - return 0; - } - else - { - service->mergekey_pct = pp2_charset_create_xml(n); - if (!service->mergekey_pct) - return 0; - } - } - else if (!strcmp((const char *) n->name, "facet")) + else if (!strcmp((const char *) n->name, "relevance") + || !strcmp((const char *) n->name, "sort") + || !strcmp((const char *) n->name, "mergekey") + || !strcmp((const char *) n->name, "facet")) + { - if 
(service->facet_pct) + if (!service->charsets) + service->charsets = pp2_charset_fact_create(); + if (pp2_charset_fact_define(service->charsets, + n->children, (const char *) n->name)) { - yaz_log(YLOG_LOG, "facet may not repeat in service"); + yaz_log(YLOG_FATAL, "ICU chain definition error"); return 0; } - else - { - service->facet_pct = pp2_charset_create_xml(n); - if (!service->facet_pct) - return 0; - } } else if (!strcmp((const char *) n->name, (const char *) "metadata")) { @@ -675,48 +638,17 @@ static void inherit_server_settings(struct conf_service *s) /* use relevance/sort/mergekey/facet from server if not defined for this service.. */ - if (!s->relevance_pct) + if (!s->charsets) { - if (server->relevance_pct) + if (server->charsets) { - s->relevance_pct = server->relevance_pct; - pp2_charset_incref(s->relevance_pct); + s->charsets = server->charsets; + pp2_charset_fact_incref(s->charsets); } else - s->relevance_pct = pp2_charset_create_a_to_z(); - } - - if (!s->sort_pct) - { - if (server->sort_pct) - { - s->sort_pct = server->sort_pct; - pp2_charset_incref(s->sort_pct); - } - else - s->sort_pct = pp2_charset_create_a_to_z(); - } - - if (!s->mergekey_pct) - { - if (server->mergekey_pct) - { - s->mergekey_pct = server->mergekey_pct; - pp2_charset_incref(s->mergekey_pct); - } - else - s->mergekey_pct = pp2_charset_create_a_to_z(); - } - - if (!s->facet_pct) - { - if (server->facet_pct) { - s->facet_pct = server->facet_pct; - pp2_charset_incref(s->facet_pct); + s->charsets = pp2_charset_fact_create(); } - else - s->facet_pct = pp2_charset_create(0); } } @@ -750,10 +682,7 @@ static struct conf_server *server_create(struct conf_config *config, server->service = 0; server->config = config; server->next = 0; - server->relevance_pct = 0; - server->sort_pct = 0; - server->mergekey_pct = 0; - server->facet_pct = 0; + server->charsets = 0; server->server_settings = 0; server->http_server = 0; server->iochan_man = 0; @@ -806,30 +735,30 @@ static struct conf_server 
*server_create(struct conf_config *config, if (!(server->server_settings = parse_settings(config, nmem, n))) return 0; } - else if (!strcmp((const char *) n->name, "relevance")) + else if (!strcmp((const char *) n->name, "icu_chain")) { - server->relevance_pct = pp2_charset_create_xml(n); - if (!server->relevance_pct) - return 0; - } - else if (!strcmp((const char *) n->name, "sort")) - { - server->sort_pct = pp2_charset_create_xml(n); - if (!server->sort_pct) - return 0; - } - else if (!strcmp((const char *) n->name, "mergekey")) - { - server->mergekey_pct = pp2_charset_create_xml(n); - if (!server->mergekey_pct) + if (!server->charsets) + server->charsets = pp2_charset_fact_create(); + if (pp2_charset_fact_define(server->charsets, n, 0)) + { + yaz_log(YLOG_FATAL, "ICU chain definition error"); return 0; + } } - else if (!strcmp((const char *) n->name, "facet")) + else if (!strcmp((const char *) n->name, "relevance") + || !strcmp((const char *) n->name, "sort") + || !strcmp((const char *) n->name, "mergekey") + || !strcmp((const char *) n->name, "facet")) { - server->facet_pct = pp2_charset_create_xml(n); - if (!server->facet_pct) + if (!server->charsets) + server->charsets = pp2_charset_fact_create(); + if (pp2_charset_fact_define(server->charsets, + n->children, (const char *) n->name)) + { + yaz_log(YLOG_FATAL, "ICU chain definition error"); return 0; - } + } + } else if (!strcmp((const char *) n->name, "service")) { char *service_id = (char *) @@ -1033,10 +962,7 @@ void server_destroy(struct conf_server *server) service_destroy(s); s = s_next; } - pp2_charset_destroy(server->relevance_pct); - pp2_charset_destroy(server->sort_pct); - pp2_charset_destroy(server->mergekey_pct); - pp2_charset_destroy(server->facet_pct); + pp2_charset_fact_destroy(server->charsets); yaz_log(YLOG_LOG, "server_destroy server=%p", server); http_server_destroy(server->http_server); } diff --git a/src/pazpar2_config.h b/src/pazpar2_config.h index f6cf6d0..8a1ae06 100644 --- 
a/src/pazpar2_config.h +++ b/src/pazpar2_config.h @@ -117,10 +117,7 @@ struct conf_service int ref_count; /* duplicated from conf_server */ - pp2_charset_t relevance_pct; - pp2_charset_t sort_pct; - pp2_charset_t mergekey_pct; - pp2_charset_t facet_pct; + pp2_charset_fact_t charsets; struct database *databases; struct conf_server *server; @@ -140,10 +137,7 @@ struct conf_server char *server_settings; char *server_id; - pp2_charset_t relevance_pct; - pp2_charset_t sort_pct; - pp2_charset_t mergekey_pct; - pp2_charset_t facet_pct; + pp2_charset_fact_t charsets; struct conf_service *service; struct conf_server *next; diff --git a/src/relevance.c b/src/relevance.c index 680d8f6..4df7750 100644 --- a/src/relevance.c +++ b/src/relevance.c @@ -120,7 +120,7 @@ void relevance_countwords(struct relevance *r, struct record_cluster *cluster, cluster->term_frequency_vec[0] += length; } -struct relevance *relevance_create(pp2_charset_t pct, +struct relevance *relevance_create(pp2_charset_fact_t pft, NMEM nmem, const char **terms) { struct relevance *res = nmem_malloc(nmem, sizeof(struct relevance)); @@ -133,7 +133,7 @@ struct relevance *relevance_create(pp2_charset_t pct, res->doc_frequency_vec = nmem_malloc(nmem, res->vec_len * sizeof(int)); memset(res->doc_frequency_vec, 0, res->vec_len * sizeof(int)); res->nmem = nmem; - res->prt = pp2_relevance_tokenize(pct); + res->prt = pp2_relevance_create(pft, "relevance"); res->entries = build_word_entries(res->prt, nmem, terms); return res; } diff --git a/src/relevance.h b/src/relevance.h index cb82601..e357382 100644 --- a/src/relevance.h +++ b/src/relevance.h @@ -27,7 +27,7 @@ struct relevance; struct record_cluster; struct reclist; -struct relevance *relevance_create(pp2_charset_t pct, +struct relevance *relevance_create(pp2_charset_fact_t pft, NMEM nmem, const char **terms); void relevance_destroy(struct relevance **rp); void relevance_newrec(struct relevance *r, struct record_cluster *cluster); diff --git a/src/session.c 
b/src/session.c index 9b69b38..fa9ebb3 100644 --- a/src/session.c +++ b/src/session.c @@ -200,8 +200,17 @@ void add_facet(struct session *s, const char *type, const char *value, int count icu_chain_id = (service->metadata + i)->icu_chain; yaz_log(YLOG_LOG, "icu_chain id=%s", icu_chain_id ? icu_chain_id : "null"); - prt = pp2_relevance_tokenize(service->facet_pct); - + if (!icu_chain_id) + icu_chain_id = "facet"; + prt = pp2_relevance_create(service->charsets, icu_chain_id); + if (!prt) + { + yaz_log(YLOG_FATAL, "Unknown ICU chain '%s' for facet of type '%s'", + icu_chain_id, type); + wrbuf_destroy(facet_wrbuf); + wrbuf_destroy(display_wrbuf); + return; + } pp2_relevance_first(prt, value, 0); while ((facet_component = pp2_relevance_token_next(prt))) { @@ -236,6 +245,7 @@ void add_facet(struct session *s, const char *type, const char *value, int count { session_log(s, YLOG_FATAL, "Too many termlists"); wrbuf_destroy(facet_wrbuf); + wrbuf_destroy(display_wrbuf); return; } @@ -1126,7 +1136,7 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name, { const char *norm_str; pp2_relevance_token_t prt = - pp2_relevance_tokenize(service->mergekey_pct); + pp2_relevance_create(service->charsets, "mergekey"); pp2_relevance_first(prt, (const char *) value, 0); if (wrbuf_len(norm_wr) > 0) @@ -1165,7 +1175,7 @@ static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no, { const char *norm_str; pp2_relevance_token_t prt = - pp2_relevance_tokenize(service->mergekey_pct); + pp2_relevance_create(service->charsets, "mergekey"); pp2_relevance_first(prt, (const char *) mergekey, 0); while ((norm_str = pp2_relevance_token_next(prt))) @@ -1472,7 +1482,7 @@ static int ingest_to_cluster(struct client *cl, nmem_malloc(se->nmem, sizeof(union data_types)); - prt = pp2_relevance_tokenize(service->sort_pct); + prt = pp2_relevance_create(service->charsets, "sort"); pp2_relevance_first(prt, rec_md->data.text.disp, skip_article); diff --git 
a/test/test_icu.cfg b/test/test_icu.cfg index 08dd6de..f24007e 100644 --- a/test/test_icu.cfg +++ b/test/test_icu.cfg @@ -23,19 +23,20 @@ - - - - - - - + + + + + - - - - - + + + + + + + + @@ -45,7 +46,8 @@ - + diff --git a/test/test_icu_8.res b/test/test_icu_8.res index 2f49a59..a200b10 100644 --- a/test/test_icu_8.res +++ b/test/test_icu_8.res @@ -2,9 +2,9 @@ 0 Jack Collins2 -Mairs, John W1 -Wood, Helen M1 -Englund, Carl R1 +Mairs, John W.1 +Wood, Helen M.1 +Englund, Carl R.1 Radioisotope Scanning1 -- 1.7.10.4