From 2e6ca38ba48b41f025b5740bf33a42afbefd4641 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 2 Jul 2015 14:42:00 +0200 Subject: [PATCH] facet ID term map PAZ-1008 New setting pz:facetmap:split:name. --- doc/pazpar2_conf.xml | 22 ++++++++++----- perf/bash/pp2client.sh | 1 + src/client.c | 15 ++++++++--- src/session.c | 70 +++++++++++++++++++++++++++++++++++++++++++----- src/session.h | 7 ++++- src/termlists.c | 4 ++- src/termlists.h | 4 ++- 7 files changed, 104 insertions(+), 19 deletions(-) diff --git a/doc/pazpar2_conf.xml b/doc/pazpar2_conf.xml index 86dbdc8..7994914 100644 --- a/doc/pazpar2_conf.xml +++ b/doc/pazpar2_conf.xml @@ -1271,12 +1271,22 @@ supports (native) facets. The value is the name of the field on the target. - - - At this point only Solr targets have been tested with this - facility. - - + + + + + pz:facetmap:split:name + + + Like pz:facetmap, but makes Pazpar2 interpret the term value as consisting + of two items separated by a colon. The first item is the raw ID to be + sent to the database if limitmap on the field + name is used. The second item is + the display term. + + + This facility was added in Pazpar2 version 1.11.0. 
+ diff --git a/perf/bash/pp2client.sh b/perf/bash/pp2client.sh index 03b0421..c39077d 100755 --- a/perf/bash/pp2client.sh +++ b/perf/bash/pp2client.sh @@ -93,6 +93,7 @@ if [ "$TIME" != "" ] ; then else wget -q -O ${TMP_DIR}$OF.show.xml "$H?command=show&session=$S&sort=relevance&start=0&num=100&block=1" fi +wget -q -O ${TMP_DIR}$OF.termlist.xml "$H?command=termlist&session=$S" wget -q -O ${TMP_DIR}$OF.bytarget.xml "$H?command=bytarget&session=$S" wget -q -O ${TMP_DIR}$OF.stat.xml "$H?command=stat&session=$S" wget -q -O ${TMP_DIR}$OF.info.xml "$H?command=info" diff --git a/src/client.c b/src/client.c index e25bd2d..644e42d 100644 --- a/src/client.c +++ b/src/client.c @@ -521,7 +521,7 @@ static void client_report_facets(struct client *cl, ZOOM_resultset rs) ZOOM_facet_field_get_term(facets[facet_idx], term_idx, &freq); if (term) - add_facet(se, p, term, freq); + add_facet(se, p, term, freq, cl); } break; } @@ -1349,7 +1349,7 @@ static void ccl_quote_map_term(CCL_bibset ccl_map, WRBUF w, } } -static int apply_limit(struct session_database *sdb, +static int apply_limit(struct client *cl, facet_limits_t facet_limits, WRBUF w_pqf, CCL_bibset ccl_map, struct conf_service *service) @@ -1358,6 +1358,7 @@ static int apply_limit(struct session_database *sdb, int i = 0; const char *name; const char *value; + struct session_database *sdb = client_get_database(cl); NMEM nmem_tmp = nmem_create(); for (i = 0; (name = facet_limits_get(facet_limits, i, &value)); i++) @@ -1377,6 +1378,14 @@ static int apply_limit(struct session_database *sdb, nmem_strsplit_escape2(nmem_tmp, "|", value, &values, &num, 1, '\\', 1); + for (i = 0; i < num; i++) + { + const char *id = session_lookup_id_facet(cl->session, + cl, name, + values[i]); + if (id) + values[i] = nmem_strdup(nmem_tmp, id); + } nmem_strsplit_escape2(nmem_tmp, ",", s->value, &cvalues, &cnum, 1, '\\', 1); @@ -1503,7 +1512,7 @@ int client_parse_query(struct client *cl, const char *query, wrbuf_puts(w_pqf, " "); } - if (apply_limit(sdb, 
facet_limits, w_pqf, ccl_map, service)) + if (apply_limit(cl, facet_limits, w_pqf, ccl_map, service)) { ccl_qual_rm(&ccl_map); return -2; diff --git a/src/session.c b/src/session.c index e67cd10..e76da0a 100644 --- a/src/session.c +++ b/src/session.c @@ -203,13 +203,66 @@ static void session_normalize_facet(struct session *s, run_icu(s, icu_chain_id, value, facet_wrbuf, display_wrbuf); } -void add_facet(struct session *s, const char *type, const char *value, int count) +struct facet_id { + char *client_id; + char *type; + char *id; + char *term; + struct facet_id *next; +}; + +static void session_add_id_facet(struct session *s, struct client *cl, + const char *type, + const char *id, + size_t id_len, + const char *term) +{ + struct facet_id *t = nmem_malloc(s->session_nmem, sizeof(*t)); + + t->client_id = nmem_strdup(s->session_nmem, client_get_id(cl)); + t->type = nmem_strdup(s->session_nmem, type); + t->id = nmem_strdupn(s->session_nmem, id, id_len); + t->term = nmem_strdup(s->session_nmem, term); + t->next = s->facet_id_list; + s->facet_id_list = t; +} + + +const char *session_lookup_id_facet(struct session *s, struct client *cl, + const char *type, + const char *term) +{ + struct facet_id *t = s->facet_id_list; + for (; t; t = t->next) + if (!strcmp(client_get_id(cl), t->client_id) && + !strcmp(t->type, type) && !strcmp(t->term, term)) + { + return t->id; + } + return 0; +} + +void add_facet(struct session *s, const char *type, const char *value, int count, struct client *cl) { WRBUF facet_wrbuf = wrbuf_alloc(); WRBUF display_wrbuf = wrbuf_alloc(); + const char *id = 0; + size_t id_len = 0; - session_normalize_facet(s, type, value, display_wrbuf, facet_wrbuf); + /* inspect pz:facetmap:split:name ?? 
*/ + if (!strncmp(type, "split:", 6)) + { + const char *cp = strchr(value, ':'); + if (cp) + { + id = value; + id_len = cp - value; + value = cp + 1; + } + type += 6; + } + session_normalize_facet(s, type, value, display_wrbuf, facet_wrbuf); if (wrbuf_len(facet_wrbuf)) { struct named_termlist **tp = &s->termlists; @@ -224,7 +277,10 @@ void add_facet(struct session *s, const char *type, const char *value, int count (*tp)->next = 0; } termlist_insert((*tp)->termlist, wrbuf_cstr(display_wrbuf), - wrbuf_cstr(facet_wrbuf), count); + wrbuf_cstr(facet_wrbuf), id, id_len, count); + if (id) + session_add_id_facet(s, cl, type, id, id_len, + wrbuf_cstr(display_wrbuf)); } wrbuf_destroy(facet_wrbuf); wrbuf_destroy(display_wrbuf); @@ -1027,6 +1083,7 @@ struct session *new_session(NMEM nmem, struct conf_service *service, session->clients_cached = 0; session->settings_modified = 0; session->session_nmem = nmem; + session->facet_id_list = 0; session->nmem = nmem_create(); session->databases = 0; session->sorted_results = 0; @@ -1216,7 +1273,6 @@ void perform_termlist(struct http_channel *c, struct session *se, wrbuf_puts(c->wrbuf, ""); wrbuf_xmlputs(c->wrbuf, p[i]->display_term); wrbuf_puts(c->wrbuf, ""); - wrbuf_printf(c->wrbuf, "%d", p[i]->frequency); @@ -2382,15 +2438,15 @@ static int ingest_to_cluster(struct client *cl, char year[64]; sprintf(year, "%d", rec_md->data.number.max); - add_facet(se, (char *) type, year, term_factor); + add_facet(se, (char *) type, year, term_factor, cl); if (rec_md->data.number.max != rec_md->data.number.min) { sprintf(year, "%d", rec_md->data.number.min); - add_facet(se, (char *) type, year, term_factor); + add_facet(se, (char *) type, year, term_factor, cl); } } else - add_facet(se, type, wrbuf_cstr(wrbuf_disp), term_factor); + add_facet(se, type, wrbuf_cstr(wrbuf_disp), term_factor, cl); } } else diff --git a/src/session.h b/src/session.h index 153c6ad..49e5338 100644 --- a/src/session.h +++ b/src/session.h @@ -113,6 +113,7 @@ struct session { 
facet_limits_t facet_limits; int clients_starting; struct reclist_sortparms *sorted_results; + struct facet_id *facet_id_list; }; struct statistics { @@ -192,11 +193,15 @@ int ingest_record(struct client *cl, const char *rec, int record_no, NMEM nmem); int ingest_xml_record(struct client *cl, xmlDoc *xdoc, int record_no, NMEM nmem, int cached_copy); void session_alert_watch(struct session *s, int what); -void add_facet(struct session *s, const char *type, const char *value, int count); +void add_facet(struct session *s, const char *type, const char *value, int count, struct client *cl); int session_check_cluster_limit(struct session *se, struct record_cluster *rec); void perform_termlist(struct http_channel *c, struct session *se, const char *name, int num, int version); + +const char *session_lookup_id_facet(struct session *s, struct client *cl, + const char *type, const char *term); + void session_log(struct session *s, int level, const char *fmt, ...) #ifdef __GNUC__ __attribute__ ((format (printf, 3, 4))) diff --git a/src/termlists.c b/src/termlists.c index 8f06a47..79e88ee 100644 --- a/src/termlists.c +++ b/src/termlists.c @@ -62,7 +62,8 @@ struct termlist *termlist_create(NMEM nmem) } void termlist_insert(struct termlist *tl, const char *display_term, - const char *norm_term, int freq) + const char *norm_term, const char *id, size_t id_len, + int freq) { unsigned int bucket; struct termlist_bucket **p; @@ -87,6 +88,7 @@ void termlist_insert(struct termlist *tl, const char *display_term, new->term.norm_term = nmem_strdup(tl->nmem, buf); new->term.display_term = *display_term ? nmem_strdup(tl->nmem, display_term) : new->term.norm_term; + new->term.id = id ? 
nmem_strdupn(tl->nmem, id, id_len) : 0; new->term.frequency = freq; new->next = 0; *p = new; diff --git a/src/termlists.h b/src/termlists.h index 8502e3e..0d5310a 100644 --- a/src/termlists.h +++ b/src/termlists.h @@ -26,6 +26,7 @@ struct termlist_score { char *norm_term; char *display_term; + char *id; int frequency; }; @@ -33,7 +34,8 @@ struct termlist; struct termlist *termlist_create(NMEM nmem); void termlist_insert(struct termlist *tl, const char *display_term, - const char *norm_term, int freq); + const char *norm_term, + const char *id, size_t id_len, int freq); struct termlist_score **termlist_highscore(struct termlist *tl, int *len, NMEM nmem); -- 1.7.10.4