Dynamic ranking PAZ-869
[pazpar2-moved-to-github.git] / src / session.c
index 65caa7c..570d15b 100644 (file)
@@ -631,6 +631,8 @@ static void session_clear_set(struct session *se, struct reclist_sortparms *sp)
     se->total_records = se->total_merged = 0;
     se->num_termlists = 0;
 
+    relevance_clear(se->relevance);
+
     /* reset list of sorted results and clear to relevance search */
     se->sorted_results = nmem_malloc(se->nmem, sizeof(*se->sorted_results));
     se->sorted_results->name = nmem_strdup(se->nmem, sp->name);
@@ -644,9 +646,11 @@ static void session_clear_set(struct session *se, struct reclist_sortparms *sp)
     se->reclist = reclist_create(se->nmem);
 }
 
-static void session_sort_unlocked(struct session *se, struct reclist_sortparms *sp)
+static void session_sort_unlocked(struct session *se,
+                                  struct reclist_sortparms *sp,
+                                  const char *mergekey,
+                                  const char *rank)
 {
-    struct reclist_sortparms *sr;
     struct client_list *l;
     const char *field = sp->name;
     int increasing = sp->increasing;
@@ -655,20 +659,42 @@ static void session_sort_unlocked(struct session *se, struct reclist_sortparms *
 
     session_log(se, YLOG_DEBUG, "session_sort field=%s increasing=%d type=%d",
                 field, increasing, type);
-    /* see if we already have sorted for this criteria */
-    for (sr = se->sorted_results; sr; sr = sr->next)
+
+    if (rank && (!se->rank || strcmp(se->rank, rank)))
     {
-        if (!reclist_sortparms_cmp(sr, sp))
-            break;
+        /* a changed rank always forces a re-search/re-ingest of all clients */
+        assert(rank);
+        xfree(se->rank);
+        se->rank = *rank ? xstrdup(rank) : 0;
+        clients_research = 1;
+        session_log(se, YLOG_DEBUG, "session_sort: new rank = %s",
+                    rank);
     }
-    if (sr)
+    if (mergekey && (!se->mergekey || strcmp(se->mergekey, mergekey)))
     {
-        session_log(se, YLOG_DEBUG, "search_sort: field=%s increasing=%d type=%d already fetched",
-                    field, increasing, type);
-        return;
+        /* a changed mergekey always forces a re-search/re-ingest of all clients */
+        assert(mergekey);
+        xfree(se->mergekey);
+        se->mergekey = *mergekey ? xstrdup(mergekey) : 0;
+        clients_research = 1;
+        session_log(se, YLOG_DEBUG, "session_sort: new mergekey = %s",
+                    mergekey);
+    }
+    if (clients_research == 0)
+    {
+        struct reclist_sortparms *sr;
+        for (sr = se->sorted_results; sr; sr = sr->next)
+            if (!reclist_sortparms_cmp(sr, sp))
+                break;
+        if (sr)
+        {
+            session_log(se, YLOG_DEBUG, "session_sort: field=%s increasing=%d type=%d already fetched",
+                        field, increasing, type);
+            return;
+        }
     }
-    session_log(se, YLOG_DEBUG, "search_sort: field=%s increasing=%d type=%d must fetch",
-                    field, increasing, type);
+    session_log(se, YLOG_DEBUG, "session_sort: field=%s increasing=%d type=%d must fetch",
+                field, increasing, type);
 
     // We need to reset reclist on every sort that changes the records, not just for position
     // So if just one client requires new searching, we need to clear set.
@@ -681,21 +707,23 @@ static void session_sort_unlocked(struct session *se, struct reclist_sortparms *
         client_parse_init(cl, 1);
         clients_research += client_parse_sort(cl, sp);
     }
-    if (clients_research) {
+    if (clients_research)
+    {
         session_log(se, YLOG_DEBUG,
-                    "Reset results due to %d clients researching",
+                    "session_sort: reset results due to %d clients researching",
                     clients_research);
         session_clear_set(se, sp);
     }
-    else {
+    else
+    {
         // A new sorting based on same record set
-        sr = nmem_malloc(se->nmem, sizeof(*sr));
+        struct reclist_sortparms *sr = nmem_malloc(se->nmem, sizeof(*sr));
         sr->name = nmem_strdup(se->nmem, field);
         sr->increasing = increasing;
         sr->type = type;
         sr->next = se->sorted_results;
         se->sorted_results = sr;
-        session_log(se, YLOG_DEBUG, "No research/ingesting done");
+        session_log(se, YLOG_DEBUG, "session_sort: no research/ingesting done");
         return ;
     }
     session_log(se, YLOG_DEBUG, "Re- search/ingesting for clients due to change in sort order");
@@ -708,18 +736,21 @@ static void session_sort_unlocked(struct session *se, struct reclist_sortparms *
             client_get_state(cl) == Client_Working) {
             client_start_search(cl);
         }
-        else {
+        else
+        {
             session_log(se, YLOG_DEBUG,
-                        "Client %s: No re-start/ingest in show. Wrong client state: %d",
+                        "session_sort: %s: No re-start/ingest in show. "
+                        "Wrong client state: %d",
                         client_get_id(cl), client_get_state(cl));
         }
-
     }
 }
 
-void session_sort(struct session *se, struct reclist_sortparms *sp) {
+void session_sort(struct session *se, struct reclist_sortparms *sp,
+                  const char *mergekey, const char *rank)
+{
     //session_enter(se, "session_sort");
-    session_sort_unlocked(se, sp);
+    session_sort_unlocked(se, sp, mergekey, rank);
     //session_leave(se, "session_sort");
 }
 
@@ -731,7 +762,9 @@ enum pazpar2_error_code session_search(struct session *se,
                                        const char *filter,
                                        const char *limit,
                                        const char **addinfo,
-                                       struct reclist_sortparms *sp)
+                                       struct reclist_sortparms *sp,
+                                       const char *mergekey,
+                                       const char *rank)
 {
     int live_channels = 0;
     int no_working = 0;
@@ -752,6 +785,17 @@ enum pazpar2_error_code session_search(struct session *se,
     session_enter(se, "session_search");
     se->settings_modified = 0;
 
+    if (mergekey)
+    {
+        xfree(se->mergekey);
+        se->mergekey = *mergekey ? xstrdup(mergekey) : 0;
+    }
+    if (rank)
+    {
+        xfree(se->rank);
+        se->rank = *rank ? xstrdup(rank) : 0;
+    }
+
     session_clear_set(se, sp);
     relevance_destroy(&se->relevance);
 
@@ -925,6 +969,8 @@ void session_destroy(struct session *se)
     normalize_cache_destroy(se->normalize_cache);
     relevance_destroy(&se->relevance);
     reclist_destroy(se->reclist);
+    xfree(se->mergekey);
+    xfree(se->rank);
     if (nmem_total(se->nmem))
         session_log(se, YLOG_DEBUG, "NMEN operation usage %zd", nmem_total(se->nmem));
     if (nmem_total(se->session_nmem))
@@ -973,6 +1019,8 @@ struct session *new_session(NMEM nmem, struct conf_service *service,
     session->databases = 0;
     session->sorted_results = 0;
     session->facet_limits = 0;
+    session->mergekey = 0;
+    session->rank = 0;
 
     for (i = 0; i <= SESSION_WATCH_MAX; i++)
     {
@@ -1317,9 +1365,7 @@ struct record_cluster **show_range_start(struct session *se,
                 client_update_show_stat(rec->client, 1);
         }
     }
-    if (*num > 0)
-        recs =
-            nmem_malloc(se->nmem, *num * sizeof(struct record_cluster *));
+    recs = nmem_malloc(se->nmem, (*num > 0 ? *num : 1) * sizeof(*recs));
     for (i = 0; i < *num; i++)
     {
         struct record_cluster *r = reclist_read_record(se->reclist);
@@ -1472,7 +1518,7 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
             else if (!strcmp(name, (const char *) type))
             {
                 xmlChar *value = xmlNodeListGetString(doc, n->children, 1);
-                if (value)
+                if (value && *value)
                 {
                     const char *norm_str;
                     pp2_charset_token_t prt =
@@ -1491,10 +1537,11 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
                             wrbuf_puts(norm_wr, norm_str);
                         }
                     }
-                    xmlFree(value);
                     pp2_charset_token_destroy(prt);
                     no_found++;
                 }
+                if (value)
+                    xmlFree(value);
             }
             xmlFree(type);
         }
@@ -1503,15 +1550,25 @@ static int get_mergekey_from_doc(xmlDoc *doc, xmlNode *root, const char *name,
 }
 
 static const char *get_mergekey(xmlDoc *doc, struct client *cl, int record_no,
-                                struct conf_service *service, NMEM nmem)
+                                struct conf_service *service, NMEM nmem,
+                                const char *session_mergekey)
 {
     char *mergekey_norm = 0;
     xmlNode *root = xmlDocGetRootElement(doc);
     WRBUF norm_wr = wrbuf_alloc();
+    xmlChar *mergekey;
 
-    /* consider mergekey from XSL first */
-    xmlChar *mergekey = xmlGetProp(root, (xmlChar *) "mergekey");
-    if (mergekey)
+    if (session_mergekey)
+    {
+        int i, num = 0;
+        char **values = 0;
+        nmem_strsplit_escape2(nmem, ",", session_mergekey, &values,
+                              &num, 1, '\\', 1);
+
+        for (i = 0; i < num; i++)
+            get_mergekey_from_doc(doc, root, values[i], service, norm_wr);
+    }
+    else if ((mergekey = xmlGetProp(root, (xmlChar *) "mergekey")))
     {
         const char *norm_str;
         pp2_charset_token_t prt =
@@ -1669,7 +1726,8 @@ int ingest_record(struct client *cl, const char *rec,
         return -2;
     }
 
-    mergekey_norm = get_mergekey(xdoc, cl, record_no, service, nmem);
+    mergekey_norm = get_mergekey(xdoc, cl, record_no, service, nmem,
+        se->mergekey);
     if (!mergekey_norm)
     {
         session_log(se, YLOG_WARN, "Got no mergekey");
@@ -1677,7 +1735,7 @@ int ingest_record(struct client *cl, const char *rec,
         return -1;
     }
     session_enter(se, "ingest_record");
-    if (client_get_session(cl) == se)
+    if (client_get_session(cl) == se && se->relevance)
         ret = ingest_to_cluster(cl, xdoc, root, record_no, mergekey_norm);
     session_leave(se, "ingest_record");
 
@@ -1843,6 +1901,9 @@ static int ingest_to_cluster(struct client *cl,
     struct record_cluster *cluster;
     struct record_metadata **metadata0;
     struct session_database *sdb = client_get_database(cl);
+    NMEM ingest_nmem = 0;
+    char **rank_values = 0;
+    int rank_num = 0;
     struct record *record = record_create(se->nmem,
                                           service->num_metadata,
                                           service->num_sortkeys, cl,
@@ -1950,6 +2011,14 @@ static int ingest_to_cluster(struct client *cl,
     memcpy(metadata0, cluster->metadata,
            sizeof(*metadata0) * service->num_metadata);
 
+    ingest_nmem = nmem_create();
+    if (se->rank)
+    {
+        yaz_log(YLOG_LOG, "local in sort : %s", se->rank);
+        nmem_strsplit_escape2(ingest_nmem, ",", se->rank, &rank_values,
+                              &rank_num, 1, '\\', 1);
+    }
+
     // now parsing XML record and adding data to cluster or record metadata
     for (n = root->children; n; n = n->next)
     {
@@ -1970,8 +2039,8 @@ static int ingest_to_cluster(struct client *cl,
             struct record_metadata *rec_md = 0;
             int md_field_id = -1;
             int sk_field_id = -1;
-            const char *rank;
-            xmlChar *xml_rank;
+            const char *rank = 0;
+            xmlChar *xml_rank = 0;
 
             type = xmlGetProp(n, (xmlChar *) "type");
             value = xmlNodeListGetString(xdoc, n->children, 1);
@@ -2001,8 +2070,28 @@ static int ingest_to_cluster(struct client *cl,
             if (!rec_md)
                 continue;
 
-            xml_rank = xmlGetProp(n, (xmlChar *) "rank");
-            rank = xml_rank ? (const char *) xml_rank : ser_md->rank;
+            if (rank_num)
+            {
+                int i;
+                for (i = 0; i < rank_num; i++)
+                {
+                    const char *val = rank_values[i];
+                    const char *cp = strchr(val, '=');
+                    if (!cp)
+                        continue;
+                    if ((cp - val) == strlen((const char *) type)
+                        && !memcmp(val, type, cp - val))
+                    {
+                        rank = cp + 1;
+                        break;
+                    }
+                }
+            }
+            else
+            {
+                xml_rank = xmlGetProp(n, (xmlChar *) "rank");
+                rank = xml_rank ? (const char *) xml_rank : ser_md->rank;
+            }
 
             wheretoput = &cluster->metadata[md_field_id];
 
@@ -2141,6 +2230,7 @@ static int ingest_to_cluster(struct client *cl,
     if (value)
         xmlFree(value);
 
+    nmem_destroy(ingest_nmem);
     xfree(metadata0);
     relevance_donerecord(se->relevance, cluster);
     se->total_records++;
@@ -2155,7 +2245,7 @@ void session_log(struct session *s, int level, const char *fmt, ...)
     va_start(ap, fmt);
 
     yaz_vsnprintf(buf, sizeof(buf)-30, fmt, ap);
-    yaz_log(level, "Session %u: %s", s ? s->session_id : "-", buf);
+    yaz_log(level, "Session %u: %s", s ? s->session_id : 0, buf);
 
     va_end(ap);
 }