Only resetting on position, when not done before. Otherwise it would reset on every...
[pazpar2-moved-to-github.git] / src / session.c
index f7f687d..6ac2276 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of Pazpar2.
-   Copyright (C) 2006-2011 Index Data
+   Copyright (C) 2006-2012 Index Data
 
 Pazpar2 is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -76,7 +76,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include "settings.h"
 #include "normalize7bit.h"
 
-#define TERMLIST_HIGH_SCORE 25
+#include <libxml/tree.h>
 
 #define MAX_CHUNK 15
 
@@ -229,13 +229,12 @@ void add_facet(struct session *s, const char *type, const char *value, int count
             }
             
             s->termlists[i].name = nmem_strdup(s->nmem, type);
-            s->termlists[i].termlist 
-                = termlist_create(s->nmem, TERMLIST_HIGH_SCORE);
+            s->termlists[i].termlist = termlist_create(s->nmem);
             s->num_termlists = i + 1;
         }
         
 #if 0
-        session_log(s, YLOG_DEBUG, "Facets for %s: %s norm:%s (%d)", type, value, wrbuf_cstr(facet_wrbuf), count);
+        session_log(s, YLOG_LOG, "Facets for %s: %s norm:%s (%d)", type, value, wrbuf_cstr(facet_wrbuf), count);
 #endif
         termlist_insert(s->termlists[i].termlist, wrbuf_cstr(display_wrbuf),
                         wrbuf_cstr(facet_wrbuf), count);
@@ -406,40 +405,43 @@ const char *session_setting_oneval(struct session_database *db, int offset)
 // setting. However, this is not a realistic use scenario.
 static int prepare_map(struct session *se, struct session_database *sdb)
 {
-    const char *s;
-
-    if (sdb->settings && sdb->settings[PZ_XSLT] && !sdb->map &&
-        (s = session_setting_oneval(sdb, PZ_XSLT)))        
+    if (sdb->settings && !sdb->map)
     {
-        char auto_stylesheet[256];
+        const char *s;
 
-        if (!strcmp(s, "auto"))
+        if (sdb->settings[PZ_XSLT] &&
+            (s = session_setting_oneval(sdb, PZ_XSLT)))        
         {
-            const char *request_syntax = session_setting_oneval(
-                sdb, PZ_REQUESTSYNTAX);
-            if (request_syntax)
+            char auto_stylesheet[256];
+            
+            if (!strcmp(s, "auto"))
             {
-                char *cp;
-                yaz_snprintf(auto_stylesheet, sizeof(auto_stylesheet),
-                             "%s.xsl", request_syntax);
-                for (cp = auto_stylesheet; *cp; cp++)
+                const char *request_syntax = session_setting_oneval(
+                    sdb, PZ_REQUESTSYNTAX);
+                if (request_syntax)
                 {
-                    /* deliberately only consider ASCII */
-                    if (*cp > 32 && *cp < 127)
-                        *cp = tolower(*cp);
+                    char *cp;
+                    yaz_snprintf(auto_stylesheet, sizeof(auto_stylesheet),
+                                 "%s.xsl", request_syntax);
+                    for (cp = auto_stylesheet; *cp; cp++)
+                    {
+                        /* deliberately only consider ASCII */
+                        if (*cp > 32 && *cp < 127)
+                            *cp = tolower(*cp);
+                    }
+                    s = auto_stylesheet;
+                }
+                else
+                {
+                    session_log(se, YLOG_WARN,
+                                "No pz:requestsyntax for auto stylesheet");
                 }
-                s = auto_stylesheet;
-            }
-            else
-            {
-                session_log(se, YLOG_WARN,
-                            "No pz:requestsyntax for auto stylesheet");
             }
+            sdb->map = normalize_cache_get(se->normalize_cache,
+                                           se->service, s);
+            if (!sdb->map)
+                return -1;
         }
-        sdb->map = normalize_cache_get(se->normalize_cache,
-                                       se->service, s);
-        if (!sdb->map)
-            return -1;
     }
     return 0;
 }
@@ -617,42 +619,71 @@ int session_is_preferred_clients_ready(struct session *s)
     return res == 0;
 }
 
-void session_sort(struct session *se, const char *field, int increasing)
+static void session_clear_set(struct session *se,
+                              const char *sort_field, int increasing, int position)
+{
+    reclist_destroy(se->reclist);
+    se->reclist = 0;
+    if (nmem_total(se->nmem))
+        session_log(se, YLOG_DEBUG, "NMEN operation usage %zd",
+                    nmem_total(se->nmem));
+    nmem_reset(se->nmem);
+    se->total_records = se->total_merged = 0;
+    se->num_termlists = 0;
+    
+    /* reset list of sorted results and clear to relevance search */
+    se->sorted_results = nmem_malloc(se->nmem, sizeof(*se->sorted_results));
+    se->sorted_results->field = nmem_strdup(se->nmem, sort_field);
+    se->sorted_results->increasing = increasing;
+    se->sorted_results->position = position;
+    se->sorted_results->next = 0;
+    
+    se->reclist = reclist_create(se->nmem);
+}
+
+void session_sort(struct session *se, const char *field, int increasing,
+                  int position)
 {
     struct session_sorted_results *sr;
     struct client_list *l;
 
     session_enter(se);
 
+    yaz_log(YLOG_LOG, "session_sort field=%s increasing=%d position=%d", field, increasing, position);
     /* see if we already have sorted for this critieria */
+    /* TODO I do not see the point in saving all previous sorts. Dont we re-sort anyway ? */
     for (sr = se->sorted_results; sr; sr = sr->next)
     {
-        if (!strcmp(field, sr->field) && increasing == sr->increasing)
+        if (!strcmp(field, sr->field) && increasing == sr->increasing && sr->position == position)
             break;
     }
     if (sr)
     {
-        yaz_log(YLOG_LOG, "search_sort: field=%s increasing=%d already fetched",
-                field, increasing);
+        session_log(se, YLOG_DEBUG, "search_sort: field=%s increasing=%d position=%d already fetched",
+                    field, increasing, position);
         session_leave(se);
         return;
     }
-    yaz_log(YLOG_LOG, "search_sort: field=%s increasing=%d must fetch",
-            field, increasing);
+    if (position)
+    {
+        session_clear_set(se, field, increasing, position);
+    }
+
+    session_log(se, YLOG_DEBUG, "search_sort: field=%s increasing=%d position=%d must fetch",
+                field, increasing, position);
     sr = nmem_malloc(se->nmem, sizeof(*sr));
     sr->field = nmem_strdup(se->nmem, field);
     sr->increasing = increasing;
+    sr->position = position;
     sr->next = se->sorted_results;
     se->sorted_results = sr;
-    
+        
     for (l = se->clients_active; l; l = l->next)
     {
         struct client *cl = l->client;
-        struct timeval tval;
-        if (client_prep_connection(cl, se->service->z3950_operation_timeout,
-                                   se->service->z3950_session_timeout,
-                                   se->service->server->iochan_man,
-                                   &tval))
+        if (client_get_state(cl) == Client_Connecting ||
+            client_get_state(cl) == Client_Idle ||
+            client_get_state(cl) == Client_Working)
             client_start_search(cl);
     }
     session_leave(se);
@@ -665,7 +696,8 @@ enum pazpar2_error_code session_search(struct session *se,
                                        const char *filter,
                                        const char *limit,
                                        const char **addinfo,
-                                       const char *sort_field, int increasing)
+                                       const char *sort_field,
+                                       int increasing)
 {
     int live_channels = 0;
     int no_working = 0;
@@ -685,27 +717,16 @@ enum pazpar2_error_code session_search(struct session *se,
         session_reset_active_clients(se, 0);
     
     session_enter(se);
-    reclist_destroy(se->reclist);
-    se->reclist = 0;
     se->settings_modified = 0;
+    session_clear_set(se, sort_field, increasing, 0); /* hardcoded position */
     relevance_destroy(&se->relevance);
-    nmem_reset(se->nmem);
-    se->total_records = se->total_merged = 0;
-    se->num_termlists = 0;
 
-    /* reset list of sorted results and clear to relevance search */
-    se->sorted_results = nmem_malloc(se->nmem, sizeof(*se->sorted_results));
-    se->sorted_results->field = nmem_strdup(se->nmem, sort_field);
-    se->sorted_results->increasing = increasing;
-    se->sorted_results->next = 0;
-    
     live_channels = select_targets(se, filter);
     if (!live_channels)
     {
         session_leave(se);
         return PAZPAR2_NO_TARGETS;
     }
-    se->reclist = reclist_create(se->nmem);
 
     yaz_gettimeofday(&tval);
     
@@ -732,7 +753,7 @@ enum pazpar2_error_code session_search(struct session *se,
             continue;
 
         parse_ret = client_parse_query(cl, query, facet_limits, startrecs,
-            maxrecs);
+                                       maxrecs, se->service->ccl_bibset);
         if (parse_ret == -1)
             no_failed_query++;
         else if (parse_ret == -2)
@@ -746,12 +767,12 @@ enum pazpar2_error_code session_search(struct session *se,
                                        &tval);
             if (parse_ret == 1 && r == 2)
             {
-                session_log(se, YLOG_LOG, "client REUSE %s", client_get_id(cl));
+                session_log(se, YLOG_LOG, "client %s REUSE result", client_get_id(cl));
                 client_reingest(cl);
             }
-            else
+            else if (r)
             {
-                session_log(se, YLOG_LOG, "client NEW %s", client_get_id(cl));
+                session_log(se, YLOG_LOG, "client %s NEW search", client_get_id(cl));
                 client_start_search(cl);
             }
             no_working++;
@@ -775,7 +796,7 @@ enum pazpar2_error_code session_search(struct session *se,
         else
             return PAZPAR2_NO_TARGETS;
     }
-    yaz_log(YLOG_LOG, "session_start_search done");
+    session_log(se, YLOG_LOG, "session_start_search done");
     return PAZPAR2_NO_ERROR;
 }
 
@@ -821,8 +842,7 @@ void session_init_databases(struct session *se)
 static struct session_database *load_session_database(struct session *se, 
                                                       char *id)
 {
-    struct database *db = new_database(id, se->session_nmem);
-
+    struct database *db = new_database_inherit_settings(id, se->session_nmem, se->service->settings);
     session_init_databases_fun((void*) se, db);
 
     // New sdb is head of se->databases list
@@ -886,6 +906,10 @@ void session_destroy(struct session *se)
     normalize_cache_destroy(se->normalize_cache);
     relevance_destroy(&se->relevance);
     reclist_destroy(se->reclist);
+    if (nmem_total(se->nmem))
+        session_log(se, YLOG_DEBUG, "NMEN operation usage %zd", nmem_total(se->nmem));
+    if (nmem_total(se->session_nmem))
+        session_log(se, YLOG_DEBUG, "NMEN session usage %zd", nmem_total(se->session_nmem));
     nmem_destroy(se->nmem);
     service_destroy(se->service);
     yaz_mutex_destroy(&se->session_mutex);
@@ -963,7 +987,9 @@ static struct hitsbytarget *hitsbytarget_nb(struct session *se,
         res[*count].id = client_get_id(cl);
         res[*count].name = *name ? name : "Unknown";
         res[*count].hits = client_get_hits(cl);
+        res[*count].approximation = client_get_approximation(cl);
         res[*count].records = client_get_num_records(cl);
+        res[*count].filtered = client_get_num_records_filtered(cl);
         res[*count].diagnostic =
             client_get_diagnostic(cl, &res[*count].addinfo);
         res[*count].state = client_get_state_str(cl);
@@ -987,23 +1013,6 @@ struct hitsbytarget *get_hitsbytarget(struct session *se, int *count, NMEM nmem)
     session_leave(se);
     return p;
 }
-    
-struct termlist_score **get_termlist_score(struct session *se,
-                                           const char *name, int *num)
-{
-    int i;
-    struct termlist_score **tl = 0;
-
-    session_enter(se);
-    for (i = 0; i < se->num_termlists; i++)
-        if (!strcmp((const char *) se->termlists[i].name, name))
-        {
-            tl = termlist_highscore(se->termlists[i].termlist, num);
-            break;
-        }
-    session_leave(se);
-    return tl;
-}
 
 // Compares two hitsbytarget nodes by hitcount
 static int cmp_ht(const void *p1, const void *p2)
@@ -1013,14 +1022,25 @@ static int cmp_ht(const void *p1, const void *p2)
     return h2->hits - h1->hits;
 }
 
+// Compares two hitsbytarget nodes by approximation
+static int cmp_ht_approx(const void *p1, const void *p2)
+{
+    const struct hitsbytarget *h1 = p1;
+    const struct hitsbytarget *h2 = p2;
+    return h2->approximation - h1->approximation;
+}
+
 static int targets_termlist_nb(WRBUF wrbuf, struct session *se, int num,
-                               NMEM nmem)
+                               NMEM nmem, int version)
 {
     struct hitsbytarget *ht;
     int count, i;
 
     ht = hitsbytarget_nb(se, &count, nmem);
-    qsort(ht, count, sizeof(struct hitsbytarget), cmp_ht);
+    if (version >= 2)
+        qsort(ht, count, sizeof(struct hitsbytarget), cmp_ht_approx);
+    else
+        qsort(ht, count, sizeof(struct hitsbytarget), cmp_ht);
     for (i = 0; i < count && i < num && ht[i].hits > 0; i++)
     {
 
@@ -1041,7 +1061,14 @@ static int targets_termlist_nb(WRBUF wrbuf, struct session *se, int num,
         
         wrbuf_printf(wrbuf, "<frequency>" ODR_INT_PRINTF "</frequency>\n",
                      ht[i].hits);
-        
+
+        if (version >= 2) {
+            // Should not print if we know it isn't an approximation.
+            wrbuf_printf(wrbuf, "<approximation>" ODR_INT_PRINTF "</approximation>\n", ht[i].approximation);
+            wrbuf_printf(wrbuf, "<records>%d</records>\n", ht[i].records - ht[i].filtered);
+            wrbuf_printf(wrbuf, "<filtered>%d</filtered>\n", ht[i].filtered);
+        }
+
         wrbuf_puts(wrbuf, "<state>");
         wrbuf_xmlputs(wrbuf, ht[i].state);
         wrbuf_puts(wrbuf, "</state>\n");
@@ -1054,7 +1081,7 @@ static int targets_termlist_nb(WRBUF wrbuf, struct session *se, int num,
 }
 
 void perform_termlist(struct http_channel *c, struct session *se,
-                      const char *name, int num)
+                      const char *name, int num, int version)
 {
     int i, j;
     NMEM nmem_tmp = nmem_create();
@@ -1071,7 +1098,8 @@ void perform_termlist(struct http_channel *c, struct session *se,
     for (j = 0; j < num_names; j++)
     {
         const char *tname;
-        
+        int must_generate_empty = 1; /* bug 5350 */
+
         for (i = 0; i < se->num_termlists; i++)
         {
             tname = se->termlists[i].name;
@@ -1083,8 +1111,10 @@ void perform_termlist(struct http_channel *c, struct session *se,
                 wrbuf_puts(c->wrbuf, "<list name=\"");
                 wrbuf_xmlputs(c->wrbuf, tname);
                 wrbuf_puts(c->wrbuf, "\">\n");
+                must_generate_empty = 0;
 
-                p = termlist_highscore(se->termlists[i].termlist, &len);
+                p = termlist_highscore(se->termlists[i].termlist, &len,
+                                       nmem_tmp);
                 if (p)
                 {
                     int i;
@@ -1115,8 +1145,15 @@ void perform_termlist(struct http_channel *c, struct session *se,
             wrbuf_xmlputs(c->wrbuf, tname);
             wrbuf_puts(c->wrbuf, "\">\n");
 
-            targets_termlist_nb(c->wrbuf, se, num, c->nmem);
+            targets_termlist_nb(c->wrbuf, se, num, c->nmem, version);
             wrbuf_puts(c->wrbuf, "</list>\n");
+            must_generate_empty = 0;
+        }
+        if (must_generate_empty)
+        {
+            wrbuf_puts(c->wrbuf, "<list name=\"");
+            wrbuf_xmlputs(c->wrbuf, names[j]);
+            wrbuf_puts(c->wrbuf, "\"/>\n");
         }
     }
     session_leave(se);
@@ -1171,7 +1208,7 @@ void show_single_stop(struct session *se, struct record_cluster *rec)
 
 struct record_cluster **show_range_start(struct session *se,
                                          struct reclist_sortparms *sp, 
-                                         int start, int *num, int *total, Odr_int *sumhits)
+                                         int start, int *num, int *total, Odr_int *sumhits, Odr_int *approx_hits)
 {
     struct record_cluster **recs;
     struct reclist_sortparms *spp;
@@ -1185,7 +1222,8 @@ struct record_cluster **show_range_start(struct session *se,
     {
         *num = 0;
         *total = 0;
-        *sumhits = 0;
+        *sumhits = 0;        
+        *approx_hits = 0;
         recs = 0;
     }
     else
@@ -1204,9 +1242,11 @@ struct record_cluster **show_range_start(struct session *se,
         *total = reclist_get_num_records(se->reclist);
 
         *sumhits = 0;
-        for (l = se->clients_active; l; l = l->next)
+        *approx_hits = 0;
+        for (l = se->clients_active; l; l = l->next) {
             *sumhits += client_get_hits(l->client);
-        
+            *approx_hits += client_get_approximation(l->client);
+        }
         for (i = 0; i < start; i++)
             if (!reclist_read_record(se->reclist))
             {
@@ -1534,8 +1574,7 @@ int ingest_record(struct client *cl, const char *rec,
     
     if (!check_record_filter(root, sdb))
     {
-        session_log(se, YLOG_LOG, "Filtered out record no %d from %s",
-                    record_no, sdb->database->id);
+        session_log(se, YLOG_LOG, "Filtered out record no %d from %s", record_no, sdb->database->id);
         xmlFreeDoc(xdoc);
         return -2;
     }
@@ -1556,6 +1595,7 @@ int ingest_record(struct client *cl, const char *rec,
     return ret;
 }
 
+// Skip record on non-zero
 static int check_limit_local(struct client *cl,
                              struct record *record,
                              int record_no)
@@ -1574,9 +1614,7 @@ static int check_limit_local(struct client *cl,
         char **values = 0;
         int i, num_v = 0;
         
-        const char *name =
-            client_get_facet_limit_local(cl, sdb, &l, nmem_tmp, &num_v,
-                                         &values);
+        const char *name = client_get_facet_limit_local(cl, sdb, &l, nmem_tmp, &num_v, &values);
         if (!name)
             break;
         
@@ -1588,7 +1626,7 @@ static int check_limit_local(struct client *cl,
         }
         ser_md = &service->metadata[md_field_id];
         rec_md = record->metadata[md_field_id];
-        yaz_log(YLOG_LOG, "check limit local %s", name);
+        yaz_log(YLOG_DEBUG, "check limit local %s", name);
         for (i = 0; i < num_v; )
         {
             if (rec_md)
@@ -1603,10 +1641,10 @@ static int check_limit_local(struct client *cl,
                 }
                 else
                 {
-                    yaz_log(YLOG_LOG, "cmp: '%s' '%s'",
-                            rec_md->data.text.disp, values[i]);
+                    yaz_log(YLOG_DEBUG, "cmp: '%s' '%s'", rec_md->data.text.disp, values[i]);
                     if (!strcmp(rec_md->data.text.disp, values[i]))
                     {
+                        // Value equals, should not be filtered.
                         break;
                     }
                 }
@@ -1618,6 +1656,7 @@ static int check_limit_local(struct client *cl,
                 i++;
             }
         }
+        // Reached the end of the values without a match
         if (i == num_v)
         {
             skip_record = 1;
@@ -1756,6 +1795,8 @@ static int ingest_to_cluster(struct client *cl,
             struct record_metadata *rec_md = 0;
             int md_field_id = -1;
             int sk_field_id = -1;
+            const char *rank;
+            xmlChar *xml_rank;
             
             type = xmlGetProp(n, (xmlChar *) "type");
             value = xmlNodeListGetString(xdoc, n->children, 1);
@@ -1769,7 +1810,7 @@ static int ingest_to_cluster(struct client *cl,
                 continue;
             
             ser_md = &service->metadata[md_field_id];
-            
+
             if (ser_md->sortkey_offset >= 0)
             {
                 sk_field_id = ser_md->sortkey_offset;
@@ -1782,6 +1823,9 @@ static int ingest_to_cluster(struct client *cl,
             if (!rec_md)
                 continue;
 
+            xml_rank = xmlGetProp(n, (xmlChar *) "rank");
+            rank = xml_rank ? (const char *) xml_rank : ser_md->rank;
+
             wheretoput = &cluster->metadata[md_field_id];
 
             // and polulate with data:
@@ -1866,12 +1910,12 @@ static int ingest_to_cluster(struct client *cl,
                 }
             }
 
-
             // ranking of _all_ fields enabled ... 
-            if (ser_md->rank)
+            if (rank)
+            {
                 relevance_countwords(se->relevance, cluster, 
-                                     (char *) value, ser_md->rank,
-                                     ser_md->name);
+                                     (char *) value, rank, ser_md->name);
+            }
 
             // construct facets ... unless the client already has reported them
             if (ser_md->termlist && !client_has_facet(cl, (char *) type))
@@ -1893,6 +1937,8 @@ static int ingest_to_cluster(struct client *cl,
             }
 
             // cleaning up
+            if (xml_rank)
+                xmlFree(xml_rank);
             xmlFree(type);
             xmlFree(value);
             type = value = 0;