zoom filter: honor cclmap_*-fields
[metaproxy-moved-to-github.git] / src / filter_zoom.cpp
index 6cabaf9..ea3df01 100644 (file)
@@ -23,8 +23,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <metaproxy/util.hpp>
 #include "torus.hpp"
 
+#include <libxslt/xsltutils.h>
+#include <libxslt/transform.h>
+
 #include <boost/thread/mutex.hpp>
 #include <boost/thread/condition.hpp>
+#include <yaz/ccl.h>
+#include <yaz/oid_db.h>
 #include <yaz/diagbib1.h>
 #include <yaz/log.h>
 #include <yaz/zgdu.h>
@@ -35,6 +40,21 @@ namespace yf = mp::filter;
 
 namespace metaproxy_1 {
     namespace filter {
+        // Configuration of one searchable target. Impl::parse_torus()
+        // creates one instance per torus <layer> element and fills the
+        // fields from that element's children.
+        struct Zoom::Searchable {
+            // Text of <id>; compared with the normalized database name
+            // when a backend is selected.
+            std::string database;
+            // Text of <zurl>; the address to connect to ("http://" is
+            // prepended when sru is set).
+            std::string target;
+            // Text of <queryEncoding>; passed on as the "rpnCharset" option.
+            std::string query_encoding;
+            // Text of <sru>; passed on as the "sru" option when non-empty.
+            std::string sru;
+            // Text of <requestSyntax>; record syntax requested from the
+            // target when the client asks for XML with element set "pz2".
+            std::string request_syntax;
+            // Text of <elementSet>; element set used in "pz2" mode
+            // ("F" is used when this is empty).
+            std::string element_set;
+            // Text of <recordEncoding>; appended as "; charset=..." to the
+            // record type used to fetch records in "pz2" mode.
+            std::string record_encoding;
+            // Text of <transform>; file name of the XSLT stylesheet applied
+            // to records in "pz2" mode.
+            std::string transform_xsl_fname;
+            // Value of <useTurboMarc>; selects record type "txml" instead
+            // of "xml". Defaults to false.
+            bool use_turbomarc;
+            // Value of <piggyback>; defaults to true. It is not read by any
+            // code visible in this change.
+            bool piggyback;
+            // CCL qualifiers collected from <cclmap_*> elements; created in
+            // the constructor and released in the destructor.
+            CCL_bibset ccl_bibset;
+            Searchable();
+            ~Searchable();
+        };
         class Zoom::Backend {
             friend class Impl;
             friend class Frontend;
@@ -42,36 +62,40 @@ namespace metaproxy_1 {
             ZOOM_connection m_connection;
             ZOOM_resultset m_resultset;
             std::string m_frontend_database;
+            SearchablePtr sptr;
+            xsltStylesheetPtr xsp;
         public:
-            Backend();
+            Backend(SearchablePtr sptr);
             ~Backend();
             void connect(std::string zurl, int *error, const char **addinfo);
             void search_pqf(const char *pqf, Odr_int *hits,
                             int *error, const char **addinfo);
+            void present(Odr_int start, Odr_int number, ZOOM_record *recs,
+                         int *error, const char **addinfo);
             void set_option(const char *name, const char *value);
             int get_error(const char **addinfo);
         };
-        struct Zoom::Searchable {
-            std::string m_database;
-            std::string m_target;
-            std::string query_encoding;
-            std::string sru;
-            Searchable(std::string norm_db, std::string target);
-            ~Searchable();
-        };
         class Zoom::Frontend {
             friend class Impl;
             Impl *m_p;
             bool m_is_virtual;
             bool m_in_use;
             yazpp_1::GDU m_init_gdu;
-            std::list<BackendPtr> m_backend_list;
+            BackendPtr m_backend;
             void handle_package(mp::Package &package);
             void handle_search(mp::Package &package);
             void handle_present(mp::Package &package);
             BackendPtr get_backend_from_databases(std::string &database,
                                                   int *error,
                                                   const char **addinfo);
+            Z_Records *get_records(Odr_int start,
+                                   Odr_int number_to_present,
+                                   int *error,
+                                   const char **addinfo,
+                                   Odr_int *number_of_records_returned,
+                                   ODR odr, BackendPtr b,
+                                   Odr_oid *preferredRecordSyntax,
+                                   const char *element_set_name);
         public:
             Frontend(Impl *impl);
             ~Frontend();
@@ -88,7 +112,7 @@ namespace metaproxy_1 {
             void release_frontend(mp::Package &package);
             void parse_torus(const xmlNode *ptr);
 
-            std::list<Zoom::Searchable>m_searchables;
+            std::list<Zoom::SearchablePtr>m_searchables;
 
             std::map<mp::Session, FrontendPtr> m_clients;            
             boost::mutex m_mutex;
@@ -121,14 +145,17 @@ void yf::Zoom::process(mp::Package &package) const
 
 // define Implementation stuff
 
+// Create a backend for the searchable `ptr`. Only the ZOOM connection
+// object is created here (connect() is a separate call); the backend starts
+// without a result set and without an XSLT stylesheet.
-yf::Zoom::Backend::Backend()
+yf::Zoom::Backend::Backend(SearchablePtr ptr) : sptr(ptr)
 {
     m_connection = ZOOM_connection_create(0);
     m_resultset = 0;
+    xsp = 0;
 }
 
+// Free the XSLT stylesheet if one was attached, then destroy the ZOOM
+// connection and the result set (which may still be 0 if no search ran).
 yf::Zoom::Backend::~Backend()
 {
+    if (xsp)
+        xsltFreeStylesheet(xsp);
     ZOOM_connection_destroy(m_connection);
     ZOOM_resultset_destroy(m_resultset);
 }
@@ -138,25 +165,32 @@ void yf::Zoom::Backend::connect(std::string zurl,
 {
     ZOOM_connection_connect(m_connection, zurl.c_str(), 0);
     *error = ZOOM_connection_error(m_connection, 0, addinfo);
-    yaz_log(YLOG_LOG, "ZOOM_connection_connect: error: %d", *error);
 }
 
+// Run the PQF query `pqf` on this backend's connection and keep the
+// resulting result set in m_resultset.
+//
+//   hits     receives the result set size, or 0 when the search failed
+//   error    receives the ZOOM connection error code (0 on success)
+//   addinfo  receives the additional error information, if any
 void yf::Zoom::Backend::search_pqf(const char *pqf, Odr_int *hits,
                                    int *error, const char **addinfo)
 {
-    yaz_log(YLOG_LOG, "ZOOM_connection_search_pqf pqf=%s", pqf);
+    // A Backend is reused when the same database is searched again, so the
+    // previous result set must be released before it is overwritten;
+    // otherwise every repeated search leaks one result set. As in the
+    // destructor, m_resultset may still be 0 here.
+    ZOOM_resultset_destroy(m_resultset);
     m_resultset = ZOOM_connection_search_pqf(m_connection, pqf);
     *error = ZOOM_connection_error(m_connection, 0, addinfo);
-    yaz_log(YLOG_LOG, "ZOOM_connection_search_pqf: error: %d", *error);
     if (*error == 0)
         *hits = ZOOM_resultset_size(m_resultset);
     else
         *hits = 0;
 }
 
+// Fetch `number` records starting at offset `start` from the current
+// result set into the caller-supplied array `recs`, then report the
+// connection's error state through `error` and `addinfo`.
+void yf::Zoom::Backend::present(Odr_int start, Odr_int number,
+                                ZOOM_record *recs,
+                                int *error, const char **addinfo)
+{
+    ZOOM_resultset_records(m_resultset, recs, start, number);
+    *error = ZOOM_connection_error(m_connection, 0, addinfo);
+}
+
+// Set option `name` to `value` on the connection and, when a result set
+// already exists, on that result set as well.
 void yf::Zoom::Backend::set_option(const char *name, const char *value)
 {
     ZOOM_connection_option_set(m_connection, name, value);
+    if (m_resultset)
+        ZOOM_resultset_option_set(m_resultset, name, value);
 }
 
 int yf::Zoom::Backend::get_error(const char **addinfo)
@@ -164,14 +198,16 @@ int yf::Zoom::Backend::get_error(const char **addinfo)
     return ZOOM_connection_error(m_connection, 0, addinfo);
 }
 
+// Start with piggyback enabled, TurboMarc disabled and a freshly created,
+// empty CCL qualifier set; the string fields stay empty until assigned.
-yf::Zoom::Searchable::Searchable(std::string database, 
-                                 std::string target)
-    : m_database(database), m_target(target)
+yf::Zoom::Searchable::Searchable()
 {
+    piggyback = true;
+    use_turbomarc = false;
+    ccl_bibset = ccl_qual_mk();
 }
 
+// Release the CCL qualifier set created in the constructor.
 yf::Zoom::Searchable::~Searchable()
 {
+    ccl_qual_rm(&ccl_bibset);
 }
 
 yf::Zoom::Frontend::Frontend(Impl *impl) : 
@@ -253,11 +289,8 @@ void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
                     continue;
                 if (!strcmp((const char *) ptr2->name, "layer"))
                 {
-                    std::string database;
-                    std::string target;
-                    std::string route;
-                    std::string sru;
-                    std::string query_encoding;
+                    Zoom::SearchablePtr s(new Searchable);
+
                     const xmlNode *ptr3 = ptr2;
                     for (ptr3 = ptr3->children; ptr3; ptr3 = ptr3->next)
                     {
@@ -265,32 +298,69 @@ void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
                             continue;
                         if (!strcmp((const char *) ptr3->name, "id"))
                         {
-                            database = mp::xml::get_text(ptr3);
+                            s->database = mp::xml::get_text(ptr3);
                         }
                         else if (!strcmp((const char *) ptr3->name, "zurl"))
                         {
-                            target = mp::xml::get_text(ptr3);
+                            s->target = mp::xml::get_text(ptr3);
                         }
                         else if (!strcmp((const char *) ptr3->name, "sru"))
                         {
-                            sru = mp::xml::get_text(ptr3);
+                            s->sru = mp::xml::get_text(ptr3);
                         }
                         else if (!strcmp((const char *) ptr3->name,
                                          "queryEncoding"))
                         {
-                            query_encoding = mp::xml::get_text(ptr3);
+                            s->query_encoding = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "piggyback"))
+                        {
+                            s->piggyback = mp::xml::get_bool(ptr3, true);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "requestSyntax"))
+                        {
+                            s->request_syntax = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "elementSet"))
+                        {
+                            s->element_set = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "recordEncoding"))
+                        {
+                            s->record_encoding = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "transform"))
+                        {
+                            s->transform_xsl_fname = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "useTurboMarc"))
+                        {
+                            yaz_log(YLOG_LOG, "seeing useTurboMarc");
+                            s->use_turbomarc = mp::xml::get_bool(ptr3, false);
+                            yaz_log(YLOG_LOG, "value=%s",
+                                    s->use_turbomarc ? "1" : "0");
+                                    
+                        }
+                        else if (!strncmp((const char *) ptr3->name,
+                                          "cclmap_", 7))
+                        {
+                            std::string value = mp::xml::get_text(ptr3);
+                            ccl_qual_fitem(s->ccl_bibset, value.c_str(),
+                                           (const char *) ptr3->name + 7);
                         }
                     }
-                    if (database.length() && target.length())
+                    if (s->database.length() && s->target.length())
                     {
-                        yaz_log(YLOG_LOG, "add db=%s target=%s", 
-                                database.c_str(), target.c_str());
-                        Zoom::Searchable searchable(
-                            mp::util::database_name_normalize(database),
-                            target);
-                        searchable.query_encoding = query_encoding;
-                        searchable.sru = sru;
-                        m_searchables.push_back(searchable);
+                        yaz_log(YLOG_LOG, "add db=%s target=%s turbomarc=%s", 
+                                s->database.c_str(), s->target.c_str(),
+                                s->use_turbomarc ? "1" : "0");
+                        m_searchables.push_back(s);
                     }
                 }
             }
@@ -344,21 +414,17 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
     std::string &database, int *error, const char **addinfo)
 {
     std::list<BackendPtr>::const_iterator map_it;
-    map_it = m_backend_list.begin();
-    for (; map_it != m_backend_list.end(); map_it++)
-        if ((*map_it)->m_frontend_database == database)
-            return *map_it;
+    if (m_backend && m_backend->m_frontend_database == database)
+        return m_backend;
 
-    std::list<Zoom::Searchable>::const_iterator map_s =
+    std::list<Zoom::SearchablePtr>::iterator map_s =
         m_p->m_searchables.begin();
 
     std::string c_db = mp::util::database_name_normalize(database);
 
     while (map_s != m_p->m_searchables.end())
     {
-        yaz_log(YLOG_LOG, "consider db=%s map db=%s",
-                database.c_str(), map_s->m_database.c_str());
-        if (c_db.compare(map_s->m_database) == 0)
+        if (c_db.compare((*map_s)->database) == 0)
             break;
         map_s++;
     }
@@ -369,28 +435,225 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
         BackendPtr b;
         return b;
     }
-    BackendPtr b(new Backend);
 
-    if (map_s->query_encoding.length())
-        b->set_option("rpnCharset", map_s->query_encoding.c_str());
+    xsltStylesheetPtr xsp = 0;
+    if ((*map_s)->transform_xsl_fname.length())
+    {
+        xmlDoc *xsp_doc = xmlParseFile((*map_s)->transform_xsl_fname.c_str());
+        if (!xsp_doc)
+        {
+            *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
+            *addinfo = "xmlParseFile failed";
+            BackendPtr b;
+            return b;
+        }
+        xsp = xsltParseStylesheetDoc(xsp_doc);
+        if (!xsp)
+        {
+            *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
+            *addinfo = "xsltParseStylesheetDoc failed";
+            BackendPtr b;
+            xmlFreeDoc(xsp_doc);
+            return b;
+        }
+    }
+
+    SearchablePtr sptr = *map_s;
+
+    m_backend.reset();
+
+    BackendPtr b(new Backend(sptr));
+
+    b->xsp = xsp;
+    b->m_frontend_database = database;
+
+    if (sptr->query_encoding.length())
+        b->set_option("rpnCharset", sptr->query_encoding.c_str());
 
     std::string url;
-    if (map_s->sru.length())
+    if (sptr->sru.length())
     {
-        url = "http://" + map_s->m_target;
-        b->set_option("sru", map_s->sru.c_str());
+        url = "http://" + sptr->target;
+        b->set_option("sru", sptr->sru.c_str());
     }
     else
-        url = map_s->m_target;
+        url = sptr->target;
 
     b->connect(url, error, addinfo);
     if (*error == 0)
     {
-        m_backend_list.push_back(b);
+        m_backend = b;
     }
     return b;
 }
 
+// Fetch records from the current result set of backend `b` and wrap them
+// in a Z_Records structure allocated on `odr`.
+//
+//   start                       zero-based offset of the first record
+//   number_to_present           number of records wanted (capped at 10000)
+//   error, addinfo              receive the backend error state once a
+//                               fetch has been attempted
+//   number_of_records_returned  receives the number of records packaged
+//   preferredRecordSyntax       syntax requested by the client, may be 0
+//   element_set_name            element set requested by the client,
+//                               may be 0
+//
+// Returns 0 when nothing is requested, when the fetch fails or when no
+// record comes back. Otherwise returns a list holding one entry per
+// record: either the record itself or a surrogate diagnostic.
+//
+// "pz2" mode: when the client asks for XML with element set "pz2" and the
+// searchable has a requestSyntax configured, records are requested from the
+// target in that syntax, fetched as (Turbo)MARC XML, optionally run through
+// the backend's XSLT stylesheet and returned to the client as XML.
+Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
+                                           Odr_int number_to_present,
+                                           int *error,
+                                           const char **addinfo,
+                                           Odr_int *number_of_records_returned,
+                                           ODR odr,
+                                           BackendPtr b,
+                                           Odr_oid *preferredRecordSyntax,
+                                           const char *element_set_name)
+{
+    *number_of_records_returned = 0;
+    Z_Records *records = 0;
+    bool enable_pz2_transform = false;
+
+    if (start < 0 || number_to_present <= 0)
+        return records;
+
+    if (number_to_present > 10000)
+        number_to_present = 10000;
+
+    ZOOM_record *recs = (ZOOM_record *)
+        odr_malloc(odr, number_to_present * sizeof(*recs));
+    // Clear the array so the scan below never reads indeterminate
+    // pointers should the fetch return without storing anything (for
+    // instance when the backend has no result set yet).
+    for (Odr_int k = 0; k < number_to_present; k++)
+        recs[k] = 0;
+
+    char oid_name_str[OID_STR_MAX];
+    const char *syntax_name = 0;
+
+    if (preferredRecordSyntax)
+    {
+        // element_set_name is 0 when the client sent no element set name,
+        // so it must be checked before it is handed to strcmp().
+        if (!oid_oidcmp(preferredRecordSyntax, yaz_oid_recsyn_xml)
+            && element_set_name && !strcmp(element_set_name, "pz2"))
+        {
+            if (b->sptr->request_syntax.length())
+            {
+                syntax_name = b->sptr->request_syntax.c_str();
+                enable_pz2_transform = true;
+            }
+        }
+        else
+        {
+            syntax_name =
+                yaz_oid_to_string_buf(preferredRecordSyntax, 0, oid_name_str);
+        }
+    }
+
+    yaz_log(YLOG_LOG, "enable_pz2_transform %s", enable_pz2_transform ?
+            "enabled" : "disabled");
+
+    b->set_option("preferredRecordSyntax", syntax_name);
+
+    if (enable_pz2_transform)
+    {
+        // In pz2 mode the element set comes from the configuration, with
+        // "F" as the fallback.
+        element_set_name = "F";
+        if (b->sptr->element_set.length())
+            element_set_name = b->sptr->element_set.c_str();
+    }
+
+    b->set_option("elementSetName", element_set_name);
+
+    b->present(start, number_to_present, recs, error, addinfo);
+
+    // Count the leading records that were actually delivered.
+    Odr_int num_found = 0;
+    if (!*error)
+    {
+        while (num_found < number_to_present && recs[num_found])
+            num_found++;
+    }
+    if (num_found > 0)
+    {  // only return records if no error and at least one record
+        char *odr_database = odr_strdup(odr,
+                                        b->m_frontend_database.c_str());
+        Z_NamePlusRecordList *npl = (Z_NamePlusRecordList *)
+            odr_malloc(odr, sizeof(*npl));
+        *number_of_records_returned = num_found;
+        npl->num_records = num_found;
+        npl->records = (Z_NamePlusRecord **)
+            odr_malloc(odr, num_found * sizeof(*npl->records));
+        // Only num_found slots were allocated, so stop there rather than
+        // at number_to_present: going further would write past the end of
+        // npl->records and touch records that were never delivered.
+        for (Odr_int i = 0; i < num_found; i++)
+        {
+            Z_NamePlusRecord *npr = 0;
+            const char *sur_addinfo = 0;
+            int sur_error = ZOOM_record_error(recs[i], 0 /* msg */,
+                                              &sur_addinfo, 0 /* diagset */);
+
+            if (sur_error)
+            {
+                npr = zget_surrogateDiagRec(odr, odr_database, sur_error,
+                                            sur_addinfo);
+            }
+            else if (enable_pz2_transform)
+            {
+                char rec_type_str[100];
+
+                strcpy(rec_type_str, b->sptr->use_turbomarc ?
+                       "txml" : "xml");
+
+                // prevent buffer overflow ...
+                if (b->sptr->record_encoding.length() > 0 &&
+                    b->sptr->record_encoding.length() <
+                    (sizeof(rec_type_str)-20))
+                {
+                    strcat(rec_type_str, "; charset=");
+                    strcat(rec_type_str, b->sptr->record_encoding.c_str());
+                }
+
+                int rec_len;
+                const char *rec_buf = ZOOM_record_get(recs[i], rec_type_str,
+                                                      &rec_len);
+                // Output of the stylesheet; allocated by libxslt and
+                // released with xmlFree() below.
+                xmlChar *xslt_buf = 0;
+                if (rec_buf && b->xsp)
+                {
+                    xmlDoc *rec_doc = xmlParseMemory(rec_buf, rec_len);
+                    if (rec_doc)
+                    {
+                        xmlDoc *rec_res =
+                            xsltApplyStylesheet(b->xsp, rec_doc, 0);
+
+                        if (rec_res)
+                        {
+                            xsltSaveResultToString(&xslt_buf, &rec_len,
+                                                   rec_res, b->xsp);
+                            rec_buf = (const char *) xslt_buf;
+                            xmlFreeDoc(rec_res);
+                        }
+                        xmlFreeDoc(rec_doc);
+                    }
+                }
+
+                if (rec_buf)
+                {
+                    npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr));
+                    npr->databaseName = odr_database;
+                    npr->which = Z_NamePlusRecord_databaseRecord;
+                    // NOTE(review): relies on z_ext_record_xml() copying
+                    // the buffer into ODR memory, so xslt_buf can be freed
+                    // right after -- confirm against the YAZ version in use.
+                    npr->u.databaseRecord =
+                        z_ext_record_xml(odr, rec_buf, rec_len);
+                }
+                else
+                {
+                    npr = zget_surrogateDiagRec(
+                        odr, odr_database,
+                        YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS,
+                        rec_type_str);
+                }
+                if (xslt_buf)
+                    xmlFree(xslt_buf);
+            }
+            else
+            {
+                Z_External *ext =
+                    (Z_External *) ZOOM_record_get(recs[i], "ext", 0);
+                if (ext)
+                {
+                    npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr));
+                    npr->databaseName = odr_database;
+                    npr->which = Z_NamePlusRecord_databaseRecord;
+                    npr->u.databaseRecord = ext;
+                }
+                else
+                {
+                    npr = zget_surrogateDiagRec(
+                        odr, odr_database,
+                        YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS,
+                        "ZOOM_record, type ext");
+                }
+            }
+            npl->records[i] = npr;
+        }
+        records = (Z_Records*) odr_malloc(odr, sizeof(*records));
+        records->which = Z_Records_DBOSD;
+        records->u.databaseOrSurDiagnostics = npl;
+    }
+    return records;
+}
+    
+
 void yf::Zoom::Frontend::handle_search(mp::Package &package)
 {
     Z_GDU *gdu = package.request().get();
@@ -406,8 +669,8 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
         return;
     }
 
-    int error;
-    const char *addinfo;
+    int error = 0;
+    const char *addinfo = 0;
     std::string db(sr->databaseNames[0]);
     BackendPtr b = get_backend_from_databases(db, &error, &addinfo);
     if (error)
@@ -418,45 +681,121 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
         package.response() = apdu_res;
         return;
     }
+
+    b->set_option("setname", "default");
+
+    Odr_int hits = 0;
     Z_Query *query = sr->query;
     if (query->which == Z_Query_type_1 || query->which == Z_Query_type_101)
     {
+        // RPN
         WRBUF w = wrbuf_alloc();
         yaz_rpnquery_to_wrbuf(w, query->u.type_1);
-        Odr_int hits;
-        int error;
-        const char *addinfo = 0;
 
         b->search_pqf(wrbuf_cstr(w), &hits, &error, &addinfo);
+
+        wrbuf_destroy(w);
+    }
+    else if (query->which == Z_Query_type_2)
+    {
+        // CCL
+        WRBUF w = wrbuf_alloc();
+        wrbuf_write(w, (const char *) query->u.type_2->buf,
+                    query->u.type_2->len);
+        int cerror, cpos;
+        struct ccl_rpn_node *cn;
+        cn = ccl_find_str(b->sptr->ccl_bibset, wrbuf_cstr(w), &cerror, &cpos);
         wrbuf_destroy(w);
 
-        apdu_res = 
-            odr.create_searchResponse(
-                apdu_req, error, addinfo);
-        apdu_res->u.searchResponse->resultCount = odr_intdup(odr, hits);
-        package.response() = apdu_res;
+        if (!cn)
+        {
+            char *addinfo = odr_strdup(odr, ccl_err_msg(cerror));
+
+            apdu_res = 
+                odr.create_searchResponse(apdu_req, 
+                                          YAZ_BIB1_MALFORMED_QUERY,
+                                          addinfo);
+            package.response() = apdu_res;
+            return;
+        }
+        w = wrbuf_alloc();
+        ccl_pquery(w, cn);
+        
+        b->search_pqf(wrbuf_cstr(w), &hits, &error, &addinfo);
+        
+        ccl_rpn_delete(cn);
+        wrbuf_destroy(w);
     }
     else
     {
         apdu_res = 
-            odr.create_searchResponse(
-                apdu_req,
-                YAZ_BIB1_QUERY_TYPE_UNSUPP, 0);
+            odr.create_searchResponse(apdu_req, YAZ_BIB1_QUERY_TYPE_UNSUPP, 0);
         package.response() = apdu_res;
         return;
     }
+    
+    const char *element_set_name = 0;
+    Odr_int number_to_present = 0;
+    if (!error)
+        mp::util::piggyback_sr(sr, hits, number_to_present, &element_set_name);
+    
+    Odr_int number_of_records_returned = 0;
+    Z_Records *records = get_records(
+        0, number_to_present, &error, &addinfo,
+        &number_of_records_returned, odr, b, sr->preferredRecordSyntax,
+        element_set_name);
+    apdu_res = odr.create_searchResponse(apdu_req, error, addinfo);
+    if (records)
+    {
+        apdu_res->u.searchResponse->records = records;
+        apdu_res->u.searchResponse->numberOfRecordsReturned =
+            odr_intdup(odr, number_of_records_returned);
+    }
+    apdu_res->u.searchResponse->resultCount = odr_intdup(odr, hits);
+    package.response() = apdu_res;
 }
 
+// Answer a Present request from the records of the frontend's current
+// backend. The response carries a diagnostic when there is no backend or
+// when the record composition is not of the "simple" kind.
 void yf::Zoom::Frontend::handle_present(mp::Package &package)
 {
     Z_GDU *gdu = package.request().get();
     Z_APDU *apdu_req = gdu->u.z3950;
+    Z_APDU *apdu_res = 0;
+    Z_PresentRequest *pr = apdu_req->u.presentRequest;
+
     mp::odr odr;
-    package.response() = odr.create_close(
-        apdu_req,
-        Z_Close_protocolError,
-        "zoom filter has not implemented present request yet");
-    package.session().close();
+    // No backend has been set up for this frontend, so there is nothing
+    // to present from.
+    if (!m_backend)
+    {
+        package.response() = odr.create_presentResponse(
+            apdu_req, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, 0);
+        return;
+    }
+    const char *element_set_name = 0;
+    Z_RecordComposition *comp = pr->recordComposition;
+    // Only the simple record composition is handled here.
+    if (comp && comp->which != Z_RecordComp_simple)
+    {
+        package.response() = odr.create_presentResponse(
+            apdu_req, 
+            YAZ_BIB1_PRESENT_COMP_SPEC_PARAMETER_UNSUPP, 0);
+        return;
+    }
+    // element_set_name stays 0 unless a generic element set name was given.
+    if (comp && comp->u.simple->which == Z_ElementSetNames_generic)
+        element_set_name = comp->u.simple->u.generic;
+    Odr_int number_of_records_returned = 0;
+    int error = 0;
+    const char *addinfo = 0;
+    // The start point from the request is reduced by one before it is
+    // passed on as the offset for get_records().
+    Z_Records *records = get_records(
+        *pr->resultSetStartPoint - 1, *pr->numberOfRecordsRequested,
+        &error, &addinfo, &number_of_records_returned, odr, m_backend,
+        pr->preferredRecordSyntax, element_set_name);
+
+    apdu_res = odr.create_presentResponse(apdu_req, error, addinfo);
+    if (records)
+    {
+        apdu_res->u.presentResponse->records = records;
+        apdu_res->u.presentResponse->numberOfRecordsReturned =
+            odr_intdup(odr, number_of_records_returned);
+    }
+    package.response() = apdu_res;
 }
 
 void yf::Zoom::Frontend::handle_package(mp::Package &package)