zoom filter: honor cclmap_*-fields
[metaproxy-moved-to-github.git] / src / filter_zoom.cpp
index 436444e..ea3df01 100644 (file)
@@ -23,8 +23,12 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <metaproxy/util.hpp>
 #include "torus.hpp"
 
+#include <libxslt/xsltutils.h>
+#include <libxslt/transform.h>
+
 #include <boost/thread/mutex.hpp>
 #include <boost/thread/condition.hpp>
+#include <yaz/ccl.h>
 #include <yaz/oid_db.h>
 #include <yaz/diagbib1.h>
 #include <yaz/log.h>
@@ -41,7 +45,13 @@ namespace metaproxy_1 {
             std::string target;
             std::string query_encoding;
             std::string sru;
+            std::string request_syntax;
+            std::string element_set;
+            std::string record_encoding;
+            std::string transform_xsl_fname;
+            bool use_turbomarc;
             bool piggyback;
+            CCL_bibset ccl_bibset;
             Searchable();
             ~Searchable();
         };
@@ -52,8 +62,10 @@ namespace metaproxy_1 {
             ZOOM_connection m_connection;
             ZOOM_resultset m_resultset;
             std::string m_frontend_database;
+            SearchablePtr sptr;
+            xsltStylesheetPtr xsp;
         public:
-            Backend();
+            Backend(SearchablePtr sptr);
             ~Backend();
             void connect(std::string zurl, int *error, const char **addinfo);
             void search_pqf(const char *pqf, Odr_int *hits,
@@ -100,7 +112,7 @@ namespace metaproxy_1 {
             void release_frontend(mp::Package &package);
             void parse_torus(const xmlNode *ptr);
 
-            std::list<Zoom::Searchable>m_searchables;
+            std::list<Zoom::SearchablePtr>m_searchables;
 
             std::map<mp::Session, FrontendPtr> m_clients;            
             boost::mutex m_mutex;
@@ -133,14 +145,17 @@ void yf::Zoom::process(mp::Package &package) const
 
 // define Implementation stuff
 
-yf::Zoom::Backend::Backend()
+yf::Zoom::Backend::Backend(SearchablePtr ptr) : sptr(ptr)
 {
     m_connection = ZOOM_connection_create(0);
     m_resultset = 0;
+    xsp = 0;
 }
 
 yf::Zoom::Backend::~Backend()
 {
+    if (xsp)
+        xsltFreeStylesheet(xsp);
     ZOOM_connection_destroy(m_connection);
     ZOOM_resultset_destroy(m_resultset);
 }
@@ -186,10 +201,13 @@ int yf::Zoom::Backend::get_error(const char **addinfo)
 yf::Zoom::Searchable::Searchable()
 {
     piggyback = true;
+    use_turbomarc = false;
+    ccl_bibset = ccl_qual_mk();
 }
 
 yf::Zoom::Searchable::~Searchable()
 {
+    ccl_qual_rm(&ccl_bibset);
 }
 
 yf::Zoom::Frontend::Frontend(Impl *impl) : 
@@ -271,7 +289,7 @@ void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
                     continue;
                 if (!strcmp((const char *) ptr2->name, "layer"))
                 {
-                    Zoom::Searchable s;
+                    Zoom::SearchablePtr s(new Searchable);
 
                     const xmlNode *ptr3 = ptr2;
                     for (ptr3 = ptr3->children; ptr3; ptr3 = ptr3->next)
@@ -280,31 +298,68 @@ void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
                             continue;
                         if (!strcmp((const char *) ptr3->name, "id"))
                         {
-                            s.database = mp::xml::get_text(ptr3);
+                            s->database = mp::xml::get_text(ptr3);
                         }
                         else if (!strcmp((const char *) ptr3->name, "zurl"))
                         {
-                            s.target = mp::xml::get_text(ptr3);
+                            s->target = mp::xml::get_text(ptr3);
                         }
                         else if (!strcmp((const char *) ptr3->name, "sru"))
                         {
-                            s.sru = mp::xml::get_text(ptr3);
+                            s->sru = mp::xml::get_text(ptr3);
                         }
                         else if (!strcmp((const char *) ptr3->name,
                                          "queryEncoding"))
                         {
-                            s.query_encoding = mp::xml::get_text(ptr3);
+                            s->query_encoding = mp::xml::get_text(ptr3);
                         }
                         else if (!strcmp((const char *) ptr3->name,
                                          "piggyback"))
                         {
-                            s.piggyback = mp::xml::get_bool(ptr3, true);
+                            s->piggyback = mp::xml::get_bool(ptr3, true);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "requestSyntax"))
+                        {
+                            s->request_syntax = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "elementSet"))
+                        {
+                            s->element_set = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "recordEncoding"))
+                        {
+                            s->record_encoding = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "transform"))
+                        {
+                            s->transform_xsl_fname = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "useTurboMarc"))
+                        {
+                            yaz_log(YLOG_LOG, "seeing useTurboMarc");
+                            s->use_turbomarc = mp::xml::get_bool(ptr3, false);
+                            yaz_log(YLOG_LOG, "value=%s",
+                                    s->use_turbomarc ? "1" : "0");
+                                    
+                        }
+                        else if (!strncmp((const char *) ptr3->name,
+                                          "cclmap_", 7))
+                        {
+                            std::string value = mp::xml::get_text(ptr3);
+                            ccl_qual_fitem(s->ccl_bibset, value.c_str(),
+                                           (const char *) ptr3->name + 7);
                         }
                     }
-                    if (s.database.length() && s.target.length())
+                    if (s->database.length() && s->target.length())
                     {
-                        yaz_log(YLOG_LOG, "add db=%s target=%s", 
-                                s.database.c_str(), s.target.c_str());
+                        yaz_log(YLOG_LOG, "add db=%s target=%s turbomarc=%s", 
+                                s->database.c_str(), s->target.c_str(),
+                                s->use_turbomarc ? "1" : "0");
                         m_searchables.push_back(s);
                     }
                 }
@@ -362,14 +417,14 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
     if (m_backend && m_backend->m_frontend_database == database)
         return m_backend;
 
-    std::list<Zoom::Searchable>::iterator map_s =
+    std::list<Zoom::SearchablePtr>::iterator map_s =
         m_p->m_searchables.begin();
 
     std::string c_db = mp::util::database_name_normalize(database);
 
     while (map_s != m_p->m_searchables.end())
     {
-        if (c_db.compare(map_s->database) == 0)
+        if (c_db.compare((*map_s)->database) == 0)
             break;
         map_s++;
     }
@@ -381,23 +436,48 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
         return b;
     }
 
+    xsltStylesheetPtr xsp = 0;
+    if ((*map_s)->transform_xsl_fname.length())
+    {
+        xmlDoc *xsp_doc = xmlParseFile((*map_s)->transform_xsl_fname.c_str());
+        if (!xsp_doc)
+        {
+            *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
+            *addinfo = "xmlParseFile failed";
+            BackendPtr b;
+            return b;
+        }
+        xsp = xsltParseStylesheetDoc(xsp_doc);
+        if (!xsp)
+        {
+            *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
+            *addinfo = "xsltParseStylesheetDoc failed";
+            BackendPtr b;
+            xmlFreeDoc(xsp_doc);
+            return b;
+        }
+    }
+
+    SearchablePtr sptr = *map_s;
+
     m_backend.reset();
 
-    BackendPtr b(new Backend);
+    BackendPtr b(new Backend(sptr));
 
+    b->xsp = xsp;
     b->m_frontend_database = database;
 
-    if (map_s->query_encoding.length())
-        b->set_option("rpnCharset", map_s->query_encoding.c_str());
+    if (sptr->query_encoding.length())
+        b->set_option("rpnCharset", sptr->query_encoding.c_str());
 
     std::string url;
-    if (map_s->sru.length())
+    if (sptr->sru.length())
     {
-        url = "http://" + map_s->target;
-        b->set_option("sru", map_s->sru.c_str());
+        url = "http://" + sptr->target;
+        b->set_option("sru", sptr->sru.c_str());
     }
     else
-        url = map_s->target;
+        url = sptr->target;
 
     b->connect(url, error, addinfo);
     if (*error == 0)
@@ -419,6 +499,7 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
 {
     *number_of_records_returned = 0;
     Z_Records *records = 0;
+    bool enable_pz2_transform = false;
 
     if (start < 0 || number_to_present <= 0)
         return records;
@@ -433,10 +514,35 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
     const char *syntax_name = 0;
 
     if (preferredRecordSyntax)
-        syntax_name =
-            yaz_oid_to_string_buf(preferredRecordSyntax, 0, oid_name_str);
+    {
+        if (!oid_oidcmp(preferredRecordSyntax, yaz_oid_recsyn_xml)
+            && !strcmp(element_set_name, "pz2"))
+        {
+            if (b->sptr->request_syntax.length())
+            {
+                syntax_name = b->sptr->request_syntax.c_str();
+                enable_pz2_transform = true;
+            }
+        }
+        else
+        {
+            syntax_name =
+                yaz_oid_to_string_buf(preferredRecordSyntax, 0, oid_name_str);
+        }
+    }
+
+    yaz_log(YLOG_LOG, "enable_pz2_transform %s", enable_pz2_transform ?
+            "enabled" : "disabled");
+
     b->set_option("preferredRecordSyntax", syntax_name);
-        
+
+    if (enable_pz2_transform)
+    {
+        element_set_name = "F";
+        if (b->sptr->element_set.length())
+            element_set_name = b->sptr->element_set.c_str();
+    }
+
     b->set_option("elementSetName", element_set_name);
 
     b->present(start, number_to_present, recs, error, addinfo);
@@ -470,17 +576,73 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
                 npr = zget_surrogateDiagRec(odr, odr_database, sur_error,
                                             addinfo);
             }
+            else if (enable_pz2_transform)
+            {
+                char rec_type_str[100];
+
+                strcpy(rec_type_str, b->sptr->use_turbomarc ?
+                       "txml" : "xml");
+                
+                // prevent buffer overflow ...
+                if (b->sptr->record_encoding.length() > 0 &&
+                    b->sptr->record_encoding.length() < 
+                    (sizeof(rec_type_str)-20))
+                {
+                    strcat(rec_type_str, "; charset=");
+                    strcat(rec_type_str, b->sptr->record_encoding.c_str());
+                }
+                
+                int rec_len;
+                const char *rec_buf = ZOOM_record_get(recs[i], rec_type_str,
+                                                      &rec_len);
+                if (rec_buf && b->xsp)
+                {
+                    xmlDoc *rec_doc = xmlParseMemory(rec_buf, rec_len);
+                    if (rec_doc)
+                    { 
+                        xmlDoc *rec_res;
+                        rec_res = xsltApplyStylesheet(b->xsp, rec_doc, 0);
+
+                        if (rec_res)
+                            xsltSaveResultToString((xmlChar **) &rec_buf, &rec_len,
+                                                   rec_res, b->xsp);
+                    }
+                }
+
+                if (rec_buf)
+                {
+                    npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr));
+                    npr->databaseName = odr_database;
+                    npr->which = Z_NamePlusRecord_databaseRecord;
+                    npr->u.databaseRecord =
+                        z_ext_record_xml(odr, rec_buf, rec_len);
+                }
+                else
+                {
+                    npr = zget_surrogateDiagRec(
+                        odr, odr_database, 
+                        YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS,
+                        rec_type_str);
+                }
+            }
             else
             {
-                npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr));
                 Z_External *ext =
                     (Z_External *) ZOOM_record_get(recs[i], "ext", 0);
-                npr->databaseName = odr_database;
                 if (ext)
                 {
+                    npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr));
+                    npr->databaseName = odr_database;
                     npr->which = Z_NamePlusRecord_databaseRecord;
                     npr->u.databaseRecord = ext;
                 }
+                else
+                {
+                    npr = zget_surrogateDiagRec(
+                        odr, odr_database, 
+                        YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS,
+                        "ZOOM_record, type ext");
+                }
             }
             npl->records[i] = npr;
         }
@@ -526,10 +688,42 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
     Z_Query *query = sr->query;
     if (query->which == Z_Query_type_1 || query->which == Z_Query_type_101)
     {
+        // RPN
         WRBUF w = wrbuf_alloc();
         yaz_rpnquery_to_wrbuf(w, query->u.type_1);
 
         b->search_pqf(wrbuf_cstr(w), &hits, &error, &addinfo);
+
+        wrbuf_destroy(w);
+    }
+    else if (query->which == Z_Query_type_2)
+    {
+        // CCL
+        WRBUF w = wrbuf_alloc();
+        wrbuf_write(w, (const char *) query->u.type_2->buf,
+                    query->u.type_2->len);
+        int cerror, cpos;
+        struct ccl_rpn_node *cn;
+        cn = ccl_find_str(b->sptr->ccl_bibset, wrbuf_cstr(w), &cerror, &cpos);
+        wrbuf_destroy(w);
+
+        if (!cn)
+        {
+            char *addinfo = odr_strdup(odr, ccl_err_msg(cerror));
+
+            apdu_res = 
+                odr.create_searchResponse(apdu_req, 
+                                          YAZ_BIB1_MALFORMED_QUERY,
+                                          addinfo);
+            package.response() = apdu_res;
+            return;
+        }
+        w = wrbuf_alloc();
+        ccl_pquery(w, cn);
+        
+        b->search_pqf(wrbuf_cstr(w), &hits, &error, &addinfo);
+        
+        ccl_rpn_delete(cn);
         wrbuf_destroy(w);
     }
     else