query_rewrite: charset conversion of query terms
[metaproxy-moved-to-github.git] / src / filter_query_rewrite.cpp
index 09e6f00..6b2bfb9 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of Metaproxy.
-   Copyright (C) 2005-2008 Index Data
+   Copyright (C) 2005-2012 Index Data
 
 Metaproxy is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -17,16 +17,17 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 
 #include "config.hpp"
-#include "filter.hpp"
-#include "package.hpp"
+#include <metaproxy/filter.hpp>
+#include <metaproxy/package.hpp>
 
-#include "util.hpp"
-#include "xmlutil.hpp"
+#include <metaproxy/util.hpp>
 #include "filter_query_rewrite.hpp"
 
+#include <yaz/log.h>
 #include <yaz/zgdu.h>
 #include <yaz/xmlquery.h>
 #include <yaz/diagbib1.h>
+#include <yaz/query-charset.h>
 
 #include <libxslt/xsltutils.h>
 #include <libxslt/transform.h>
@@ -44,13 +45,14 @@ namespace metaproxy_1 {
             void configure(const xmlNode * ptr);
         private:
             xsltStylesheetPtr m_stylesheet;
+            std::string charset_from;
+            std::string charset_to;
         };
     }
 }
 
-yf::QueryRewrite::Rep::Rep()
+yf::QueryRewrite::Rep::Rep() : m_stylesheet(0), charset_from("UTF-8")
 {
-    m_stylesheet = 0;
 }
 
 yf::QueryRewrite::Rep::~Rep()
@@ -72,7 +74,8 @@ void yf::QueryRewrite::process(mp::Package &package) const
     m_p->process(package);
 }
 
-void mp::filter::QueryRewrite::configure(const xmlNode *ptr, bool test_only)
+void mp::filter::QueryRewrite::configure(const xmlNode *ptr, bool test_only,
+                                         const char *path)
 {
     m_p->configure(ptr);
 }
@@ -91,17 +94,17 @@ void yf::QueryRewrite::Rep::process(mp::Package &package) const
             mp::odr odr;
             Z_SearchRequest *req = apdu_req->u.searchRequest;
             
-            xmlDocPtr doc_input = 0;
-            yaz_query2xml(req->query, &doc_input);
-            
-            if (!doc_input)
-            {
-                error_code = YAZ_BIB1_MALFORMED_QUERY;
-                addinfo = "converion from Query to XML failed";
-            }
-            else
+            if (m_stylesheet)
             {
-                if (m_stylesheet)
+                xmlDocPtr doc_input = 0;
+                yaz_query2xml(req->query, &doc_input);
+                
+                if (!doc_input)
+                {
+                    error_code = YAZ_BIB1_MALFORMED_QUERY;
+                    addinfo = "conversion from Query to XML failed";
+                }
+                else
                 {
                     xmlDocPtr doc_res = xsltApplyStylesheet(m_stylesheet,
                                                             doc_input, 0);
@@ -117,10 +120,27 @@ void yf::QueryRewrite::Rep::process(mp::Package &package) const
                                       &error_code, &addinfo);
                         xmlFreeDoc(doc_res);
                     }
+                    xmlFreeDoc(doc_input);
+                }
+            }
+            if (charset_to.length() && charset_from.length() &&
+                (req->query->which == Z_Query_type_1
+                 || req->query->which == Z_Query_type_101))
+            {
+                yaz_iconv_t cd = yaz_iconv_open(charset_to.c_str(),
+                                                charset_from.c_str());
+                if (cd)
+                {
+                    int r = yaz_query_charset_convert_rpnquery_check(
+                        req->query->u.type_1, odr, cd);
+                    yaz_iconv_close(cd);
+                    if (r)
+                    {  /* charset conversion of the query failed */
+                        error_code = YAZ_BIB1_MALFORMED_QUERY;
+                        addinfo = "could not convert query to target charset";
+                    }
                 }
-                xmlFreeDoc(doc_input);
             }
-            package.request() = gdu;
             if (error_code)
             {
                 Z_APDU *f_apdu = 
@@ -128,6 +148,7 @@ void yf::QueryRewrite::Rep::process(mp::Package &package) const
                 package.response() = f_apdu;
                 return;
             }
+            package.request() = gdu;
         } 
     }
     package.move();
@@ -140,7 +161,7 @@ void mp::filter::QueryRewrite::Rep::configure(const xmlNode *ptr)
         if (ptr->type != XML_ELEMENT_NODE)
             continue;
 
-        if (mp::xml::check_element_mp(ptr, "xslt"))
+        if (mp::xml::is_element_mp(ptr, "xslt"))
         {
             if (m_stylesheet)
             {
@@ -173,6 +194,25 @@ void mp::filter::QueryRewrite::Rep::configure(const xmlNode *ptr)
                      + "' in query_rewrite filter");
             }
         }
+        else if (mp::xml::is_element_mp(ptr, "charset"))
+        {
+            for (struct _xmlAttr *attr = ptr->properties; 
+                 attr; attr = attr->next)
+            {
+                if (!strcmp((const char *) attr->name, "from"))
+                {
+                    charset_from = mp::xml::get_text(attr);
+                }
+                else if (!strcmp((const char *) attr->name, "to"))
+                {
+                    charset_to = mp::xml::get_text(attr);
+                }
+                else
+                    throw mp::filter::FilterException
+                        ("Invalid attribute inside charset inside "
+                         "query_rewrite filter");
+            }
+        }
         else
         {
             throw mp::filter::FilterException
@@ -199,8 +239,9 @@ extern "C" {
 /*
  * Local variables:
  * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
  * indent-tabs-mode: nil
- * c-file-style: "stroustrup"
  * End:
  * vim: shiftwidth=4 tabstop=8 expandtab
  */
+