-.libs
-.deps
-*.lo
-*.la
-stamp-h*
+*
+!*.hpp
+!*.cpp
+!*.am
config.hpp
-socket
-Makefile
-Makefile.in
-config.hpp.in
-ex_filter_frontend_net
-ex_router_flexml
-test_boost_threads
-test_boost_time
-test_filter_auth_simple
-test_filter1
-test_filter2
-test_filter_frontend_net
-test_filter_log
-test_filter_multi
-test_filter_query_rewrite
-test_package1
-test_pipe
-test_thread_pool_observer
-test_session1
-test_session2
-test_filter_factory
-test_filter_z3950_client
-test_filter_backend_test
-test_filter_virt_db
-test_router_flexml
-test_ses_map
-tstdl
-metaproxy
-test_filter_bounce
-test_filter_record_transform
-test_filter_sru_to_z3950
-*.o
-metaproxy-config
filter_http_client.cpp filter_http_client.hpp \
filter_http_file.cpp filter_http_file.hpp \
filter_http_rewrite1.cpp filter_http_rewrite1.hpp \
+ filter_http_rewrite.cpp filter_http_rewrite.hpp \
filter_limit.cpp filter_limit.hpp \
filter_load_balance.cpp filter_load_balance.hpp \
filter_log.cpp filter_log.hpp \
torus.cpp torus.hpp \
url_recipe.cpp \
util.cpp \
+ html_parser.hpp html_parser.cpp \
router_chain.cpp \
xmlutil.cpp
test_filter_record_transform \
test_filter_sru_to_z3950 \
test_filter_virt_db \
+ test_filter_rewrite \
test_ses_map \
test_router_flexml \
+ test_html_parser \
test_xmlutil
TESTS=$(check_PROGRAMS)
test_router_flexml_SOURCES = test_router_flexml.cpp $(filter_src)
test_xmlutil_SOURCES = test_xmlutil.cpp
+test_html_parser_SOURCES = test_html_parser.cpp $(filter_src)
+test_filter_rewrite_SOURCES = test_filter_rewrite.cpp $(filter_src)
+
# doxygen target
dox:
(cd $(top_srcdir) ; make dox)
#include "filter_z3950_client.hpp"
#include "filter_zeerex_explain.hpp"
#include "filter_zoom.hpp"
+#include "filter_http_rewrite.hpp"
namespace mp = metaproxy_1;
&metaproxy_1_filter_z3950_client,
&metaproxy_1_filter_zeerex_explain,
&metaproxy_1_filter_zoom,
+ &metaproxy_1_filter_http_rewrite,
0
};
int i;
--- /dev/null
+/* This file is part of Metaproxy.
+ Copyright (C) 2005-2013 Index Data
+
+Metaproxy is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include "config.hpp"
+#include <metaproxy/filter.hpp>
+#include <metaproxy/package.hpp>
+#include <metaproxy/util.hpp>
+#include "filter_http_rewrite.hpp"
+
+#include <yaz/zgdu.h>
+#include <yaz/log.h>
+
+#include <boost/regex.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include <list>
+#include <map>
+
+#if HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+namespace mp = metaproxy_1;
+namespace yf = mp::filter;
+
+// Constructs the filter; the body is empty, the rule tables are filled in
+// by the configure() overloads.
+yf::HttpRewrite::HttpRewrite()
+{
+}
+
+// Destructor with an empty body: nothing is released explicitly here.
+yf::HttpRewrite::~HttpRewrite()
+{
+}
+
+/**
+ * Filter entry point.
+ *
+ * When the package carries an HTTP request, its request line, headers and
+ * body are rewritten with the request rules. The package is then passed on
+ * with move(); when an HTTP response is present afterwards, its headers and
+ * body are rewritten with the response rules. The variable map is shared by
+ * both phases, so values captured from the request can be substituted into
+ * the response.
+ */
+void yf::HttpRewrite::process(mp::Package & package) const
+{
+    yaz_log(YLOG_LOG, "HttpRewrite begins....");
+
+    // named-group captures, shared between the request and response phase
+    std::map<std::string, std::string> vars;
+
+    Z_GDU *req_gdu = package.request().get();
+    if (req_gdu != 0 && req_gdu->which == Z_GDU_HTTP_Request)
+    {
+        Z_HTTP_Request *http_req = req_gdu->u.HTTP_Request;
+        mp::odr req_odr;
+        rewrite_reqline(req_odr, http_req, vars);
+        yaz_log(YLOG_LOG, ">> Request headers");
+        rewrite_headers(req_odr, http_req->headers, vars,
+                        req_uri_pats, req_groups_bynum);
+        rewrite_body(req_odr,
+                     &http_req->content_buf, &http_req->content_len,
+                     vars, req_uri_pats, req_groups_bynum);
+        // store the (modified) request back into the package
+        package.request() = req_gdu;
+    }
+
+    package.move();
+
+    Z_GDU *res_gdu = package.response().get();
+    if (res_gdu == 0 || res_gdu->which != Z_GDU_HTTP_Response)
+        return;
+
+    Z_HTTP_Response *http_res = res_gdu->u.HTTP_Response;
+    yaz_log(YLOG_LOG, "Response code %d", http_res->code);
+    mp::odr res_odr;
+    yaz_log(YLOG_LOG, "<< Respose headers");
+    rewrite_headers(res_odr, http_res->headers, vars,
+                    res_uri_pats, res_groups_bynum);
+    rewrite_body(res_odr,
+                 &http_res->content_buf, &http_res->content_len,
+                 vars, res_uri_pats, res_groups_bynum);
+    package.response() = res_gdu;
+}
+
+/**
+ * Rewrites the path of the HTTP request line.
+ *
+ * The request rules are matched against an absolute URL: a path that
+ * already starts with "http://" is used as is, otherwise the URL is built
+ * from "http://", the Host header and the path. When a rule matches,
+ * hreq->path is replaced by a copy allocated from `o`.
+ *
+ * @param o    ODR used to allocate the rewritten path
+ * @param hreq request whose path may be replaced
+ * @param vars receives the named groups captured by the matching rule
+ *
+ * A request without a path, or a relative path without a Host header, is
+ * left untouched (appending a null pointer to std::string is undefined).
+ */
+void yf::HttpRewrite::rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq,
+    std::map<std::string, std::string> & vars) const
+{
+    if (!hreq->path)
+        return;
+    const std::string req_path(hreq->path);
+    std::string path;
+    if (req_path.compare(0, 7, "http://") == 0)
+    {
+        yaz_log(YLOG_LOG, "Path in the method line is absolute, "
+                "possibly a proxy request");
+        path = req_path;
+    }
+    else
+    {
+        const char *host = z_HTTP_header_lookup(hreq->headers, "Host");
+        if (!host)
+        {
+            yaz_log(YLOG_LOG, "No Host header in request, "
+                    "request line is not rewritten");
+            return;
+        }
+        //TODO what about proto
+        path = "http://";
+        path += host;
+        path += req_path;
+    }
+    yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str());
+    std::string npath =
+        test_patterns(vars, path, req_uri_pats, req_groups_bynum);
+    if (!npath.empty())
+    {
+        yaz_log(YLOG_LOG, "Rewritten request URL is %s", npath.c_str());
+        hreq->path = odr_strdup(o, npath.c_str());
+    }
+}
+
+/**
+ * Applies the given rules to every header of the list.
+ *
+ * Each header is presented to the rules as the single line "Name: value".
+ * A rewritten line is split again at the first ": " and both parts are
+ * stored back as copies allocated from `o`; a rewritten line without that
+ * separator is logged and the header keeps its old content.
+ */
+void yf::HttpRewrite::rewrite_headers (mp::odr & o, Z_HTTP_Header *headers,
+    std::map<std::string, std::string> & vars,
+    const spair_vec & uri_pats,
+    const std::vector<std::map<int, std::string> > & groups_bynum) const
+{
+    for (Z_HTTP_Header *hdr = headers; hdr; hdr = hdr->next)
+    {
+        std::string line(hdr->name);
+        line.append(": ");
+        line.append(hdr->value);
+        yaz_log(YLOG_LOG, "%s: %s", hdr->name, hdr->value);
+
+        const std::string rewritten =
+            test_patterns(vars, line, uri_pats, groups_bynum);
+        if (rewritten.empty())
+            continue; // no rule matched this header
+
+        const std::string::size_type sep = rewritten.find(": ");
+        if (sep == std::string::npos)
+        {
+            yaz_log(YLOG_LOG, "Header malformed during rewrite, ignoring");
+            continue;
+        }
+        const std::string new_name(rewritten, 0, sep);
+        const std::string new_value(rewritten, sep + 2);
+        hdr->name = odr_strdup(o, new_name.c_str());
+        hdr->value = odr_strdup(o, new_value.c_str());
+    }
+}
+
+/**
+ * Applies the given rules to a message body.
+ *
+ * The body is taken as exactly *content_len bytes, so it does not have to
+ * be NUL terminated and may contain NUL bytes. When a rule matches, the
+ * buffer is replaced by a NUL terminated copy allocated from `o` and
+ * *content_len is set to the new length.
+ *
+ * @param o           ODR used to allocate the rewritten body
+ * @param content_buf in/out: body buffer
+ * @param content_len in/out: body length in bytes
+ */
+void yf::HttpRewrite::rewrite_body (mp::odr & o, char **content_buf, int *content_len,
+    std::map<std::string, std::string> & vars,
+    const spair_vec & uri_pats,
+    const std::vector<std::map<int, std::string> > & groups_bynum) const
+{
+    if (!*content_buf || *content_len <= 0)
+        return;
+    const std::string body(*content_buf, *content_len);
+    const std::string nbody =
+        test_patterns(vars, body, uri_pats, groups_bynum);
+    if (!nbody.empty())
+    {
+        // length-aware copy: odr_strdup would stop at the first NUL byte
+        *content_buf = odr_strdupn(o, nbody.data(), nbody.size());
+        *content_len = static_cast<int>(nbody.size());
+    }
+}
+
+/**
+ * Tries the rules in order and returns the output of the first one that
+ * produces a non-empty result; returns an empty string when none does.
+ * Entry i of groups_bynum_vec is the named-group table of rule i.
+ */
+const std::string yf::HttpRewrite::test_patterns(
+    std::map<std::string, std::string> & vars,
+    const std::string & txt,
+    const spair_vec & uri_pats,
+    const std::vector<std::map<int, std::string> > & groups_bynum_vec)
+    const
+{
+    std::string result;
+    for (spair_vec::size_type idx = 0;
+         result.empty() && idx != uri_pats.size();
+         ++idx)
+    {
+        const string_pair & rule = uri_pats[idx];
+        result = search_replace(vars, txt, rule.first, rule.second,
+                                groups_bynum_vec[idx]);
+    }
+    return result;
+}
+
+
+/**
+ * Replaces every match of the regex uri_re in txt with the expansion of
+ * the replacement pattern uri_pat.
+ *
+ * For each match the non-empty named groups (see groups_bynum, which maps
+ * group number to name) are stored into vars before uri_pat is expanded
+ * with sub_vars(), so a replacement may use values captured by this match
+ * or by earlier calls.
+ *
+ * @return the rewritten text, or an empty string when nothing matched
+ *         (callers use the empty string as "no match").
+ *
+ * A zero-length match copies one input character and continues behind it;
+ * restarting the search at the same position would never terminate.
+ */
+const std::string yf::HttpRewrite::search_replace(
+    std::map<std::string, std::string> & vars,
+    const std::string & txt,
+    const std::string & uri_re,
+    const std::string & uri_pat,
+    const std::map<int, std::string> & groups_bynum) const
+{
+    //exec regex against value
+    boost::regex re(uri_re);
+    boost::smatch what;
+    std::string::const_iterator start = txt.begin();
+    const std::string::const_iterator end = txt.end();
+    std::string out;
+    bool matched = false;
+    while (boost::regex_search(start, end, what, re)) //find next full match
+    {
+        matched = true;
+        for (unsigned i = 1; i < what.size(); ++i)
+        {
+            //check if the group is named
+            std::map<int, std::string>::const_iterator it
+                = groups_bynum.find(i);
+            if (it != groups_bynum.end() && what[i].length() > 0)
+                vars[it->second] = what[i].str();
+        }
+        //prepare replacement string
+        std::string rvalue = sub_vars(uri_pat, vars);
+        yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'",
+                what.str(0).c_str(), rvalue.c_str());
+        out.append(start, what[0].first);
+        out.append(rvalue);
+
+        const std::string::const_iterator match_end = what[0].second;
+        if (what[0].first != match_end)
+        {
+            start = match_end; //move search forward
+        }
+        else if (match_end == end)
+        {
+            start = end; //empty match at the very end, nothing left
+            break;
+        }
+        else
+        {
+            out += *match_end; //empty match: step over one character
+            start = match_end + 1;
+        }
+    }
+    //if we had a match cat the last part
+    if (matched)
+        out.append(start, end);
+    return out;
+}
+
+/**
+ * Scans the regex of every rule for named groups and appends, per rule,
+ * a map from capture-group number to group name to groups_bynum_vec.
+ *
+ * Recognised named forms are "(?<name>" and "(?P<name>". A plain "("
+ * counts as a capturing group. Every other "(?" construct ("(?:", "(?=",
+ * "(?!", "(?<=", "(?<!", "(?>", ...) is not counted, so the numbers agree
+ * with the sub-match indexes reported by the regex engine.
+ *
+ * NOTE(review): a '(' inside a character class such as "[(]" is still
+ * counted as a group; the scanner does not track bracket expressions.
+ *
+ * @throws mp::filter::FilterException for a group name that contains a
+ *         non-alphanumeric character or is not terminated by '>'.
+ */
+void yf::HttpRewrite::parse_groups(
+    const spair_vec & uri_pats,
+    std::vector<std::map<int, std::string> > & groups_bynum_vec)
+{
+    for (unsigned h = 0; h < uri_pats.size(); h++)
+    {
+        int gnum = 0;
+        bool esc = false;
+        //regex is first, subpat is second
+        const std::string & str = uri_pats[h].first;
+        //for each pair we have an indexing map
+        std::map<int, std::string> groups_bynum;
+        for (unsigned i = 0; i < str.size(); ++i)
+        {
+            if (esc) //previous char was a backslash: literal, skip it
+            {
+                esc = false;
+                continue;
+            }
+            if (str[i] == '\\')
+            {
+                esc = true;
+                continue;
+            }
+            if (str[i] != '(')
+                continue;
+            if (i+1 >= str.size() || str[i+1] != '?')
+            {
+                gnum++; //plain capturing group
+                continue;
+            }
+            i++; //now at '?'
+            unsigned j = i + 1;
+            if (j < str.size() && str[j] == 'P') //optional, python
+                j++;
+            bool named = j < str.size() && str[j] == '<';
+            if (named && j+1 < str.size()
+                && (str[j+1] == '=' || str[j+1] == '!'))
+                named = false; //look-behind assertion, not a name
+            if (!named)
+                continue; //non-capturing construct, does not get a number
+            gnum++;
+            i = j; //now at '<'
+            std::string gname;
+            bool term = false;
+            while (++i < str.size())
+            {
+                if (str[i] == '>') { term = true; break; }
+                if (!isalnum(static_cast<unsigned char>(str[i])))
+                    throw mp::filter::FilterException
+                        ("Only alphanumeric chars allowed, found '"
+                         + std::string(1, str[i])
+                         + "' in '"
+                         + str
+                         + "' at "
+                         + boost::lexical_cast<std::string>(i));
+                gname += str[i];
+            }
+            if (!term)
+                throw mp::filter::FilterException
+                    ("Unterminated group name '" + gname
+                     + "' in '" + str + "'");
+            groups_bynum[gnum] = gname;
+            yaz_log(YLOG_LOG, "Found named group '%s' at $%d",
+                    gname.c_str(), gnum);
+        }
+        groups_bynum_vec.push_back(groups_bynum);
+    }
+}
+
+/**
+ * Expands "${name}" references in a replacement pattern.
+ *
+ * A backslash makes the following character literal (the backslash itself
+ * is dropped). A reference to a name missing from vars expands to nothing.
+ *
+ * @throws mp::filter::FilterException when an unescaped '$' is not
+ *         followed by '{', or when a reference has no closing '}'.
+ */
+std::string yf::HttpRewrite::sub_vars (const std::string & in,
+    const std::map<std::string, std::string> & vars)
+{
+    std::string result;
+    bool escaped = false;
+    std::string::size_type pos = 0;
+    while (pos < in.size())
+    {
+        const char ch = in[pos];
+        if (escaped)
+        {
+            //character after a backslash is copied verbatim
+            result += ch;
+            escaped = false;
+            ++pos;
+            continue;
+        }
+        if (ch == '\\')
+        {
+            escaped = true;
+            ++pos;
+            continue;
+        }
+        if (ch != '$')
+        {
+            result += ch;
+            ++pos;
+            continue;
+        }
+        //unescaped '$': must open a "${name}" reference
+        if (pos + 1 >= in.size() || in[pos + 1] != '{')
+            throw mp::filter::FilterException
+                ("Malformed or trimmed var ref in '"
+                 +in+"' at "+boost::lexical_cast<std::string>(pos));
+        const std::string::size_type name_begin = pos + 2;
+        const std::string::size_type name_end = in.find('}', name_begin);
+        if (name_end == std::string::npos)
+            throw mp::filter::FilterException
+                ("Unterminated var ref in '"+in+"' at "
+                 + boost::lexical_cast<std::string>(in.size()));
+        const std::map<std::string, std::string>::const_iterator hit
+            = vars.find(in.substr(name_begin, name_end - name_begin));
+        if (hit != vars.end())
+            result += hit->second;
+        pos = name_end + 1;
+    }
+    return result;
+}
+
+/**
+ * Installs the request and response rewrite rules.
+ *
+ * Each rule is a (regex, replacement pattern) pair. The named-group tables
+ * are rebuilt from scratch, so the filter can be configured more than once
+ * without the tables growing out of step with the rule vectors. Everything
+ * is parsed into temporaries first: when a regex is rejected the filter
+ * keeps its previous configuration.
+ *
+ * @throws mp::filter::FilterException from parse_groups() on a malformed
+ *         group name.
+ */
+void yf::HttpRewrite::configure(
+    const spair_vec req_uri_pats,
+    const spair_vec res_uri_pats)
+{
+    //pick up names
+    std::vector<std::map<int, std::string> > req_groups;
+    std::vector<std::map<int, std::string> > res_groups;
+    parse_groups(req_uri_pats, req_groups);
+    parse_groups(res_uri_pats, res_groups);
+    //nothing threw: commit rules together with their group tables
+    this->req_uri_pats = req_uri_pats;
+    this->res_uri_pats = res_uri_pats;
+    this->req_groups_bynum.swap(req_groups);
+    this->res_groups_bynum.swap(res_groups);
+}
+
+
+/**
+ * Reads the <rewrite from="..." to="..."/> children of a <request> or
+ * <response> element and appends them to dest as (from, to) pairs.
+ * A rule whose "from" attribute is empty or missing is logged but not
+ * added.
+ *
+ * @throws mp::filter::FilterException on an unknown attribute or on a
+ *         child element other than "rewrite".
+ */
+static void configure_rules(const xmlNode *ptr, yf::HttpRewrite::spair_vec & dest)
+{
+    for (ptr = ptr->children; ptr; ptr = ptr->next)
+    {
+        if (ptr->type != XML_ELEMENT_NODE)
+            continue;
+        else if (!strcmp((const char *) ptr->name, "rewrite"))
+        {
+            std::string from, to;
+            const struct _xmlAttr *attr;
+            for (attr = ptr->properties; attr; attr = attr->next)
+            {
+                if (!strcmp((const char *) attr->name, "from"))
+                    from = mp::xml::get_text(attr->children);
+                else if (!strcmp((const char *) attr->name, "to"))
+                    to = mp::xml::get_text(attr->children);
+                else
+                    throw mp::filter::FilterException
+                        ("Bad attribute "
+                         + std::string((const char *) attr->name)
+                         + " in rewrite section of http_rewrite");
+            }
+            yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'",
+                    from.c_str(), to.c_str());
+            if (!from.empty())
+                dest.push_back(std::make_pair(from, to));
+        }
+        else
+        {
+            throw mp::filter::FilterException
+                ("Bad element "
+                 + std::string((const char *) ptr->name)
+                 + " in http_rewrite filter");
+        }
+    }
+}
+
+/**
+ * Configures the filter from its XML element.
+ *
+ * The children <request> and <response> each hold a list of
+ * <rewrite from="regex" to="pattern"/> rules. test_only and path are not
+ * used by this filter.
+ *
+ * @throws mp::filter::FilterException on an unknown element or attribute,
+ *         or on a malformed named group in a regex.
+ */
+void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only,
+                                const char *path)
+{
+    spair_vec req_uri_pats;
+    spair_vec res_uri_pats;
+    for (ptr = ptr->children; ptr; ptr = ptr->next)
+    {
+        if (ptr->type != XML_ELEMENT_NODE)
+            continue;
+        else if (!strcmp((const char *) ptr->name, "request"))
+        {
+            configure_rules(ptr, req_uri_pats);
+        }
+        else if (!strcmp((const char *) ptr->name, "response"))
+        {
+            configure_rules(ptr, res_uri_pats);
+        }
+        else
+        {
+            throw mp::filter::FilterException
+                ("Bad element "
+                 + std::string((const char *) ptr->name)
+                 + " in http_rewrite filter");
+        }
+    }
+    configure(req_uri_pats, res_uri_pats);
+}
+
+// Factory function stored in the filter descriptor below: returns a newly
+// allocated HttpRewrite. NOTE(review): ownership of the returned object is
+// presumably taken by the caller - confirm against the filter factory.
+static mp::filter::Base* filter_creator()
+{
+ return new mp::filter::HttpRewrite;
+}
+
+// Filter descriptor with C linkage. It carries the filter type name
+// "http_rewrite" and the factory function filter_creator.
+extern "C" {
+ struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite = {
+ 0,
+ "http_rewrite",
+ filter_creator
+ };
+}
+
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
--- /dev/null
+/* This file is part of Metaproxy.
+ Copyright (C) 2005-2013 Index Data
+
+Metaproxy is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef FILTER_HTTP_REWRITE_HPP
+#define FILTER_HTTP_REWRITE_HPP
+
+#include <metaproxy/filter.hpp>
+#include <vector>
+#include <map>
+#include <metaproxy/util.hpp>
+
+namespace mp = metaproxy_1;
+
+namespace metaproxy_1 {
+ namespace filter {
+ // Filter that rewrites HTTP requests and responses with regex rules.
+ // A rule is a (regex, replacement pattern) pair; named groups captured
+ // by a regex can be referenced as ${name} in replacement patterns.
+ class HttpRewrite : public Base {
+ public:
+ // a single rule: first is the regex, second the replacement pattern
+ typedef std::pair<std::string, std::string> string_pair;
+ // ordered list of rules
+ typedef std::vector<string_pair> spair_vec;
+ typedef spair_vec::iterator spv_iter;
+ HttpRewrite();
+ ~HttpRewrite();
+ // rewrites the request, passes the package on, rewrites the response
+ void process(metaproxy_1::Package & package) const;
+ // configures the rules from the filter's XML element
+ void configure(const xmlNode * ptr, bool test_only,
+ const char *path);
+ // configures the rules directly from request and response rule lists
+ void configure(const spair_vec req_uri_pats,
+ const spair_vec res_uri_pats);
+ private:
+ // rules applied to requests / responses
+ spair_vec req_uri_pats;
+ spair_vec res_uri_pats;
+ // per rule: map from capture-group number to group name
+ std::vector<std::map<int, std::string> > req_groups_bynum;
+ std::vector<std::map<int, std::string> > res_groups_bynum;
+ // rewrites the path of the request line using the request rules
+ void rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq,
+ std::map<std::string, std::string> & vars) const;
+ // rewrites each header, presented to the rules as "Name: value"
+ void rewrite_headers (mp::odr & o, Z_HTTP_Header *headers,
+ std::map<std::string, std::string> & vars,
+ const spair_vec & uri_pats,
+ const std::vector<std::map<int, std::string> > & groups_bynum_vec) const;
+ // rewrites the message body in place (buffer and length are in/out)
+ void rewrite_body (mp::odr & o, char **content_buf, int *content_len,
+ std::map<std::string, std::string> & vars,
+ const spair_vec & uri_pats,
+ const std::vector<std::map<int, std::string> >
+ & groups_bynum) const;
+ // tries the rules in order, returns the first non-empty result
+ const std::string test_patterns(
+ std::map<std::string, std::string> & vars,
+ const std::string & txt,
+ const spair_vec & uri_pats,
+ const std::vector<std::map<int, std::string> >
+ & groups_bynum) const;
+ // replaces all matches of uri_re in txt; empty result means no match
+ const std::string search_replace(
+ std::map<std::string, std::string> & vars,
+ const std::string & txt,
+ const std::string & uri_re,
+ const std::string & uri_pat,
+ const std::map<int, std::string> & groups_bynum) const;
+ // extracts the named groups of every rule's regex
+ static void parse_groups(
+ const spair_vec & uri_pats,
+ std::vector<std::map<int, std::string> > & groups_bynum_vec);
+ // expands ${name} references in a replacement pattern
+ static std::string sub_vars (const std::string & in,
+ const std::map<std::string, std::string> & vars);
+ };
+ }
+}
+
+// Declaration of the filter descriptor with C linkage.
+extern "C" {
+ extern struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite;
+}
+
+#endif
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
--- /dev/null
+/* This file is part of Metaproxy.
+ Copyright (C) 2005-2013 Index Data
+
+Metaproxy is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include "config.hpp"
+#include "html_parser.hpp"
+
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <stdio.h>
+
+#define TAG_MAX_LEN 64
+
+#define SPACECHR " \t\r\n\f"
+
+#define DEBUG(x) x
+
+#if HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+namespace mp = metaproxy_1;
+
+// Constructor with an empty body; the parser keeps no state of its own.
+mp::HTMLParser::HTMLParser()
+{
+}
+
+// Destructor with an empty body.
+mp::HTMLParser::~HTMLParser()
+{
+}
+
+static void parse_str(mp::HTMLParserEvent & event, const char * str);
+
+// Parses the NUL terminated markup in str and reports tags, attributes and
+// text to event; all work is delegated to the file-local parse_str().
+void mp::HTMLParser::parse(mp::HTMLParserEvent & event, const char *str) const
+{
+ parse_str(event, str);
+}
+
+//static C functions follow would probably make sense to wrap this in PIMPL?
+
+// Returns a malloc'ed, NUL terminated copy of the first len bytes of buff.
+// The caller owns the result and releases it with free().
+static char* dupe (const char *buff, int len)
+{
+    char *copy = (char *) malloc (len + 1);
+    assert (copy);
+    memcpy (copy, buff, len);
+    *(copy + len) = '\0';
+    return copy;
+}
+
+// Returns the number of leading whitespace characters (SPACECHR) in cp.
+static int skipSpace (const char *cp)
+{
+    return (int) strspn (cp, SPACECHR);
+}
+
+// Reads a tag or attribute name from cp: everything up to whitespace,
+// '/', '>', '=' or the end of the string. The lower-cased name is stored
+// in dst, truncated to TAG_MAX_LEN-1 characters and always NUL terminated.
+// Returns the number of input characters consumed, which is larger than
+// the stored length when the name was truncated.
+static int skipName (const char *cp, char *dst)
+{
+    int i;
+    int j = 0;
+    for (i = 0; cp[i] && !strchr (SPACECHR "/>=", cp[i]); i++)
+        if (j < TAG_MAX_LEN-1)
+        {
+            // tolower() needs an unsigned char value: a negative char
+            // (bytes >= 0x80) is undefined behaviour
+            dst[j] = (char) tolower((unsigned char) cp[i]);
+            j++;
+        }
+    dst[j] = '\0';
+    return i;
+}
+
+// Reads one attribute ("name", "name=value", "name='value'" or
+// "name=\"value\"") from cp, including trailing whitespace.
+//
+// name receives the lower-cased attribute name. *value points into cp at
+// the start of the value (quotes excluded) and *val_len is its length; for
+// an attribute without '=' *value is NULL and *val_len is 0.
+// Returns the number of characters consumed; when no name starts at cp
+// this is only the leading whitespace, possibly 0.
+static int skipAttribute (const char *cp, char *name, const char **value, int *val_len)
+{
+    int i = skipName (cp, name);
+    *value = NULL;
+    *val_len = 0; // never leave the length unset for the caller
+    if (!i)
+        return skipSpace (cp);
+    i += skipSpace (cp + i);
+    if (cp[i] == '=')
+    {
+        int v0, v1;
+        i++;
+        i += skipSpace (cp + i);
+        if (cp[i] == '\"' || cp[i] == '\'')
+        {
+            // quoted value: runs to the matching quote or end of input
+            char tr = cp[i];
+            v0 = ++i;
+            while (cp[i] != tr && cp[i])
+                i++;
+            v1 = i;
+            if (cp[i])
+                i++;
+        }
+        else
+        {
+            // bare value: runs to whitespace or '>'
+            v0 = i;
+            while (cp[i] && !strchr (SPACECHR ">", cp[i]))
+                i++;
+            v1 = i;
+        }
+        *value = cp + v0;
+        *val_len = v1 - v0;
+    }
+    i += skipSpace (cp + i);
+    return i;
+}
+
+// Parses the attributes of an opening tag, starting right after the tag
+// name, and reports each one through event.attribute(). Stops at '>' or
+// the end of input and returns the number of characters consumed.
+//
+// An attribute without a value is reported with value NULL and length 0.
+// A character that cannot start an attribute (e.g. '/') is skipped.
+static int tagAttrs (mp::HTMLParserEvent & event,
+                     const char *tagName,
+                     const char *cp)
+{
+    char attr_name[TAG_MAX_LEN];
+    int i = skipSpace (cp);
+    while (cp[i] && cp[i] != '>')
+    {
+        const char *attr_value = NULL;
+        int val_len = 0;
+        int nor = skipAttribute (cp+i, attr_name, &attr_value, &val_len);
+        if (!attr_value)
+            val_len = 0; // valueless attribute: length is not meaningful
+        if (nor)
+        {
+            i += nor;
+            // print straight from the input; no temporary copy to leak
+            DEBUG(printf ("------ attr %s=%.*s\n", attr_name, val_len,
+                          attr_value ? attr_value : ""));
+            event.attribute(tagName, attr_name, attr_value, val_len);
+        }
+        else
+        {
+            i++; // nothing recognised: step over one character
+        }
+    }
+    return i;
+}
+
+// Reads the tag name at cp into tagName and reports the start of the tag.
+// `which` is the character that followed '<': '/' reports a closing tag,
+// '!' and '?' are only printed, anything else reports an opening tag.
+// Returns the number of characters of the name that were consumed.
+static int tagStart (mp::HTMLParserEvent & event,
+                     char *tagName, const char *cp, const char which)
+{
+    const int consumed = skipName (cp, tagName);
+    if (which == '/')
+    {
+        DEBUG(printf ("------ tag close %s\n", tagName));
+        event.closeTag(tagName);
+    }
+    else if (which == '!')
+    {
+        DEBUG(printf ("------ dtd %s\n", tagName));
+    }
+    else if (which == '?')
+    {
+        DEBUG(printf ("------ pi %s\n", tagName));
+    }
+    else
+    {
+        DEBUG(printf ("------ tag open %s\n", tagName));
+        event.openTagStart(tagName);
+    }
+    return consumed;
+}
+
+// Skips to the '>' that ends the current tag. When it is found,
+// event.anyTagEnd() is called and the '>' is consumed as well; at end of
+// input no event is sent. Returns the number of characters consumed.
+static int tagEnd (mp::HTMLParserEvent & event, const char *tagName, const char *cp)
+{
+    const char *p = cp;
+    while (*p != '\0' && *p != '>')
+        ++p;
+    if (*p == '>')
+    {
+        event.anyTagEnd(tagName);
+        ++p;
+    }
+    return (int) (p - cp);
+}
+
+// Reports the text run [text_start, text_end) through event.text();
+// an empty run is not reported.
+static void tagText (mp::HTMLParserEvent & event, const char *text_start, const char *text_end)
+{
+    const int len = (int) (text_end - text_start);
+    if (len) //got text to flush
+    {
+        // print straight from the input; no temporary copy to leak
+        DEBUG(printf ("------ text %.*s\n", len, text_start));
+        event.text(text_start, len);
+    }
+}
+
+// Walks the NUL terminated markup at cp and reports it to event as a
+// sequence of tag, attribute and text events. Text between tags is
+// collected in [text_start, text_end) and flushed before each tag and at
+// the end of input.
+//
+// NOTE(review): for "<!...>" and "<?...>" tagStart() sends no event, but
+// tagEnd() still calls anyTagEnd(); a handler that re-serialises the
+// events sees a bare tag end for these constructs - confirm this is wanted.
+static void parse_str (mp::HTMLParserEvent & event, const char *cp)
+{
+ const char *text_start = cp;
+ const char *text_end = cp;
+ while (*cp)
+ {
+ if (cp[0] == '<' && cp[1]) //tag?
+ {
+ // character right after '<' decides the kind of tag
+ char which = cp[1];
+ // for a closing tag step onto the '/', so that cp[1] below is the
+ // first character of the name
+ if (which == '/') cp++;
+ // a '<' followed by whitespace (or, after "</", by the end of
+ // input) is not a tag and falls through to the text handling
+ if (!strchr (SPACECHR, cp[1])) //valid tag starts
+ {
+ tagText (event, text_start, text_end); //flush any text
+ char tagName[TAG_MAX_LEN];
+ cp++;
+ if (which == '/')
+ {
+ cp += tagStart (event, tagName, cp, which);
+ }
+ else if (which == '!' || which == '?') //pi or dtd
+ {
+ // step over the '!' or '?' itself before reading the name
+ cp++;
+ cp += tagStart (event, tagName, cp, which);
+ }
+ else
+ {
+ cp += tagStart (event, tagName, cp, which);
+ cp += tagAttrs (event, tagName, cp);
+ }
+ cp += tagEnd (event, tagName, cp);
+ // a new text run starts right behind the tag
+ text_start = cp;
+ text_end = cp;
+ continue;
+ }
+ }
+ //text
+ cp++;
+ text_end = cp;
+ }
+ tagText (event, text_start, text_end); //flush any text
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
--- /dev/null
+/* This file is part of Metaproxy.
+ Copyright (C) 2005-2013 Index Data
+
+Metaproxy is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef HTML_PARSER_HPP
+#define HTML_PARSER_HPP
+
+#include <boost/scoped_ptr.hpp>
+
+namespace metaproxy_1 {
+    /// Callback interface through which HTMLParser reports what it finds.
+    /// Character data is passed as pointer plus length and is not
+    /// necessarily NUL terminated; tag and attribute names are C strings.
+    class HTMLParserEvent {
+    public:
+        /// Virtual destructor: handlers are used through this interface,
+        /// so destroying one through a base pointer must be well defined.
+        virtual ~HTMLParserEvent() {}
+        /// An opening tag with the given name starts.
+        virtual void openTagStart(const char *name) = 0;
+        /// The '>' that ends the current tag was reached.
+        virtual void anyTagEnd(const char *name) = 0;
+        /// An attribute of the tag tagName; value may be NULL (with
+        /// val_len 0) for an attribute without a value.
+        virtual void attribute(const char *tagName,
+                               const char *name,
+                               const char *value,
+                               int val_len) = 0;
+        /// A closing tag with the given name starts.
+        virtual void closeTag(const char *name) = 0;
+        /// A run of text between tags.
+        virtual void text(const char *value, int len) = 0;
+    };
+    /// Event-driven HTML tokenizer.
+    class HTMLParser {
+    public:
+        HTMLParser();
+        ~HTMLParser();
+        /// Parses the NUL terminated markup in str and reports it to event.
+        void parse(HTMLParserEvent & event, const char *str) const;
+    };
+}
+
+#endif
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
--- /dev/null
+/* This file is part of Metaproxy.
+ Copyright (C) 2005-2013 Index Data
+
+Metaproxy is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include "config.hpp"
+#include <iostream>
+#include <stdexcept>
+
+#include "filter_http_client.hpp"
+#include "filter_http_rewrite.hpp"
+#include <metaproxy/util.hpp>
+#include <metaproxy/router_chain.hpp>
+#include <metaproxy/package.hpp>
+
+#include <boost/regex.hpp>
+#include <boost/lexical_cast.hpp>
+
+#include <yaz/log.h>
+
+#define BOOST_AUTO_TEST_MAIN
+#define BOOST_TEST_DYN_LINK
+
+#include <boost/test/auto_unit_test.hpp>
+
+using namespace boost::unit_test;
+namespace mp = metaproxy_1;
+
+// Global fixture for this test program: announces setup/teardown on stdout
+// and sets the YAZ log level to YLOG_ALL.
+struct TestConfig {
+ TestConfig()
+ {
+ std::cout << "global setup\n";
+ yaz_log_init_level(YLOG_ALL);
+ }
+ ~TestConfig()
+ {
+ std::cout << "global teardown\n";
+ }
+};
+
+BOOST_GLOBAL_FIXTURE( TestConfig );
+
+// Configures the filter programmatically (no XML), sends a proxied request
+// through it and checks that absolute links in the response headers and
+// body are rewritten to point back at the proxy.
+BOOST_AUTO_TEST_CASE( test_filter_rewrite_1 )
+{
+    // created outside the try block so they are released on every path
+    ODR dec = 0;
+    ODR enc = 0;
+    try
+    {
+        std::cout << "Running non-xml config test case" << std::endl;
+        mp::RouterChain router;
+        mp::filter::HttpRewrite fhr;
+
+        //configure the filter
+        mp::filter::HttpRewrite::spair_vec vec_req;
+        vec_req.push_back(std::make_pair(
+        "(?<proto>http\\:\\/\\/s?)(?<pxhost>[^\\/?#]+)\\/(?<pxpath>[^\\/]+)"
+        "\\/(?<host>[^\\/]+)(?<path>.*)",
+        "${proto}${host}${path}"
+        ));
+        vec_req.push_back(std::make_pair(
+        "(?:Host\\: )(.*)",
+        "Host: ${host}"
+        ));
+
+        mp::filter::HttpRewrite::spair_vec vec_res;
+        vec_res.push_back(std::make_pair(
+        "(?<proto>http\\:\\/\\/s?)(?<host>[^\\/?# \"'>]+)\\/(?<path>[^ \"'>]+)",
+        "${proto}${pxhost}/${pxpath}/${host}/${path}"
+        ));
+
+        fhr.configure(vec_req, vec_res);
+
+        router.append(fhr);
+
+        // create an http request
+        mp::Package pack;
+
+        mp::odr odr;
+        Z_GDU *gdu_req = z_get_HTTP_Request_uri(odr,
+        "http://proxyhost/proxypath/targetsite/page1.html", 0, 1);
+
+        pack.request() = gdu_req;
+
+        //create the http response
+
+        const char *resp_buf =
+            "HTTP/1.1 200 OK\r\n"
+            "Content-Length: 441\r\n"
+            "Content-Type: text/html\r\n"
+            "Link: <http://targetsite/file.xml>; rel=absolute\r\n"
+            "Link: </dir/file.xml>; rel=relative\r\n"
+            "\r\n"
+            "<html><head><title>Hello proxy!</title>"
+            "<style>"
+            "body {"
+            " background-image:url('http://targetsite/images/bg.png');"
+            "}"
+            "</style>"
+            "</head>"
+            "<script>var jslink=\"http://targetsite/webservice.xml\";</script>"
+            "<body>"
+            "<p>Welcome to our website. It doesn't make it easy to get pro"
+            "xified"
+            "<a href=\"http://targetsite/page2.html\">"
+            " An absolute link</a>"
+            "<a target=_blank href='http://targetsite/page3.html\">"
+            " Another abs link</a>"
+            "<a href=\"/docs/page4.html\" />"
+            "</body></html>";
+
+        const char *resp_expected =
+            "HTTP/1.1 200 OK\r\n"
+            "Content-Length: 521\r\n"
+            "Content-Type: text/html\r\n"
+            "Link: <http://proxyhost/proxypath/targetsite/file.xml>; rel=absolute\r\n"
+            "Link: </dir/file.xml>; rel=relative\r\n"
+            "\r\n"
+            "<html><head><title>Hello proxy!</title>"
+            "<style>"
+            "body {"
+            " background-image:url('http://proxyhost/proxypath/targetsite/images/bg.png');"
+            "}"
+            "</style>"
+            "</head>"
+            "<script>var jslink=\"http://proxyhost/proxypath/targetsite/webservice.xml\";</script>"
+            "<body>"
+            "<p>Welcome to our website. It doesn't make it easy to get pro"
+            "xified"
+            "<a href=\"http://proxyhost/proxypath/targetsite/page2.html\">"
+            " An absolute link</a>"
+            "<a target=_blank href='http://proxyhost/proxypath/targetsite/page3.html\">"
+            " Another abs link</a>"
+            "<a href=\"/docs/page4.html\" />"
+            "</body></html>";
+
+        Z_GDU *gdu_res = 0;
+        dec = odr_createmem(ODR_DECODE);
+        odr_setbuf(dec, (char *) resp_buf, strlen(resp_buf), 0);
+        int r = z_GDU(dec, &gdu_res, 0, 0);
+
+        BOOST_CHECK(r);
+        BOOST_CHECK(gdu_res);
+        // only continue with a decoded response; gdu_res is not usable
+        // when decoding failed
+        if (r && gdu_res)
+        {
+            BOOST_CHECK_EQUAL(gdu_res->which, Z_GDU_HTTP_Response);
+
+            pack.response() = gdu_res;
+
+            //feed to the router
+            pack.router(router).move();
+
+            //analyze the response
+            Z_GDU *gdu_res_rew = pack.response().get();
+            BOOST_CHECK(gdu_res_rew);
+            if (gdu_res_rew)
+            {
+                BOOST_CHECK_EQUAL(gdu_res_rew->which, Z_GDU_HTTP_Response);
+
+                Z_HTTP_Response *hres = gdu_res_rew->u.HTTP_Response;
+                BOOST_CHECK(hres);
+
+                //compare buffers
+                const std::string expected(resp_expected);
+                std::cout << "Expected result:\n" << expected << std::endl;
+
+                enc = odr_createmem(ODR_ENCODE);
+                BOOST_CHECK(z_GDU(enc, &gdu_res_rew, 0, 0));
+                int resp_result_len = 0;
+                char *resp_result = odr_getbuf(enc, &resp_result_len, 0);
+
+                BOOST_CHECK(resp_result);
+                // the encoded buffer is length-delimited, so copy it by
+                // length instead of treating it as a C string
+                const std::string actual(resp_result ? resp_result : "",
+                                         resp_result ? resp_result_len : 0);
+                BOOST_CHECK_EQUAL(actual.size(), expected.size());
+
+                std::cout << "Rewriten result:\n" << actual << std::endl;
+                std::cout << "Rewriten result buf len: " << resp_result_len
+                    << std::endl;
+
+                BOOST_CHECK(actual == expected);
+            }
+        }
+    }
+    catch (std::exception & e) {
+        std::cout << e.what();
+        std::cout << std::endl;
+        BOOST_CHECK (false);
+    }
+    if (dec)
+        odr_destroy(dec);
+    if (enc)
+        odr_destroy(enc);
+}
+
+/*
+BOOST_AUTO_TEST_CASE( test_filter_rewrite_2 )
+{
+ try
+ {
+ std::cout << "Running xml config test case" << std::endl;
+ mp::RouterChain router;
+ mp::filter::HttpRewrite fhr;
+
+ std::string xmlconf =
+ "<?xml version='1.0'?>\n"
+ "<filter xmlns='http://indexdata.com/metaproxy'\n"
+ " id='rewrite1' type='http_rewrite'>\n"
+ " <request>\n"
+ " <rewrite from='"
+ "(?<proto>https?://)(?<pxhost>[^ /?#]+)/(?<pxpath>[^ /]+)"
+ "/(?<host>[^ /]+)(?<path>[^ ]*)'\n"
+ " to='${proto}${host}${path}' />\n"
+ " <rewrite from='(?:Host: )(.*)'\n"
+ " to='Host: ${host}' />\n"
+ " </request>\n"
+ " <response>\n"
+ " <rewrite from='"
+ "(?<proto>https?://)(?<host>[^/?# "'>]+)/(?<path>[^ "'>]+)'\n"
+ " to='${proto}${pxhost}/${pxpath}/${host}/${path}' />\n"
+ " </response>\n"
+ "</filter>\n"
+ ;
+
+ std::cout << xmlconf;
+
+ // reading and parsing XML conf
+ xmlDocPtr doc = xmlParseMemory(xmlconf.c_str(), xmlconf.size());
+ BOOST_CHECK(doc);
+ xmlNode *root_element = xmlDocGetRootElement(doc);
+ fhr.configure(root_element, true, "");
+ xmlFreeDoc(doc);
+
+ router.append(fhr);
+
+ // create an http request
+ mp::Package pack;
+
+ mp::odr odr;
+ Z_GDU *gdu_req = z_get_HTTP_Request_uri(odr,
+ "http://proxyhost/proxypath/targetsite/page1.html", 0, 1);
+
+ pack.request() = gdu_req;
+
+ //create the http response
+
+ const char *resp_buf =
+ "HTTP/1.1 200 OK\r\n"
+ "Content-Length: 50\r\n"
+ "Content-Type: text/html\r\n"
+ "Link: <http://targetsite/file.xml>; rel=absolute\r\n"
+ "Link: </dir/file.xml>; rel=relative\r\n"
+ "\r\n"
+ "<html><head><title>Hello proxy!</title>"
+ "<style>"
+ "body {"
+ " background-image:url('http://targetsite/images/bg.png');"
+ "}"
+ "</style>"
+ "</head>"
+ "<script>var jslink=\"http://targetsite/webservice.xml\";</script>"
+ "<body>"
+ "<p>Welcome to our website. It doesn't make it easy to get pro"
+ "xified"
+ "<a href=\"http://targetsite/page2.html\">"
+ " An absolute link</a>"
+ "<a target=_blank href='http://targetsite/page3.html\">"
+ " Another abs link</a>"
+ "<a href=\"/docs/page4.html\" />"
+ "</body></html>";
+
+ const char *resp_buf_rew =
+ "HTTP/1.1 200 OK\r\n"
+ "Content-Length: 50\r\n"
+ "Content-Type: text/html\r\n"
+ "Link: <http://proxyhost/proxypath/targetsite/file.xml>; rel=absolute\r\n"
+ "Link: </dir/file.xml>; rel=relative\r\n"
+ "\r\n"
+ "<html><head><title>Hello proxy!</title>"
+ "<style>"
+ "body {"
+ " background-image:url('http://proxyhost/proxypath/targetsite/images/bg.png');"
+ "}"
+ "</style>"
+ "</head>"
+ "<script>var jslink=\"http://proxyhost/proxypath/targetsite/webservice.xml\";</script>"
+ "<body>"
+ "<p>Welcome to our website. It doesn't make it easy to get pro"
+ "xified"
+ "<a href=\"http://proxyhost/proxypath/targetsite/page.html\">"
+ " An absolute link</a>"
+ "<a target=_blank href='http://proxyhost/proxypath/targetsite/anotherpage.html\">"
+ " Another abs link</a>"
+ "<a href=\"/docs/page2.html\" />"
+ "</body></html>";
+
+ int r;
+ Z_GDU *gdu_res;
+ ODR odr2 = odr_createmem(ODR_DECODE);
+ odr_setbuf(odr2, (char *) resp_buf, strlen(resp_buf), 0);
+ r = z_GDU(odr2, &gdu_res, 0, 0);
+
+ BOOST_CHECK(r == 0);
+ if (r)
+ {
+ BOOST_CHECK_EQUAL(gdu_res->which, Z_GDU_HTTP_Response);
+ }
+
+ pack.response() = gdu_res;
+
+ //feed to the router
+ pack.router(router).move();
+
+ //analyze the response
+ Z_GDU *gdu_res_rew = pack.response().get();
+ BOOST_CHECK(gdu_res_rew);
+ BOOST_CHECK_EQUAL(gdu_res_rew->which, Z_GDU_HTTP_Response);
+
+ Z_HTTP_Response *hres = gdu_res_rew->u.HTTP_Response;
+ BOOST_CHECK(hres);
+
+ //how to compare the buffers:
+
+ odr_destroy(odr2);
+ }
+ catch (std::exception & e) {
+ std::cout << e.what();
+ std::cout << std::endl;
+ BOOST_CHECK (false);
+ }
+}
+*/
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
--- /dev/null
+/* This file is part of Metaproxy.
+ Copyright (C) 2005-2013 Index Data
+
+Metaproxy is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include "config.hpp"
+#include <iostream>
+#include <stdexcept>
+
+#include "html_parser.hpp"
+#include <metaproxy/util.hpp>
+
+#include <boost/lexical_cast.hpp>
+
+#include <yaz/log.h>
+
+#define BOOST_AUTO_TEST_MAIN
+#define BOOST_TEST_DYN_LINK
+
+#include <boost/test/auto_unit_test.hpp>
+
+using namespace boost::unit_test;
+namespace mp = metaproxy_1;
+
+// Event handler that re-serialises the parser events into `out`.
+// Every attribute value is written in double quotes and whitespace inside
+// tags is not reproduced.
+class MyEvent : public mp::HTMLParserEvent {
+    public:
+        std::string out;
+
+        void openTagStart(const char *name)
+        {
+            out.append("<").append(name);
+        }
+
+        void anyTagEnd(const char *name)
+        {
+            out.append(">");
+        }
+
+        void attribute(const char *tagName,
+                const char *name, const char *value, int val_len)
+        {
+            out.append(" ").append(name).append("=\"");
+            out.append(value, val_len);
+            out.append("\"");
+        }
+
+        void closeTag(const char *name)
+        {
+            out.append("</").append(name);
+        }
+
+        void text(const char *value, int len)
+        {
+            out.append(value, len);
+        }
+};
+
+
+// Parses a small document and checks the re-serialised event stream:
+// attribute values come out double-quoted and whitespace inside tags is
+// dropped. The final "</html" has no closing '>' in input and expectation.
+BOOST_AUTO_TEST_CASE( test_html_parser_1 )
+{
+    try
+    {
+        const std::string input =
+            "<html><body><a t1=v1 t2='v2' t3=\"v3\">some text</a>"
+            "<hr><table ></table ></body></html";
+        const std::string wanted =
+            "<html><body><a t1=\"v1\" t2=\"v2\" t3=\"v3\">some text</a>"
+            "<hr><table></table></body></html";
+        mp::HTMLParser parser;
+        MyEvent sink;
+        parser.parse(sink, input.c_str());
+        BOOST_CHECK_EQUAL(wanted, sink.out);
+    }
+    catch (std::exception & e)
+    {
+        std::cout << e.what();
+        std::cout << std::endl;
+        BOOST_CHECK (false);
+    }
+}
+
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+