From: Adam Dickmeiss Date: Wed, 19 Jun 2013 08:26:59 +0000 (+0200) Subject: Merge branch 'master' into rewrite-filter X-Git-Tag: v1.3.59~57^2~5 X-Git-Url: http://lists.indexdata.com/cgi-bin?a=commitdiff_plain;h=e17fbdb735a7524ba17ff13938707fcaad7f5470;hp=fef99337bb6e6f908982427797c66ae775f9ae1c;p=metaproxy-moved-to-github.git Merge branch 'master' into rewrite-filter --- diff --git a/src/.gitignore b/src/.gitignore index caad553..a9d9820 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -1,39 +1,5 @@ -.libs -.deps -*.lo -*.la -stamp-h* +* +!*.hpp +!*.cpp +!*.am config.hpp -socket -Makefile -Makefile.in -config.hpp.in -ex_filter_frontend_net -ex_router_flexml -test_boost_threads -test_boost_time -test_filter_auth_simple -test_filter1 -test_filter2 -test_filter_frontend_net -test_filter_log -test_filter_multi -test_filter_query_rewrite -test_package1 -test_pipe -test_thread_pool_observer -test_session1 -test_session2 -test_filter_factory -test_filter_z3950_client -test_filter_backend_test -test_filter_virt_db -test_router_flexml -test_ses_map -tstdl -metaproxy -test_filter_bounce -test_filter_record_transform -test_filter_sru_to_z3950 -*.o -metaproxy-config diff --git a/src/Makefile.am b/src/Makefile.am index 564f7b8..20ed9be 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -21,6 +21,7 @@ filter_src = \ filter_http_client.cpp filter_http_client.hpp \ filter_http_file.cpp filter_http_file.hpp \ filter_http_rewrite1.cpp filter_http_rewrite1.hpp \ + filter_http_rewrite.cpp filter_http_rewrite.hpp \ filter_limit.cpp filter_limit.hpp \ filter_load_balance.cpp filter_load_balance.hpp \ filter_log.cpp filter_log.hpp \ @@ -57,6 +58,7 @@ libmetaproxy_la_SOURCES = \ torus.cpp torus.hpp \ url_recipe.cpp \ util.cpp \ + html_parser.hpp html_parser.cpp \ router_chain.cpp \ router_flexml.hpp router_flexml.cpp \ router_xml.cpp \ @@ -109,8 +111,10 @@ check_PROGRAMS = \ test_filter_record_transform \ test_filter_sru_to_z3950 \ test_filter_virt_db \ + test_filter_rewrite \ test_ses_map \ test_router_flexml \ + test_html_parser \ test_xmlutil TESTS=$(check_PROGRAMS) @@ -139,6 +143,9 @@ test_ses_map_SOURCES = test_ses_map.cpp test_router_flexml_SOURCES = test_router_flexml.cpp test_xmlutil_SOURCES = test_xmlutil.cpp +test_html_parser_SOURCES = test_html_parser.cpp +test_filter_rewrite_SOURCES = test_filter_rewrite.cpp + # doxygen target dox: (cd $(top_srcdir) ; make dox) diff --git a/src/factory_static.cpp b/src/factory_static.cpp index 3d88a82..95d0a62 100644 --- a/src/factory_static.cpp +++ b/src/factory_static.cpp @@ -54,6 +54,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "filter_z3950_client.hpp" #include "filter_zeerex_explain.hpp" #include "filter_zoom.hpp" +#include "filter_http_rewrite.hpp" namespace mp = metaproxy_1; @@ -87,6 +88,7 @@ mp::FactoryStatic::FactoryStatic() &metaproxy_1_filter_z3950_client, &metaproxy_1_filter_zeerex_explain, &metaproxy_1_filter_zoom, + &metaproxy_1_filter_http_rewrite, 0 }; int i; diff --git a/src/filter_http_rewrite.cpp b/src/filter_http_rewrite.cpp new file mode 100644 index 0000000..55ae35c --- /dev/null +++ b/src/filter_http_rewrite.cpp @@ -0,0 +1,448 @@ +/* This file is part of Metaproxy. + Copyright (C) 2005-2013 Index Data + +Metaproxy is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "config.hpp" +#include +#include +#include +#include "filter_http_rewrite.hpp" + +#include +#include + +#include +#include + +#include +#include + +#if HAVE_SYS_TYPES_H +#include +#endif + +namespace mp = metaproxy_1; +namespace yf = mp::filter; + +namespace metaproxy_1 { + namespace filter { + class HttpRewrite::RuleScope { + public: + std::vector tags; + std::vector attrs; + std::string content_type; + }; + class HttpRewrite::Rule { + public: + enum Section { METHOD, HEADER, BODY }; + std::string regex; + std::string recipe; + std::map group_index; + std::vector scopes; + Section section; + const std::string search_replace( + std::map & vars, + const std::string & txt) const; + std::string sub_vars ( + const std::map & vars) const; + void parse_groups(); + }; + class HttpRewrite::Rules { + public: + std::vector rules; + void rewrite_reqline (mp::odr & o, Z_HTTP_Request *hreq, + std::map & vars) const; + void rewrite_headers(mp::odr & o, Z_HTTP_Header *headers, + std::map & vars) const; + void rewrite_body (mp::odr & o, + char **content_buf, int *content_len, + std::map & vars) const; + const std::string test_patterns( + std::map & vars, + const std::string & txt) const; + }; + } +} + +yf::HttpRewrite::HttpRewrite() : req_rules(new Rules), res_rules(new Rules) +{ +} + +yf::HttpRewrite::~HttpRewrite() +{ +} + +void yf::HttpRewrite::process(mp::Package & package) const +{ + yaz_log(YLOG_LOG, "HttpRewrite begins...."); + Z_GDU *gdu = package.request().get(); + //map of request/response vars + std::map vars; + //we have an http req + if (gdu && gdu->which == Z_GDU_HTTP_Request) + { + Z_HTTP_Request *hreq = gdu->u.HTTP_Request; + mp::odr o; + req_rules->rewrite_reqline(o, hreq, vars); + yaz_log(YLOG_LOG, ">> Request headers"); + req_rules->rewrite_headers(o, hreq->headers, vars); + req_rules->rewrite_body(o, + &hreq->content_buf, &hreq->content_len, + vars); + package.request() = gdu; + } + package.move(); + gdu = package.response().get(); + if (gdu && gdu->which == Z_GDU_HTTP_Response) + { + Z_HTTP_Response *hres = gdu->u.HTTP_Response; + yaz_log(YLOG_LOG, "Response code %d", hres->code); + mp::odr o; + yaz_log(YLOG_LOG, "<< Respose headers"); + res_rules->rewrite_headers(o, hres->headers, vars); + res_rules->rewrite_body(o, &hres->content_buf, + &hres->content_len, vars); + package.response() = gdu; + } +} + +void yf::HttpRewrite::Rules::rewrite_reqline (mp::odr & o, + Z_HTTP_Request *hreq, + std::map & vars) const +{ + //rewrite the request line + std::string path; + if (strstr(hreq->path, "http://") == hreq->path) + { + yaz_log(YLOG_LOG, "Path in the method line is absolute, " + "possibly a proxy request"); + path += hreq->path; + } + else + { + //TODO what about proto + path += "http://"; + path += z_HTTP_header_lookup(hreq->headers, "Host"); + path += hreq->path; + } + yaz_log(YLOG_LOG, "Proxy request URL is %s", path.c_str()); + std::string npath = + test_patterns(vars, path); + if (!npath.empty()) + { + yaz_log(YLOG_LOG, "Rewritten request URL is %s", npath.c_str()); + hreq->path = odr_strdup(o, npath.c_str()); + } +} + +void yf::HttpRewrite::Rules::rewrite_headers(mp::odr & o, + Z_HTTP_Header *headers, + std::map & vars) const +{ + for (Z_HTTP_Header *header = headers; + header != 0; + header = header->next) + { + std::string sheader(header->name); + sheader += ": "; + sheader += header->value; + yaz_log(YLOG_LOG, "%s: %s", header->name, header->value); + std::string out = test_patterns(vars, sheader); + if (!out.empty()) + { + size_t pos = out.find(": "); + if (pos == std::string::npos) + { + yaz_log(YLOG_LOG, "Header malformed during rewrite, ignoring"); + continue; + } + header->name = odr_strdup(o, out.substr(0, pos).c_str()); + header->value = odr_strdup(o, out.substr(pos+2, + std::string::npos).c_str()); + } + } +} + +void yf::HttpRewrite::Rules::rewrite_body (mp::odr & o, + char **content_buf, + int *content_len, + std::map & vars) const +{ + if (*content_buf) + { + std::string body(*content_buf); + std::string nbody = + test_patterns(vars, body); + if (!nbody.empty()) + { + *content_buf = odr_strdup(o, nbody.c_str()); + *content_len = nbody.size(); + } + } +} + +/** + * Tests pattern from the vector in order and executes recipe on + the first match. + */ +const std::string yf::HttpRewrite::Rules::test_patterns( + std::map & vars, + const std::string & txt) const +{ + for (unsigned i = 0; i < rules.size(); i++) + { + std::string out = rules[i].search_replace(vars, txt); + if (!out.empty()) return out; + } + return ""; +} + +const std::string yf::HttpRewrite::Rule::search_replace( + std::map & vars, + const std::string & txt) const +{ + //exec regex against value + boost::regex re(regex); + boost::smatch what; + std::string::const_iterator start, end; + start = txt.begin(); + end = txt.end(); + std::string out; + while (regex_search(start, end, what, re)) //find next full match + { + unsigned i; + for (i = 1; i < what.size(); ++i) + { + //check if the group is named + std::map::const_iterator it + = group_index.find(i); + if (it != group_index.end()) + { //it is + if (!what[i].str().empty()) + vars[it->second] = what[i]; + } + + } + //prepare replacement string + std::string rvalue = sub_vars(vars); + yaz_log(YLOG_LOG, "! Rewritten '%s' to '%s'", + what.str(0).c_str(), rvalue.c_str()); + out.append(start, what[0].first); + out.append(rvalue); + start = what[0].second; //move search forward + } + //if we had a match cat the last part + if (start != txt.begin()) + out.append(start, end); + return out; +} + +void yf::HttpRewrite::Rule::parse_groups() +{ + int gnum = 0; + bool esc = false; + const std::string & str = regex; + yaz_log(YLOG_LOG, "Parsing groups from '%s'", str.c_str()); + for (unsigned i = 0; i < str.size(); ++i) + { + if (!esc && str[i] == '\\') + { + esc = true; + continue; + } + if (!esc && str[i] == '(') //group starts + { + gnum++; + if (i+1 < str.size() && str[i+1] == '?') //group with attrs + { + i++; + if (i+1 < str.size() && str[i+1] == ':') //non-capturing + { + if (gnum > 0) gnum--; + i++; + continue; + } + if (i+1 < str.size() && str[i+1] == 'P') //optional, python + i++; + if (i+1 < str.size() && str[i+1] == '<') //named + { + i++; + std::string gname; + bool term = false; + while (++i < str.size()) + { + if (str[i] == '>') { term = true; break; } + if (!isalnum(str[i])) + throw mp::filter::FilterException + ("Only alphanumeric chars allowed, found " + " in '" + + str + + "' at " + + boost::lexical_cast(i)); + gname += str[i]; + } + if (!term) + throw mp::filter::FilterException + ("Unterminated group name '" + gname + + " in '" + str +"'"); + group_index[gnum] = gname; + yaz_log(YLOG_LOG, "Found named group '%s' at $%d", + gname.c_str(), gnum); + } + } + } + esc = false; + } +} + +std::string yf::HttpRewrite::Rule::sub_vars ( + const std::map & vars) const +{ + std::string out; + bool esc = false; + const std::string & in = recipe; + for (unsigned i = 0; i < in.size(); ++i) + { + if (!esc && in[i] == '\\') + { + esc = true; + continue; + } + if (!esc && in[i] == '$') //var + { + if (i+1 < in.size() && in[i+1] == '{') //ref prefix + { + ++i; + std::string name; + bool term = false; + while (++i < in.size()) + { + if (in[i] == '}') { term = true; break; } + name += in[i]; + } + if (!term) throw mp::filter::FilterException + ("Unterminated var ref in '"+in+"' at " + + boost::lexical_cast(i)); + std::map::const_iterator it + = vars.find(name); + if (it != vars.end()) + { + out += it->second; + } + } + else + { + throw mp::filter::FilterException + ("Malformed or trimmed var ref in '" + +in+"' at "+boost::lexical_cast(i)); + } + continue; + } + //passthru + out += in[i]; + esc = false; + } + return out; +} + +void yf::HttpRewrite::configure_rules(const xmlNode *ptr, + Rules & rules) +{ + for (ptr = ptr->children; ptr; ptr = ptr->next) + { + if (ptr->type != XML_ELEMENT_NODE) + continue; + else if (!strcmp((const char *) ptr->name, "rewrite")) + { + Rule rule; + const struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + { + if (!strcmp((const char *) attr->name, "from")) + rule.regex = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "to")) + rule.recipe = mp::xml::get_text(attr->children); + else + throw mp::filter::FilterException + ("Bad attribute " + + std::string((const char *) attr->name) + + " in rewrite section of http_rewrite"); + } + yaz_log(YLOG_LOG, "Found rewrite rule from '%s' to '%s'", + rule.regex.c_str(), rule.recipe.c_str()); + rule.parse_groups(); + if (!rule.regex.empty()) + rules.rules.push_back(rule); + } + else + { + throw mp::filter::FilterException + ("Bad element o" + + std::string((const char *) ptr->name) + + " in http_rewrite1 filter"); + } + } +} + +void yf::HttpRewrite::configure(const xmlNode * ptr, bool test_only, + const char *path) +{ + for (ptr = ptr->children; ptr; ptr = ptr->next) + { + if (ptr->type != XML_ELEMENT_NODE) + continue; + else if (!strcmp((const char *) ptr->name, "request")) + { + configure_rules(ptr, *req_rules); + } + else if (!strcmp((const char *) ptr->name, "response")) + { + configure_rules(ptr, *res_rules); + } + else + { + throw mp::filter::FilterException + ("Bad element " + + std::string((const char *) ptr->name) + + " in http_rewrite1 filter"); + } + } +} + +static mp::filter::Base* filter_creator() +{ + return new mp::filter::HttpRewrite; +} + +extern "C" { + struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite = { + 0, + "http_rewrite", + filter_creator + }; +} + + +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/src/filter_http_rewrite.hpp b/src/filter_http_rewrite.hpp new file mode 100644 index 0000000..d611142 --- /dev/null +++ b/src/filter_http_rewrite.hpp @@ -0,0 +1,59 @@ +/* This file is part of Metaproxy. + Copyright (C) 2005-2013 Index Data + +Metaproxy is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#ifndef FILTER_HTTP_REWRITE_HPP +#define FILTER_HTTP_REWRITE_HPP + +#include +#include + +namespace mp = metaproxy_1; + +namespace metaproxy_1 { + namespace filter { + class HttpRewrite : public Base { + class Rules; + class Rule; + class RuleScope; + boost::scoped_ptr req_rules; + boost::scoped_ptr res_rules; + void configure_rules(const xmlNode *ptr, Rules & rules); + public: + HttpRewrite(); + ~HttpRewrite(); + void process(metaproxy_1::Package & package) const; + void configure(const xmlNode * ptr, + bool test_only, const char *path); + }; + } +} + +extern "C" { + extern struct metaproxy_1_filter_struct metaproxy_1_filter_http_rewrite; +} + +#endif +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/src/html_parser.cpp b/src/html_parser.cpp new file mode 100644 index 0000000..8d91a2c --- /dev/null +++ b/src/html_parser.cpp @@ -0,0 +1,248 @@ +/* This file is part of Metaproxy. + Copyright (C) 2005-2013 Index Data + +Metaproxy is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "config.hpp" +#include "html_parser.hpp" + +#include +#include +#include +#include +#include + +#define TAG_MAX_LEN 64 + +#define SPACECHR " \t\r\n\f" + +#define DEBUG(x) x + +#if HAVE_SYS_TYPES_H +#include +#endif + +namespace mp = metaproxy_1; + +mp::HTMLParser::HTMLParser() +{ +} + +mp::HTMLParser::~HTMLParser() +{ +} + +static void parse_str(mp::HTMLParserEvent & event, const char * str); + +void mp::HTMLParser::parse(mp::HTMLParserEvent & event, const char *str) const +{ + parse_str(event, str); +} + +//static C functions follow would probably make sense to wrap this in PIMPL? + +static char* dupe (const char *buff, int len) +{ + char *value = (char *) malloc (len + 1); + assert (value); + memcpy (value, buff, len); + value[len] = '\0'; + return value; +} + +static int skipSpace (const char *cp) +{ + int i = 0; + while (cp[i] && strchr (SPACECHR, cp[i])) + i++; + return i; +} + +static int skipName (const char *cp, char *dst) +{ + int i; + int j = 0; + for (i=0; cp[i] && !strchr (SPACECHR "/>=", cp[i]); i++) + if (j < TAG_MAX_LEN-1) + { + dst[j] = tolower(cp[j]); + j++; + } + dst[j] = '\0'; + return i; +} + +static int skipAttribute (const char *cp, char *name, const char **value, int *val_len) +{ + int i = skipName (cp, name); + *value = NULL; + if (!i) + return skipSpace (cp); + i += skipSpace (cp + i); + if (cp[i] == '=') + { + int v0, v1; + i++; + i += skipSpace (cp + i); + if (cp[i] == '\"' || cp[i] == '\'') + { + char tr = cp[i]; + v0 = ++i; + while (cp[i] != tr && cp[i]) + i++; + v1 = i; + if (cp[i]) + i++; + } + else + { + v0 = i; + while (cp[i] && !strchr (SPACECHR ">", cp[i])) + i++; + v1 = i; + } + *value = cp + v0; + *val_len = v1 - v0; + } + i += skipSpace (cp + i); + return i; +} + +static int tagAttrs (mp::HTMLParserEvent & event, + const char *tagName, + const char *cp) +{ + int i; + char attr_name[TAG_MAX_LEN]; + const char *attr_value; + int val_len; + i = skipSpace (cp); + while (cp[i] && cp[i] != '>') + { + int nor = skipAttribute (cp+i, attr_name, &attr_value, &val_len); + i += nor; + if (nor) + { + DEBUG(printf ("------ attr %s=%s\n", attr_name, dupe(attr_value, val_len))); + event.attribute(tagName, attr_name, attr_value, val_len); + } + else + { + if (!nor) + i++; + } + } + return i; +} + +static int tagStart (mp::HTMLParserEvent & event, + char *tagName, const char *cp, const char which) +{ + int i = 0; + i = skipName (cp, tagName); + switch (which) + { + case '/' : + DEBUG(printf ("------ tag close %s\n", tagName)); + event.closeTag(tagName); + break; + case '!' : + DEBUG(printf ("------ dtd %s\n", tagName)); + break; + case '?' : + DEBUG(printf ("------ pi %s\n", tagName)); + break; + default : + DEBUG(printf ("------ tag open %s\n", tagName)); + event.openTagStart(tagName); + break; + } + return i; +} + +static int tagEnd (mp::HTMLParserEvent & event, const char *tagName, const char *cp) +{ + int i = 0; + while (cp[i] && cp[i] != '>') + i++; + if (cp[i] == '>') + { + event.anyTagEnd(tagName); + i++; + } + return i; +} + +static void tagText (mp::HTMLParserEvent & event, const char *text_start, const char *text_end) +{ + if (text_end - text_start) //got text to flush + { + DEBUG(printf ("------ text %s\n", dupe(text_start, text_end-text_start))); + event.text(text_start, text_end-text_start); + } +} + +static void parse_str (mp::HTMLParserEvent & event, const char *cp) +{ + const char *text_start = cp; + const char *text_end = cp; + while (*cp) + { + if (cp[0] == '<' && cp[1]) //tag? + { + char which = cp[1]; + if (which == '/') cp++; + if (!strchr (SPACECHR, cp[1])) //valid tag starts + { + tagText (event, text_start, text_end); //flush any text + char tagName[TAG_MAX_LEN]; + cp++; + if (which == '/') + { + cp += tagStart (event, tagName, cp, which); + } + else if (which == '!' || which == '?') //pi or dtd + { + cp++; + cp += tagStart (event, tagName, cp, which); + } + else + { + cp += tagStart (event, tagName, cp, which); + cp += tagAttrs (event, tagName, cp); + } + cp += tagEnd (event, tagName, cp); + text_start = cp; + text_end = cp; + continue; + } + } + //text + cp++; + text_end = cp; + } + tagText (event, text_start, text_end); //flush any text +} + +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/src/html_parser.hpp b/src/html_parser.hpp new file mode 100644 index 0000000..ad46061 --- /dev/null +++ b/src/html_parser.hpp @@ -0,0 +1,53 @@ +/* This file is part of Metaproxy. + Copyright (C) 2005-2013 Index Data + +Metaproxy is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#ifndef HTML_PARSER_HPP +#define HTML_PARSER_HPP + +#include + +namespace metaproxy_1 { + class HTMLParserEvent { + public: + virtual void openTagStart(const char *name) = 0; + virtual void anyTagEnd(const char *name) = 0; + virtual void attribute(const char *tagName, + const char *name, + const char *value, + int val_len) = 0; + virtual void closeTag(const char *name) = 0; + virtual void text(const char *value, int len) = 0; + }; + class HTMLParser { + public: + HTMLParser(); + ~HTMLParser(); + void parse(HTMLParserEvent & event, const char *str) const; + }; +} + +#endif +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ + diff --git a/src/test_filter_rewrite.cpp b/src/test_filter_rewrite.cpp new file mode 100644 index 0000000..90c17d6 --- /dev/null +++ b/src/test_filter_rewrite.cpp @@ -0,0 +1,358 @@ +/* This file is part of Metaproxy. + Copyright (C) 2005-2013 Index Data + +Metaproxy is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "config.hpp" +#include +#include + +#include "filter_http_client.hpp" +#include "filter_http_rewrite.hpp" +#include +#include +#include + +#include +#include + +#include + +#define BOOST_AUTO_TEST_MAIN +#define BOOST_TEST_DYN_LINK + +#include + +using namespace boost::unit_test; +namespace mp = metaproxy_1; +/* + * The global testconfig is commented out, as it won't even compile + * on old Centos5 machines +struct TestConfig { + TestConfig() + { + std::cout << "global setup\n"; + yaz_log_init_level(YLOG_ALL); + } + ~TestConfig() + { + std::cout << "global teardown\n"; + } +}; + +BOOST_GLOBAL_FIXTURE( TestConfig ); +*/ + +BOOST_AUTO_TEST_CASE( test_filter_rewrite_1 ) +{ + try + { + std::cout << "Running non-xml config test case" << std::endl; + mp::RouterChain router; + mp::filter::HttpRewrite fhr; + + std::string xmlconf = + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + "\n" + ; + + std::cout << xmlconf; + + // reading and parsing XML conf + xmlDocPtr doc = xmlParseMemory(xmlconf.c_str(), xmlconf.size()); + BOOST_CHECK(doc); + xmlNode *root_element = xmlDocGetRootElement(doc); + fhr.configure(root_element, true, ""); + xmlFreeDoc(doc); + + router.append(fhr); + + // create an http request + mp::Package pack; + + mp::odr odr; + Z_GDU *gdu_req = z_get_HTTP_Request_uri(odr, + "http://proxyhost/proxypath/targetsite/page1.html", 0, 1); + + pack.request() = gdu_req; + + //create the http response + + const char *resp_buf = + "HTTP/1.1 200 OK\r\n" + "Content-Length: 441\r\n" + "Content-Type: text/html\r\n" + "Link: ; rel=absolute\r\n" + "Link: ; rel=relative\r\n" + "\r\n" + "Hello proxy!" + "" + "" + "" + "" + "

Welcome to our website. It doesn't make it easy to get pro" + "xified" + "" + " An absolute link" + "" + "" + "" + "" + "

Welcome to our website. It doesn't make it easy to get pro" + "xified" + "" + " An absolute link" + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + " \n" + "\n" + ; + + std::cout << xmlconf; + + // reading and parsing XML conf + xmlDocPtr doc = xmlParseMemory(xmlconf.c_str(), xmlconf.size()); + BOOST_CHECK(doc); + xmlNode *root_element = xmlDocGetRootElement(doc); + fhr.configure(root_element, true, ""); + xmlFreeDoc(doc); + + router.append(fhr); + + // create an http request + mp::Package pack; + + mp::odr odr; + Z_GDU *gdu_req = z_get_HTTP_Request_uri(odr, + "http://proxyhost/proxypath/targetsite/page1.html", 0, 1); + + pack.request() = gdu_req; + + //create the http response + + const char *resp_buf = + "HTTP/1.1 200 OK\r\n" + "Content-Length: 50\r\n" + "Content-Type: text/html\r\n" + "Link: ; rel=absolute\r\n" + "Link: ; rel=relative\r\n" + "\r\n" + "Hello proxy!" + "" + "" + "" + "" + "

Welcome to our website. It doesn't make it easy to get pro" + "xified" + "" + " An absolute link" + "" + "" + "" + "" + "

Welcome to our website. It doesn't make it easy to get pro" + "xified" + "" + " An absolute link" + "some text" + "


some text" + "