X-Git-Url: http://lists.indexdata.com/cgi-bin?a=blobdiff_plain;f=src%2Fhtml_parser.cpp;h=8c4426d622aec429df53228378c1180b08c4c08b;hb=bb6b58cbec0e41a8ba2403e540185e77882e8741;hp=47b2e148fedb407a35d89287ae57d3324f1fdba4;hpb=68250b45b381d70615236e9160e683c2ba8bf53a;p=metaproxy-moved-to-github.git diff --git a/src/html_parser.cpp b/src/html_parser.cpp index 47b2e14..8c4426d 100644 --- a/src/html_parser.cpp +++ b/src/html_parser.cpp @@ -42,6 +42,9 @@ namespace metaproxy_1 { int tagAttrs(HTMLParserEvent &event, const char *name, int len, const char *cp); + int skipAttribute(HTMLParserEvent &event, + const char *cp, int *attr_len, + const char **value, int *val_len, int *tr); Rep(); ~Rep(); int m_verbose; @@ -94,9 +97,12 @@ static int skipName(const char *cp) return i; } -static int skipAttribute(const char *cp, int *attr_len, - const char **value, int *val_len) +int mp::HTMLParser::Rep::skipAttribute(HTMLParserEvent &event, + const char *cp, int *attr_len, + const char **value, int *val_len, + int *tr) { + int v0, v1; int i = skipName(cp); *attr_len = i; *value = NULL; @@ -105,14 +111,13 @@ static int skipAttribute(const char *cp, int *attr_len, i += skipSpace(cp + i); if (cp[i] == '=') { - int v0, v1; i++; i += skipSpace(cp + i); if (cp[i] == '\"' || cp[i] == '\'') { - char tr = cp[i]; + *tr = cp[i]; v0 = ++i; - while (cp[i] != tr && cp[i]) + while (cp[i] != *tr && cp[i]) i++; v1 = i; if (cp[i]) @@ -120,6 +125,7 @@ static int skipAttribute(const char *cp, int *attr_len, } else { + *tr = 0; v0 = i; while (cp[i] && !strchr(SPACECHR ">", cp[i])) i++; @@ -127,8 +133,8 @@ static int skipAttribute(const char *cp, int *attr_len, } *value = cp + v0; *val_len = v1 - v0; + i += skipSpace(cp + i); } - i += skipSpace(cp + i); return i; } @@ -143,19 +149,19 @@ int mp::HTMLParser::Rep::tagAttrs(HTMLParserEvent &event, int attr_len; const char *value; int val_len; - int nor = skipAttribute(cp+i, &attr_len, &value, &val_len); + int tr; + char x[2]; + int nor = skipAttribute(event, cp+i, &attr_len, &value, &val_len, &tr); + if (!nor) + break; i += nor; - if (nor) - { - if (m_verbose) - printf ("------ attr %.*s=%.*s\n", attr_len, attr_name, - val_len, value); - event.attribute(name, len, attr_name, attr_len, value, val_len); - } - else - { - i++; - } + + x[0] = tr; + x[1] = 0; + if (m_verbose) + printf ("------ attr %.*s=%.*s\n", attr_len, attr_name, + val_len, value); + event.attribute(name, len, attr_name, attr_len, value, val_len, x); } return i; } @@ -209,14 +215,24 @@ int mp::HTMLParser::Rep::tagEnd(HTMLParserEvent &event, { int i = 0; int close_it = 0; - while (cp[i] && cp[i] != '>') + for (; cp[i] && cp[i] != '/' && cp[i] != '>'; i++) + ; + if (i > 0) + { + if (m_verbose) + printf("------ text %.*s\n", i, cp); + event.text(cp, i); + } + if (cp[i] == '/') { - if (cp[i] == '/') - close_it = 1; + close_it = 1; i++; } if (cp[i] == '>') { + if (m_verbose) + printf("------ any tag %s %.*s\n", + close_it ? " close" : "end", tag_len, tag); event.anyTagEnd(tag, tag_len, close_it); i++; }