X-Git-Url: http://lists.indexdata.com/cgi-bin?a=blobdiff_plain;f=src%2Fhtml_parser.cpp;h=8c4426d622aec429df53228378c1180b08c4c08b;hb=1751c7afd96e1984f5710284cdcb245a93d9ce6c;hp=1436553f691048fa1b9cd66905ea2fa1c4d0ed5a;hpb=897639233e3a6232d039666ba38b393bf7ac0ef0;p=metaproxy-moved-to-github.git diff --git a/src/html_parser.cpp b/src/html_parser.cpp index 1436553..8c4426d 100644 --- a/src/html_parser.cpp +++ b/src/html_parser.cpp @@ -44,7 +44,7 @@ namespace metaproxy_1 { const char *cp); int skipAttribute(HTMLParserEvent &event, const char *cp, int *attr_len, - const char **value, int *val_len); + const char **value, int *val_len, int *tr); Rep(); ~Rep(); int m_verbose; @@ -99,8 +99,10 @@ static int skipName(const char *cp) int mp::HTMLParser::Rep::skipAttribute(HTMLParserEvent &event, const char *cp, int *attr_len, - const char **value, int *val_len) + const char **value, int *val_len, + int *tr) { + int v0, v1; int i = skipName(cp); *attr_len = i; *value = NULL; @@ -109,14 +111,13 @@ int mp::HTMLParser::Rep::skipAttribute(HTMLParserEvent &event, i += skipSpace(cp + i); if (cp[i] == '=') { - int v0, v1; i++; i += skipSpace(cp + i); if (cp[i] == '\"' || cp[i] == '\'') { - char tr = cp[i]; + *tr = cp[i]; v0 = ++i; - while (cp[i] != tr && cp[i]) + while (cp[i] != *tr && cp[i]) i++; v1 = i; if (cp[i]) @@ -124,6 +125,7 @@ int mp::HTMLParser::Rep::skipAttribute(HTMLParserEvent &event, } else { + *tr = 0; v0 = i; while (cp[i] && !strchr(SPACECHR ">", cp[i])) i++; @@ -131,8 +133,8 @@ int mp::HTMLParser::Rep::skipAttribute(HTMLParserEvent &event, } *value = cp + v0; *val_len = v1 - v0; + i += skipSpace(cp + i); } - i += skipSpace(cp + i); return i; } @@ -147,19 +149,19 @@ int mp::HTMLParser::Rep::tagAttrs(HTMLParserEvent &event, int attr_len; const char *value; int val_len; - int nor = skipAttribute(event, cp+i, &attr_len, &value, &val_len); + int tr; + char x[2]; + int nor = skipAttribute(event, cp+i, &attr_len, &value, &val_len, &tr); + if (!nor) + break; i += nor; - if (nor) - { - if (m_verbose) - printf ("------ attr %.*s=%.*s\n", attr_len, attr_name, - val_len, value); - event.attribute(name, len, attr_name, attr_len, value, val_len); - } - else - { - i++; - } + + x[0] = tr; + x[1] = 0; + if (m_verbose) + printf ("------ attr %.*s=%.*s\n", attr_len, attr_name, + val_len, value); + event.attribute(name, len, attr_name, attr_len, value, val_len, x); } return i; } @@ -216,7 +218,11 @@ int mp::HTMLParser::Rep::tagEnd(HTMLParserEvent &event, for (; cp[i] && cp[i] != '/' && cp[i] != '>'; i++) ; if (i > 0) + { + if (m_verbose) + printf("------ text %.*s\n", i, cp); event.text(cp, i); + } if (cp[i] == '/') { close_it = 1; @@ -224,6 +230,9 @@ int mp::HTMLParser::Rep::tagEnd(HTMLParserEvent &event, } if (cp[i] == '>') { + if (m_verbose) + printf("------ any tag %s %.*s\n", + close_it ? " close" : "end", tag_len, tag); event.anyTagEnd(tag, tag_len, close_it); i++; }