From 17e50819d6c5b2596ec54f2ae910b7403f29e976 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Mon, 9 Nov 2015 18:07:18 +0800 Subject: [PATCH 12/18] Avoid processing entities after encoding conversion failures For https://bugzilla.gnome.org/show_bug.cgi?id=756527 and was also raised by Chromium team in the past. When we hit a conversion failure when switching encoding it is better to stop parsing there. This was treated as a fatal error, but the parser was continuing to process to extract more errors; unfortunately that makes little sense as the data is obviously corrupt and can potentially lead to unexpected behaviour. --- parser.c | 7 +++++-- parserInternals.c | 11 ++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/parser.c b/parser.c index 134afe7..c79b4e8 100644 --- a/parser.c +++ b/parser.c @@ -10665,7 +10665,8 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) { xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n"); } xmlParseEncodingDecl(ctxt); - if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || + (ctxt->instate == XML_PARSER_EOF)) { /* * The XML REC instructs us to stop parsing right here */ @@ -10789,6 +10790,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { if (CUR == 0) { xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL); + return(-1); } /* @@ -10806,7 +10808,8 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) { * Note that we will switch encoding on the fly. 
*/ xmlParseXMLDecl(ctxt); - if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) || + (ctxt->instate == XML_PARSER_EOF)) { /* * The XML REC instructs us to stop parsing right here */ diff --git a/parserInternals.c b/parserInternals.c index df204fd..c8230c1 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -937,6 +937,7 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) { xmlCharEncodingHandlerPtr handler; int len = -1; + int ret; if (ctxt == NULL) return(-1); switch (enc) { @@ -1097,7 +1098,15 @@ xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc) if (handler == NULL) return(-1); ctxt->charset = XML_CHAR_ENCODING_UTF8; - return(xmlSwitchToEncodingInt(ctxt, handler, len)); + ret = xmlSwitchToEncodingInt(ctxt, handler, len); + if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) { + /* + * on encoding conversion errors, stop the parser + */ + xmlStopParser(ctxt); + ctxt->errNo = XML_I18N_CONV_FAILED; + } + return(ret); } /** -- 2.5.0