|
| 1 | +From ddc5f3d22644e0f6fbcc20541c86825757ffee62 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Mike Dalessio < [email protected]> |
| 3 | +Date: Mon, 21 Feb 2022 18:27:45 -0500 |
| 4 | +Subject: [PATCH] Revert "Different approach to fix quadratic behavior in HTML |
| 5 | + push parser" |
| 6 | + |
| 7 | +This reverts commit 798bdf13f6964a650b9a0b7b4b3a769f6f1d509a. |
| 8 | +--- |
| 9 | + HTMLparser.c | 14 +------------- |
| 10 | + 1 file changed, 1 insertion(+), 13 deletions(-) |
| 11 | + |
| 12 | +diff --git a/HTMLparser.c b/HTMLparser.c |
| 13 | +index eba2d7c..c0b8119 100644 |
| 14 | +--- a/HTMLparser.c |
| 15 | ++++ b/HTMLparser.c |
| 16 | +@@ -3960,25 +3960,13 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { |
| 17 | + htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, |
| 18 | + "htmlParseStartTag: invalid element name\n", |
| 19 | + NULL, NULL); |
| 20 | +- /* |
| 21 | +- * The recovery code is disabled for now as it can result in |
| 22 | +- * quadratic behavior with the push parser. htmlParseStartTag |
| 23 | +- * must consume all content up to the final '>' in order to avoid |
| 24 | +- * rescanning for this terminator. |
| 25 | +- * |
| 26 | +- * For a proper fix in line with HTML5, htmlParseStartTag and |
| 27 | +- * htmlParseElement should only be called when there's an ASCII |
| 28 | +- * alpha character following the initial '<'. Otherwise, the '<' |
| 29 | +- * should be emitted as text (unless followed by '!', '/' or '?'). |
| 30 | +- */ |
| 31 | +-#if 0 |
| 32 | + /* if recover preserve text on classic misconstructs */ |
| 33 | + if ((ctxt->recovery) && ((IS_BLANK_CH(CUR)) || (CUR == '<') || |
| 34 | + (CUR == '=') || (CUR == '>') || (((CUR >= '0') && (CUR <= '9'))))) { |
| 35 | + htmlParseCharDataInternal(ctxt, '<'); |
| 36 | + return(-1); |
| 37 | + } |
| 38 | +-#endif |
| 39 | ++ |
| 40 | + |
| 41 | + /* Dump the bogus tag like browsers do */ |
| 42 | + while ((CUR != 0) && (CUR != '>') && |
| 43 | +-- |
| 44 | +2.31.0 |
| 45 | + |
0 commit comments