Skip to content

Commit 25b61a4

Browse files
committed
fix: allow DOCTYPE with disable_entities=True (default)
Simplify disable_entities overly defensive parser config. Fixes #397.
1 parent a2a9ab7 commit 25b61a4

File tree

2 files changed

+39
-34
lines changed

2 files changed

+39
-34
lines changed

tests/test_xmltodict.py

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ def test_disable_entities_true_rejects_xmlbomb():
419419
]>
420420
<bomb>&c;</bomb>
421421
"""
422-
with pytest.raises(expat.ExpatError, match="entities are disabled"):
422+
with pytest.raises(ValueError, match="entities are disabled"):
423423
parse(xml, disable_entities=True)
424424

425425

@@ -437,39 +437,57 @@ def test_disable_entities_false_returns_xmlbomb():
437437
assert parse(xml, disable_entities=False) == expectedResult
438438

439439

440-
def test_disable_entities_true_rejects_external_dtd():
440+
def test_external_entity():
441441
xml = """
442442
<!DOCTYPE external [
443443
<!ENTITY ee SYSTEM "http://www.python.org/">
444444
]>
445445
<root>&ee;</root>
446446
"""
447-
with pytest.raises(expat.ExpatError, match="entities are disabled"):
448-
parse(xml, disable_entities=True)
447+
with pytest.raises(ValueError, match="entities are disabled"):
448+
parse(xml)
449+
assert parse(xml, disable_entities=False) == {"root": None}
449450

450451

451-
def test_disable_entities_true_attempts_external_dtd():
452+
def test_external_entity_with_custom_expat():
452453
xml = """
453454
<!DOCTYPE external [
454455
<!ENTITY ee SYSTEM "http://www.python.org/">
455456
]>
456457
<root>&ee;</root>
457458
"""
458459

459-
def raising_external_ref_handler(*args, **kwargs):
460-
parser = ParserCreate(*args, **kwargs)
461-
parser.ExternalEntityRefHandler = lambda *x: 0
462-
return parser
463-
expat.ParserCreate = raising_external_ref_handler
464-
# Using this try/catch because a TypeError is thrown before
465-
# the ExpatError.
466-
try:
467-
parse(xml, disable_entities=False, expat=expat)
468-
except expat.ExpatError:
469-
assert True
470-
else:
471-
assert False
472-
expat.ParserCreate = ParserCreate
460+
class CustomExpat:
461+
def __init__(self, external_entity_result):
462+
self.external_entity_result = external_entity_result
463+
464+
def ParserCreate(self, *args, **kwargs):
465+
parser = ParserCreate(*args, **kwargs)
466+
467+
def _handler(*args, **kwargs):
468+
return self.external_entity_result
469+
470+
parser.ExternalEntityRefHandler = _handler
471+
return parser
472+
473+
ExpatError = expat.ExpatError
474+
475+
with pytest.raises(expat.ExpatError):
476+
parse(xml, disable_entities=False, expat=CustomExpat(0))
477+
assert parse(xml, disable_entities=False, expat=CustomExpat(1)) == {"root": None}
478+
with pytest.raises(ValueError):
479+
assert parse(xml, disable_entities=True, expat=CustomExpat(1))
480+
with pytest.raises(ValueError):
481+
assert parse(xml, disable_entities=True, expat=CustomExpat(0))
482+
483+
484+
def test_disable_entities_true_allows_doctype_without_entities():
485+
xml = """<?xml version='1.0' encoding='UTF-8'?>
486+
<!DOCTYPE data SYSTEM "diagram.dtd">
487+
<foo>bar</foo>
488+
"""
489+
assert parse(xml, disable_entities=True) == {"foo": "bar"}
490+
assert parse(xml, disable_entities=False) == {"foo": "bar"}
473491

474492

475493
def test_disable_entities_allows_comments_by_default():

xmltodict.py

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -355,22 +355,9 @@ def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
355355
parser.buffer_text = True
356356
if disable_entities:
357357
def _forbid_entities(*_args, **_kwargs):
358-
raise expat.ExpatError("xmltodict.parse(): entities are disabled")
359-
360-
def _forbid_entities_default(text):
361-
if not text:
362-
return
363-
stripped = text.lstrip()
364-
if stripped.startswith('<!--'):
365-
return
366-
if stripped.startswith('<!') or stripped.startswith('&'):
367-
_forbid_entities()
368-
369-
# Reject DTD/entity constructs explicitly instead of ignoring them.
370-
parser.DefaultHandler = _forbid_entities_default
358+
raise ValueError("entities are disabled")
359+
371360
parser.EntityDeclHandler = _forbid_entities
372-
parser.StartDoctypeDeclHandler = _forbid_entities
373-
parser.ExternalEntityRefHandler = _forbid_entities
374361
if hasattr(xml_input, 'read'):
375362
parser.ParseFile(xml_input)
376363
elif isgenerator(xml_input):

0 commit comments

Comments
 (0)