Skip to content

Commit e43537e

Browse files
committed
feat(unparse): add limited XML comment round-trip; unify _emit behavior
- Emit `#comment` at any depth via `_emit`, preserving dict order
- Support top-level comments without tripping the multiple-roots check (lazy validation)
- Use `_XMLGenerator.comment` for output
- Add tests for element/top-level comments and round-trip behavior
1 parent e5039ad commit e43537e

File tree

2 files changed

+81
-10
lines changed

2 files changed

+81
-10
lines changed

tests/test_dicttoxml.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,26 @@ def test_pretty_print(self):
144144
self.assertEqual(xml, unparse(obj, pretty=True,
145145
newl=newl, indent=indent))
146146

147+
def test_unparse_with_element_comment(self):
    # A '#comment' entry nested inside an element is serialised as an XML
    # comment, in dict order relative to its sibling <b>.
    document = {"a": {"#comment": "note", "b": "1"}}
    serialised = _strip(unparse(document, full_document=True))
    self.assertEqual(serialised, "<a><!--note--><b>1</b></a>")
151+
152+
def test_unparse_with_multiple_element_comments(self):
    # A list under '#comment' yields one XML comment per item, in list order.
    document = {"a": {"#comment": ["n1", "n2"], "b": "1"}}
    serialised = _strip(unparse(document, full_document=True))
    self.assertEqual(serialised, "<a><!--n1--><!--n2--><b>1</b></a>")
156+
157+
def test_unparse_with_top_level_comment(self):
    # A '#comment' key next to the single root element must not be rejected
    # as a second root; it is emitted as a comment ahead of <a>.
    document = {"#comment": "top", "a": "1"}
    serialised = _strip(unparse(document, full_document=True))
    self.assertEqual(serialised, "<!--top--><a>1</a>")
161+
162+
def test_unparse_with_multiple_top_level_comments(self):
    # Several top-level comments (given as a list) are all written, in list
    # order, before the root element.
    document = {"#comment": ["t1", "t2"], "a": "1"}
    serialised = _strip(unparse(document, full_document=True))
    self.assertEqual(serialised, "<!--t1--><!--t2--><a>1</a>")
166+
147167
def test_pretty_print_with_int_indent(self):
148168
obj = {'a': OrderedDict((
149169
('b', [{'c': [1, 2]}, 3]),
@@ -164,6 +184,30 @@ def test_pretty_print_with_int_indent(self):
164184
self.assertEqual(xml, unparse(obj, pretty=True,
165185
newl=newl, indent=indent))
166186

187+
def test_comment_roundtrip_limited(self):
    # Source document: one comment before the root, one inside it, and one
    # after it.
    xml = """
<!--top1--><a><b>1</b><!--e1--></a><!--top2-->
"""
    # First pass: parse with comment processing enabled, then serialise.
    first_pass = parse(xml, process_comments=True)
    regenerated = unparse(first_pass)
    # Second pass: re-parse the regenerated XML the same way.
    second_pass = parse(regenerated, process_comments=True)

    # Element content survives the round trip.
    self.assertIn('a', second_pass)
    root = second_pass['a']
    self.assertEqual(root['b'], '1')

    # The comment inside <a> comes back under its '#comment' key.
    self.assertEqual(root['#comment'], 'e1')

    # Top-level comments may come back as a single value or a list; compare
    # as a set so that ordering does not matter.
    top_level = second_pass.get('#comment')
    self.assertIsNotNone(top_level)
    if not isinstance(top_level, list):
        top_level = [top_level]
    self.assertEqual(set(top_level), {'top1', 'top2'})
210+
167211
def test_encoding(self):
168212
try:
169213
value = unichr(39321)

xmltodict.py

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"Makes working with XML feel like you are working with JSON"
33

44
from xml.parsers import expat
5-
from xml.sax.saxutils import XMLGenerator
5+
from xml.sax.saxutils import XMLGenerator, escape
66
from xml.sax.xmlreader import AttributesImpl
77
from io import StringIO
88

@@ -459,7 +459,25 @@ def _emit(key, value, content_handler,
459459
namespace_separator=':',
460460
namespaces=None,
461461
full_document=True,
462-
expand_iter=None):
462+
expand_iter=None,
463+
comment_key='#comment'):
464+
if isinstance(key, str) and key == comment_key:
465+
comments_list = value if isinstance(value, list) else [value]
466+
if isinstance(indent, int):
467+
indent = " " * indent
468+
for comment_text in comments_list:
469+
if comment_text is None:
470+
continue
471+
comment_text = _convert_value_to_string(comment_text)
472+
if comment_text == "":
473+
continue
474+
if pretty:
475+
content_handler.ignorableWhitespace(depth * indent)
476+
content_handler.comment(comment_text)
477+
if pretty:
478+
content_handler.ignorableWhitespace(newl)
479+
return
480+
463481
key = _process_namespace(key, namespaces, namespace_separator, attr_prefix)
464482
if preprocessor is not None:
465483
result = preprocessor(key, value)
@@ -519,7 +537,7 @@ def _emit(key, value, content_handler,
519537
attr_prefix, cdata_key, depth+1, preprocessor,
520538
pretty, newl, indent, namespaces=namespaces,
521539
namespace_separator=namespace_separator,
522-
expand_iter=expand_iter)
540+
expand_iter=expand_iter, comment_key=comment_key)
523541
if cdata is not None:
524542
content_handler.characters(cdata)
525543
if pretty and children:
@@ -529,8 +547,13 @@ def _emit(key, value, content_handler,
529547
content_handler.ignorableWhitespace(newl)
530548

531549

550+
class _XMLGenerator(XMLGenerator):
551+
def comment(self, text):
552+
self._write(f"<!--{escape(text)}-->")
553+
554+
532555
def unparse(input_dict, output=None, encoding='utf-8', full_document=True,
533-
short_empty_elements=False,
556+
short_empty_elements=False, comment_key='#comment',
534557
**kwargs):
535558
"""Emit an XML document for the given `input_dict` (reverse of `parse`).
536559
@@ -546,21 +569,25 @@ def unparse(input_dict, output=None, encoding='utf-8', full_document=True,
546569
can be customized with the `newl` and `indent` parameters.
547570
548571
"""
549-
if full_document and len(input_dict) != 1:
550-
raise ValueError('Document must have exactly one root.')
551572
must_return = False
552573
if output is None:
553574
output = StringIO()
554575
must_return = True
555576
if short_empty_elements:
556-
content_handler = XMLGenerator(output, encoding, True)
577+
content_handler = _XMLGenerator(output, encoding, True)
557578
else:
558-
content_handler = XMLGenerator(output, encoding)
579+
content_handler = _XMLGenerator(output, encoding)
559580
if full_document:
560581
content_handler.startDocument()
582+
seen_root = False
561583
for key, value in input_dict.items():
562-
_emit(key, value, content_handler, full_document=full_document,
563-
**kwargs)
584+
if key != comment_key and full_document and seen_root:
585+
raise ValueError("Document must have exactly one root.")
586+
_emit(key, value, content_handler, full_document=full_document, comment_key=comment_key, **kwargs)
587+
if key != comment_key:
588+
seen_root = True
589+
if full_document and not seen_root:
590+
raise ValueError("Document must have exactly one root.")
564591
if full_document:
565592
content_handler.endDocument()
566593
if must_return:

0 commit comments

Comments
 (0)