Skip to content

Commit a497fed

Browse files
committed
feat(unparse): add selective force_cdata support (bool/tuple/callable)
- Add _should_force_cdata method similar to _should_force_list
- Support tuple of element names and callable functions for force_cdata
- Maintain full backwards compatibility with boolean values
- Add comprehensive tests for all force_cdata scenarios
- Update documentation with consistent examples for force_cdata and force_list

Fixes #375
1 parent 5b3423c commit a497fed

File tree

3 files changed

+119
-3
lines changed

3 files changed

+119
-3
lines changed

README.md

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,13 +210,13 @@ Parse XML input into a Python dictionary.
210210
- `xml_attribs=True`: Include attributes in output dict (with `attr_prefix`).
211211
- `attr_prefix='@'`: Prefix for XML attributes in the dict.
212212
- `cdata_key='#text'`: Key for text content in the dict.
213-
- `force_cdata=False`: Force all text content to be wrapped as CDATA.
213+
- `force_cdata=False`: Force text content to be wrapped as CDATA (stored under `cdata_key`). Can be a boolean (`True` for every element, `False` for none), a tuple of element names to force CDATA for, or a callable that receives `(path, key, value)` and returns True/False.
214214
- `cdata_separator=''`: Separator string to join multiple text nodes. This joins adjacent text nodes. For example, set to a space to avoid concatenation.
215215
- `postprocessor=None`: Function to modify parsed items.
216216
- `dict_constructor=dict`: Constructor for dictionaries (e.g., dict or OrderedDict).
217217
- `strip_whitespace=True`: Remove leading/trailing whitespace in text nodes. Default is True; this trims whitespace in text nodes. Set to False to preserve whitespace exactly.
218218
- `namespaces=None`: Mapping of namespaces to prefixes, or None to keep full URIs.
219-
- `force_list=None`: List of keys or callable to force list values. Useful for elements that may appear once or multiple times. Ensures consistent list output. Can also be a callable for fine-grained control.
219+
- `force_list=None`: Force list values for specific elements. Can be a boolean (`True` for every element, `False` for none), a tuple of element names to force lists for, or a callable that receives `(path, key, value)` and returns True/False. Useful for elements that may appear once or multiple times, so that the output is consistently a list.
220220
- `item_depth=0`: Depth at which to call `item_callback`.
221221
- `item_callback=lambda *args: True`: Function called on items at `item_depth`.
222222
- `comment_key='#comment'`: Key used for XML comments when `process_comments=True`. Only used when `process_comments=True`. Comments can be preserved but multiple top-level comments may not retain order.
@@ -239,6 +239,50 @@ Convert a Python dictionary back into XML.
239239

240240
Note: xmltodict aims to cover the common 90% of cases. It does not preserve every XML nuance (attribute order, mixed content ordering, multiple top-level comments). For exact fidelity, use a full XML library such as lxml.
241241

242+
## Examples
243+
244+
### Selective force_cdata
245+
246+
The `force_cdata` parameter can be used to selectively force CDATA wrapping for specific elements:
247+
248+
```python
249+
>>> xml = '<a><b>data1</b><c>data2</c><d>data3</d></a>'
250+
>>> # Force CDATA only for 'b' and 'd' elements
251+
>>> xmltodict.parse(xml, force_cdata=('b', 'd'))
252+
{'a': {'b': {'#text': 'data1'}, 'c': 'data2', 'd': {'#text': 'data3'}}}
253+
254+
>>> # Force CDATA for all elements (original behavior)
255+
>>> xmltodict.parse(xml, force_cdata=True)
256+
{'a': {'b': {'#text': 'data1'}, 'c': {'#text': 'data2'}, 'd': {'#text': 'data3'}}}
257+
258+
>>> # Use a callable for complex logic
259+
>>> def should_force_cdata(path, key, value):
260+
... return key in ['b', 'd'] and len(value) > 4
261+
>>> xmltodict.parse(xml, force_cdata=should_force_cdata)
262+
{'a': {'b': {'#text': 'data1'}, 'c': 'data2', 'd': {'#text': 'data3'}}}
263+
```
264+
265+
### Selective force_list
266+
267+
The `force_list` parameter can be used to selectively force list values for specific elements:
268+
269+
```python
270+
>>> xml = '<a><b>data1</b><b>data2</b><c>data3</c></a>'
271+
>>> # Force lists only for 'b' elements
272+
>>> xmltodict.parse(xml, force_list=('b',))
273+
{'a': {'b': ['data1', 'data2'], 'c': 'data3'}}
274+
275+
>>> # Force lists for all elements (original behavior)
276+
>>> xmltodict.parse(xml, force_list=True)
277+
{'a': [{'b': ['data1', 'data2'], 'c': ['data3']}]}
278+
279+
>>> # Use a callable for complex logic
280+
>>> def should_force_list(path, key, value):
281+
... return key in ['b'] and isinstance(value, str)
282+
>>> xmltodict.parse(xml, force_list=should_force_list)
283+
{'a': {'b': ['data1', 'data2'], 'c': 'data3'}}
284+
```
285+
242286
## Ok, how do I get it?
243287

244288
### Using pypi

tests/test_xmltodict.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,68 @@ def test_force_cdata(self):
3939
self.assertEqual(parse('<a>data</a>', force_cdata=True),
4040
{'a': {'#text': 'data'}})
4141

42+
def test_selective_force_cdata_tuple(self):
    # Only the elements named in the tuple have their text placed under
    # the '#text' key; every other element keeps its plain string value.
    document = "<a><b>data1</b><c>data2</c><d>data3</d></a>"
    wrapped_names = ("b", "d")
    self.assertEqual(
        parse(document, force_cdata=wrapped_names),
        {"a": {"b": {"#text": "data1"}, "c": "data2", "d": {"#text": "data3"}}},
    )
50+
51+
def test_selective_force_cdata_single_element(self):
    # A one-element tuple wraps the text of that element only.
    parsed = parse("<a><b>data1</b><c>data2</c></a>", force_cdata=("b",))
    self.assertEqual(parsed, {"a": {"b": {"#text": "data1"}, "c": "data2"}})
57+
58+
def test_selective_force_cdata_empty_tuple(self):
    # With no names listed, the outcome is the same as force_cdata=False.
    no_names = ()
    document = "<a><b>data1</b><c>data2</c></a>"
    self.assertEqual(
        parse(document, force_cdata=no_names),
        {"a": {"b": "data1", "c": "data2"}},
    )
64+
65+
def test_selective_force_cdata_callable(self):
    # A predicate taking (path, key, value) decides, element by element,
    # whether the text is wrapped.
    def should_force_cdata(path, key, value):
        return key in ("b", "d")

    document = "<a><b>data1</b><c>data2</c><d>data3</d></a>"
    self.assertEqual(
        parse(document, force_cdata=should_force_cdata),
        {"a": {"b": {"#text": "data1"}, "c": "data2", "d": {"#text": "data3"}}},
    )
77+
78+
def test_selective_force_cdata_nested_elements(self):
    # 'c' sits one level below 'b'; only 'c' is expected to be wrapped.
    document = "<a><b><c>data1</c></b><d>data2</d></a>"
    parsed = parse(document, force_cdata=("c",))
    self.assertEqual(
        parsed,
        {"a": {"b": {"c": {"#text": "data1"}}, "d": "data2"}},
    )
84+
85+
def test_selective_force_cdata_with_attributes(self):
    # An element carrying an attribute and listed in force_cdata yields
    # both the '@attr' entry and the '#text' entry.
    document = '<a><b attr="value">data1</b><c>data2</c></a>'
    self.assertEqual(
        parse(document, force_cdata=("b",)),
        {"a": {"b": {"@attr": "value", "#text": "data1"}, "c": "data2"}},
    )
91+
92+
def test_selective_force_cdata_backwards_compatibility(self):
    # Plain booleans must keep working: True wraps every element's text,
    # False wraps none. True is checked first, then False.
    document = "<a><b>data1</b><c>data2</c></a>"
    boolean_cases = (
        (True, {"a": {"b": {"#text": "data1"}, "c": {"#text": "data2"}}}),
        (False, {"a": {"b": "data1", "c": "data2"}}),
    )
    for flag, expected in boolean_cases:
        self.assertEqual(parse(document, force_cdata=flag), expected)
103+
42104
def test_custom_cdata(self):
43105
self.assertEqual(parse('<a>data</a>',
44106
force_cdata=True,

xmltodict.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def endElement(self, full_name):
139139
self.item, self.data = self.stack.pop()
140140
if self.strip_whitespace and data:
141141
data = data.strip() or None
142-
if data and self.force_cdata and item is None:
142+
if data and self._should_force_cdata(name, data) and item is None:
143143
item = self.dict_constructor()
144144
if item is not None:
145145
if data:
@@ -194,6 +194,16 @@ def _should_force_list(self, key, value):
194194
except TypeError:
195195
return self.force_list(self.path[:-1], key, value)
196196

197+
def _should_force_cdata(self, key, value):
    """Decide whether the text of element ``key`` must be force-wrapped.

    The decision depends on the shape of ``self.force_cdata``:

    * falsy (``False``, ``None``, empty container): never force;
    * ``bool``: the flag itself is returned;
    * ``str``: treated as a single element name and compared for equality;
    * any other object supporting ``in``: membership test on ``key``;
    * otherwise: called as ``force_cdata(self.path[:-1], key, value)``.

    Args:
        key: Name of the element being closed.
        value: Text data collected for that element.

    Returns:
        The result of the flag, comparison, membership test or callable.
    """
    force_cdata = self.force_cdata
    if not force_cdata:
        return False
    if isinstance(force_cdata, bool):
        return force_cdata
    if isinstance(force_cdata, str):
        # ``key in "abc"`` is a substring test and would match 'a', 'b',
        # 'ab', ...; a bare string means exactly one element name.
        return key == force_cdata
    try:
        return key in force_cdata
    except TypeError:
        # Not a container: fall back to calling it. The path passed is the
        # current path without its last entry.
        return force_cdata(self.path[:-1], key, value)
206+
197207

198208
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
199209
namespace_separator=':', disable_entities=True, process_comments=False, **kwargs):

0 commit comments

Comments
 (0)