1
1
from __future__ import annotations as _annotations
2
2
3
3
from collections .abc import Iterable , Iterator , Mapping
4
- from dataclasses import asdict , dataclass , is_dataclass
4
+ from dataclasses import asdict , dataclass , field , fields , is_dataclass
5
5
from datetime import date
6
- from typing import Any
6
+ from typing import Any , Literal
7
7
from xml .etree import ElementTree
8
8
9
9
from pydantic import BaseModel
10
10
11
11
__all__ = ('format_as_xml' ,)
12
12
13
+ from pydantic .fields import ComputedFieldInfo , FieldInfo
14
+
13
15
14
16
def format_as_xml (
15
17
obj : Any ,
16
18
root_tag : str | None = None ,
17
19
item_tag : str = 'item' ,
18
20
none_str : str = 'null' ,
19
21
indent : str | None = ' ' ,
22
+ include_field_info : Literal ['once' ] | bool = False ,
20
23
) -> str :
21
24
"""Format a Python object as XML.
22
25
@@ -33,6 +36,10 @@ def format_as_xml(
33
36
for dataclasses and Pydantic models.
34
37
none_str: String to use for `None` values.
35
38
indent: Indentation string to use for pretty printing.
39
+ include_field_info: Whether to include attributes like Pydantic `Field` attributes and dataclasses `field()`
40
+ `metadata` as XML attributes. In both cases the allowed `Field` attributes and `field()` metadata keys are
41
+ `title` and `description`. If a field is repeated in the data (e.g. in a list) by setting `once`
42
+ the attributes are included only in the first occurrence of an XML element relative to the same field.
36
43
37
44
Returns:
38
45
XML representation of the object.
@@ -51,7 +58,12 @@ def format_as_xml(
51
58
'''
52
59
```
53
60
"""
54
- el = _ToXml (item_tag = item_tag , none_str = none_str ).to_xml (obj , root_tag )
61
+ el = _ToXml (
62
+ data = obj ,
63
+ item_tag = item_tag ,
64
+ none_str = none_str ,
65
+ include_field_info = include_field_info ,
66
+ ).to_xml (root_tag )
55
67
if root_tag is None and el .text is None :
56
68
join = '' if indent is None else '\n '
57
69
return join .join (_rootless_xml_elements (el , indent ))
@@ -63,11 +75,26 @@ def format_as_xml(
63
75
64
76
@dataclass
65
77
class _ToXml :
78
+ data : Any
66
79
item_tag : str
67
80
none_str : str
68
-
69
- def to_xml (self , value : Any , tag : str | None ) -> ElementTree .Element :
70
- element = ElementTree .Element (self .item_tag if tag is None else tag )
81
+ include_field_info : Literal ['once' ] | bool
82
+ # a map of Pydantic and dataclasses Field paths to their metadata:
83
+ # a field unique string representation and its class
84
+ _fields_info : dict [str , tuple [str , FieldInfo | ComputedFieldInfo ]] = field (default_factory = dict )
85
+ # keep track of fields we have extracted attributes from
86
+ _included_fields : set [str ] = field (default_factory = set )
87
+ # keep track of class names for dataclasses and Pydantic models, that occur in lists
88
+ _element_names : dict [str , str ] = field (default_factory = dict )
89
+ # flag for parsing dataclasses and Pydantic models once
90
+ _is_info_extracted : bool = False
91
+ _FIELD_ATTRIBUTES = ('title' , 'description' )
92
+
93
+ def to_xml (self , tag : str | None = None ) -> ElementTree .Element :
94
+ return self ._to_xml (value = self .data , path = '' , tag = tag )
95
+
96
+ def _to_xml (self , value : Any , path : str , tag : str | None = None ) -> ElementTree .Element :
97
+ element = self ._create_element (self .item_tag if tag is None else tag , path )
71
98
if value is None :
72
99
element .text = self .none_str
73
100
elif isinstance (value , str ):
@@ -79,31 +106,96 @@ def to_xml(self, value: Any, tag: str | None) -> ElementTree.Element:
79
106
elif isinstance (value , date ):
80
107
element .text = value .isoformat ()
81
108
elif isinstance (value , Mapping ):
82
- self ._mapping_to_xml (element , value ) # pyright: ignore[reportUnknownArgumentType]
109
+ if tag is None and path in self ._element_names :
110
+ element .tag = self ._element_names [path ]
111
+ self ._mapping_to_xml (element , value , path ) # pyright: ignore[reportUnknownArgumentType]
83
112
elif is_dataclass (value ) and not isinstance (value , type ):
113
+ self ._init_structure_info ()
84
114
if tag is None :
85
- element = ElementTree .Element (value .__class__ .__name__ )
86
- dc_dict = asdict (value )
87
- self ._mapping_to_xml (element , dc_dict )
115
+ element .tag = value .__class__ .__name__
116
+ self ._mapping_to_xml (element , asdict (value ), path )
88
117
elif isinstance (value , BaseModel ):
118
+ self ._init_structure_info ()
89
119
if tag is None :
90
- element = ElementTree .Element (value .__class__ .__name__ )
91
- self ._mapping_to_xml (element , value .model_dump (mode = 'python' ))
120
+ element .tag = value .__class__ .__name__
121
+ # by dumping the model we loose all metadata in nested data structures,
122
+ # but we have collected it when called _init_structure_info
123
+ self ._mapping_to_xml (element , value .model_dump (), path )
92
124
elif isinstance (value , Iterable ):
93
- for item in value : # pyright: ignore[reportUnknownVariableType]
94
- item_el = self .to_xml (item , None )
95
- element .append (item_el )
125
+ for n , item in enumerate (value ): # pyright: ignore[reportUnknownVariableType,reportUnknownArgumentType]
126
+ element .append (self ._to_xml (value = item , path = f'{ path } .[{ n } ]' if path else f'[{ n } ]' ))
96
127
else :
97
128
raise TypeError (f'Unsupported type for XML formatting: { type (value )} ' )
98
129
return element
99
130
100
- def _mapping_to_xml (self , element : ElementTree .Element , mapping : Mapping [Any , Any ]) -> None :
131
+ def _create_element (self , tag : str , path : str ) -> ElementTree .Element :
132
+ element = ElementTree .Element (tag )
133
+ if path in self ._fields_info :
134
+ field_repr , field_info = self ._fields_info [path ]
135
+ if self .include_field_info and self .include_field_info != 'once' or field_repr not in self ._included_fields :
136
+ field_attributes = self ._extract_attributes (field_info )
137
+ for k , v in field_attributes .items ():
138
+ element .set (k , v )
139
+ self ._included_fields .add (field_repr )
140
+ return element
141
+
142
+ def _init_structure_info (self ):
143
+ """Create maps with all data information (fields info and class names), if not already created."""
144
+ if not self ._is_info_extracted :
145
+ self ._parse_data_structures (self .data )
146
+ self ._is_info_extracted = True
147
+
148
+ def _mapping_to_xml (
149
+ self ,
150
+ element : ElementTree .Element ,
151
+ mapping : Mapping [Any , Any ],
152
+ path : str = '' ,
153
+ ) -> None :
101
154
for key , value in mapping .items ():
102
155
if isinstance (key , int ):
103
156
key = str (key )
104
157
elif not isinstance (key , str ):
105
158
raise TypeError (f'Unsupported key type for XML formatting: { type (key )} , only str and int are allowed' )
106
- element .append (self .to_xml (value , key ))
159
+ element .append (self ._to_xml (value = value , path = f'{ path } .{ key } ' if path else key , tag = key ))
160
+
161
+ def _parse_data_structures (
162
+ self ,
163
+ value : Any ,
164
+ path : str = '' ,
165
+ ):
166
+ """Parse data structures as dataclasses or Pydantic models to extract element names and attributes."""
167
+ if value is None or isinstance (value , (str | int | float | date | bytearray | bytes | bool )):
168
+ return
169
+ elif isinstance (value , Mapping ):
170
+ for k , v in value .items (): # pyright: ignore[reportUnknownVariableType]
171
+ self ._parse_data_structures (v , f'{ path } .{ k } ' if path else f'{ k } ' )
172
+ elif is_dataclass (value ) and not isinstance (value , type ):
173
+ self ._element_names [path ] = value .__class__ .__name__
174
+ for field in fields (value ):
175
+ new_path = f'{ path } .{ field .name } ' if path else field .name
176
+ if self .include_field_info and field .metadata :
177
+ attributes = {k : v for k , v in field .metadata .items () if k in self ._FIELD_ATTRIBUTES }
178
+ if attributes :
179
+ field_repr = f'{ value .__class__ .__name__ } .{ field .name } '
180
+ self ._fields_info [new_path ] = (field_repr , FieldInfo (** attributes ))
181
+ self ._parse_data_structures (getattr (value , field .name ), new_path )
182
+ elif isinstance (value , BaseModel ):
183
+ self ._element_names [path ] = value .__class__ .__name__
184
+ for model_fields in (value .__class__ .model_fields , value .__class__ .model_computed_fields ):
185
+ for field , info in model_fields .items ():
186
+ new_path = f'{ path } .{ field } ' if path else field
187
+ if self .include_field_info and (isinstance (info , ComputedFieldInfo ) or not info .exclude ):
188
+ field_repr = f'{ value .__class__ .__name__ } .{ field } '
189
+ self ._fields_info [new_path ] = (field_repr , info )
190
+ self ._parse_data_structures (getattr (value , field ), new_path )
191
+ elif isinstance (value , Iterable ):
192
+ for n , item in enumerate (value ): # pyright: ignore[reportUnknownVariableType,reportUnknownArgumentType]
193
+ new_path = f'{ path } .[{ n } ]' if path else f'[{ n } ]'
194
+ self ._parse_data_structures (item , new_path )
195
+
196
+ @classmethod
197
+ def _extract_attributes (cls , info : FieldInfo | ComputedFieldInfo ) -> dict [str , str ]:
198
+ return {attr : str (value ) for attr in cls ._FIELD_ATTRIBUTES if (value := getattr (info , attr , None )) is not None }
107
199
108
200
109
201
def _rootless_xml_elements (root : ElementTree .Element , indent : str | None ) -> Iterator [str ]:
0 commit comments