Hello;
I've been working on deserializing a very large and gnarly XML export file from a scientific instrument. As is often the case, I spent hours reinventing the wheel with vanilla pydantic before discovering this excellent library to achieve the same purpose (but better; pydantic-xml is truly excellent).
So far, I've only encountered this minor issue: Setting optional attr fields in a BaseXmlModel which default to None allows me to parse elements that only sometimes have that attribute. However, round-tripping to the original XML is prevented by the default serialization of the field, which dumps None-valued fields as empty strings, thus including the attribute everywhere.
Setting skip_empty to True in BaseXmlModel.to_xml does not work for me because it nukes inner element fields that are likewise optional but that I want explicitly serialized. The only workaround I could find was to add a custom XmlFieldSerializer to the attribute (see example).
Is this the intended default behavior?
Please find attached a sample Python script to recreate the issue.
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "pydantic-xml>=2.19.0",
# ]
# ///
"""Keyed model serialization."""
from __future__ import annotations # Python <3.14 compatibility.
from typing import TYPE_CHECKING, Annotated, final
from xml.etree import ElementTree as ET
from pydantic import ConfigDict
from pydantic_xml import BaseXmlModel, XmlFieldSerializer, attr, element
from typing_extensions import Sentinel
if TYPE_CHECKING:
from typing import ClassVar, Literal
from pydantic_xml.model import XmlElementWriter
# Unique marker used in `main` as the `.get()` default, so that an absent
# XML attribute can be told apart from any real attribute value.
MISSING = Sentinel("MISSING")
def serialize_key[T: int | None](
self: BaseXmlModel,
element: XmlElementWriter,
value: T,
field_name: str,
) -> T:
"""Serialize key as string attribute if not `None`."""
if value is not None:
element.set_attribute(field_name, str(value))
return value
class Keyed(BaseXmlModel, tag="Keyed"):
    """Baseline model whose `key` attribute uses the stock serializer.

    Both fields are optional and default to `None`.
    """

    # Frozen instances; extra fields are forbidden.
    model_config: ClassVar[ConfigDict] = ConfigDict(
        extra="forbid",
        frozen=True,
    )

    # Optional XML attribute of the element.
    key: Annotated[
        int | None,
        attr(default=None),
    ]
    # Optional child element tagged `Inner`.
    inner: Annotated[
        int | None,
        element(tag="Inner", default=None),
    ]
@final
class Patched(Keyed, tag="Keyed"):
    """Variant of `Keyed` whose `key` goes through `serialize_key`.

    Only the `key` field is redeclared; `inner` comes from `Keyed`.
    """

    # Same settings as the parent model, restated explicitly.
    model_config: ClassVar[ConfigDict] = ConfigDict(
        extra="forbid",
        frozen=True,
    )

    # Optional attribute, written by the custom serializer so that a
    # `None` value produces no attribute at all.
    key: Annotated[
        int | None,
        XmlFieldSerializer(serialize_key),
        attr(default=None),
    ]
def main() -> None:
    """Check XML round-trips with and without the patched serializer."""
    with_key = b'<Keyed key="0"><Inner /></Keyed>'
    without_key = b"<Keyed><Inner /></Keyed>"

    patched_with_key = Patched.from_xml(with_key)
    patched_without_key = Patched.from_xml(without_key)
    keyed_with_key = Keyed.from_xml(with_key)
    keyed_without_key = Keyed.from_xml(without_key)

    dump_options = {"encoding": "utf-8", "short_empty_elements": True}

    def dump(model: BaseXmlModel) -> str | bytes:
        """Serialize `model` with the shared output options."""
        return model.to_xml(**dump_options)  # type: ignore

    # The patched serializer reproduces both inputs exactly.
    assert dump(patched_with_key) == with_key
    assert dump(patched_without_key) == without_key

    # The default serializer only round-trips when the attribute is present.
    assert dump(keyed_with_key) == with_key
    assert dump(keyed_without_key) != without_key

    # Cause of the mismatch: the default serializer emits the `None`-valued
    # attribute as an empty string, while the patched one leaves it out.
    patched_attr = patched_without_key.to_xml_tree().attrib.get("key", MISSING)
    assert patched_attr is MISSING
    default_attr = keyed_without_key.to_xml_tree().attrib.get("key", MISSING)
    assert default_attr == ""
# Run the round-trip checks only when this file is executed as a script.
if __name__ == "__main__":
    main()
Hello;
I've been working on deserializing a very large and gnarly XML export file from a scientific instrument. As is often the case, I spent hours reinventing the wheel with vanilla `pydantic` before discovering this excellent library to achieve the same purpose (but better; `pydantic-xml` is truly excellent).
So far, I've only encountered this minor issue: Setting optional `attr` fields in a `BaseXmlModel` which default to `None` allows me to parse elements that only sometimes have that attribute. However, round-tripping to the original XML is prevented by the default serialization of the field, which dumps `None`-valued fields as empty strings, thus including the attribute everywhere.
Setting `skip_empty` to `True` in `BaseXmlModel.to_xml` does not work for me because it nukes inner `element` fields that are likewise optional but that I want explicitly serialized. The only workaround I could find was to add a custom `XmlFieldSerializer` to the attribute (see example).
Is this the intended default behavior?
Please find attached a sample Python script to recreate the issue.