Skip to content

Optional attributes set to None serialize to empty string. #308

@ferdezgarcia

Description

@ferdezgarcia

Hello;

I've been working on deserializing a very large and gnarly XML export file from a scientific instrument. As is often the case, I spent hours reinventing the wheel with vanilla pydantic before discovering this excellent library, which achieves the same purpose, only better (pydantic-xml is truly excellent).

So far, I've only encountered this minor issue: Setting optional attr fields in a BaseXmlModel which default to None allows me to parse elements that only sometimes have that attribute. However, round-tripping to the original XML is prevented by the default serialization of the field, which dumps None-valued fields as empty strings, thus including the attribute everywhere.

Setting skip_empty to True in BaseXmlModel.to_xml does not work for me because it also drops inner element fields that are likewise optional but that I want explicitly serialized. The only workaround I could find was to add a custom XmlFieldSerializer to the attribute (see example).

Is this the intended default behavior?

Please find attached a sample Python script to recreate the issue.

# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "pydantic-xml>=2.19.0",
# ]
# ///

"""Keyed model serialization."""

from __future__ import annotations  # Python <3.14 compatibility.

from typing import TYPE_CHECKING, Annotated, final
from xml.etree import ElementTree as ET

from pydantic import ConfigDict
from pydantic_xml import BaseXmlModel, XmlFieldSerializer, attr, element
from typing_extensions import Sentinel

if TYPE_CHECKING:
    from typing import ClassVar, Literal

    from pydantic_xml.model import XmlElementWriter


# Unique marker used as the `.get()` default in `main`, so that an absent
# attribute can be told apart from a present one whose value is "".
MISSING = Sentinel("MISSING")


def serialize_key[T: int | None](
    self: BaseXmlModel,
    element: XmlElementWriter,
    value: T,
    field_name: str,
) -> T:
    """Serialize key as string attribute if not `None`."""

    if value is not None:
        element.set_attribute(field_name, str(value))

    return value


class Keyed(BaseXmlModel, tag="Keyed"):
    """Keyed model with default key serializer.

    Bound to the `<Keyed>` XML tag. With the library's default attribute
    serialization, a `None` key is written out as an empty `key` attribute
    (this is what `main` asserts for this class).
    """

    # Instances are frozen and unknown fields are forbidden.
    model_config: ClassVar[ConfigDict] = ConfigDict(
        frozen=True, extra="forbid"
    )

    # Optional integer XML attribute; defaults to None when not supplied.
    key: Annotated[int | None, attr(default=None)]
    # Optional integer child element with tag "Inner"; defaults to None.
    inner: Annotated[int | None, element(tag="Inner", default=None)]


@final
class Patched(Keyed, tag="Keyed"):
    """Keyed model with patched key serializer.

    Same fields and `<Keyed>` tag as `Keyed`, but `key` is serialized through
    `serialize_key`, which only sets the attribute when the value is not
    `None`.
    """

    # Same configuration as the parent model, restated explicitly.
    model_config: ClassVar[ConfigDict] = ConfigDict(
        frozen=True, extra="forbid"
    )

    # Redeclares `key` to attach the custom serializer; the type and the
    # `None` default match the parent's declaration.
    key: Annotated[
        int | None, XmlFieldSerializer(serialize_key), attr(default=None)
    ]


def main() -> None:
    """Check XML round-tripping for `Keyed` and `Patched`.

    Two documents are parsed with each model: one carrying the `key`
    attribute and one without it. The assertions record that `Patched`
    reproduces both documents byte-for-byte, while `Keyed` reproduces only
    the one that has the attribute.
    """

    keyed_doc = b'<Keyed key="0"><Inner /></Keyed>'
    bare_doc = b"<Keyed><Inner /></Keyed>"

    patched_keyed = Patched.from_xml(keyed_doc)
    patched_bare = Patched.from_xml(bare_doc)

    plain_keyed = Keyed.from_xml(keyed_doc)
    plain_bare = Keyed.from_xml(bare_doc)

    opts = {"encoding": "utf-8", "short_empty_elements": True}

    # The custom serializer reproduces both inputs exactly.
    assert patched_keyed.to_xml(**opts) == keyed_doc  # type: ignore
    assert patched_bare.to_xml(**opts) == bare_doc  # type: ignore

    # The default serializer only reproduces the input that has the key.
    assert plain_keyed.to_xml(**opts) == keyed_doc  # type: ignore
    assert plain_bare.to_xml(**opts) != bare_doc  # type: ignore

    # Reason for the mismatch: the default serializer emits a `None` key as
    # an empty-string attribute, whereas the custom one omits it entirely.
    patched_attrs = patched_bare.to_xml_tree().attrib
    assert patched_attrs.get("key", MISSING) is MISSING
    plain_attrs = plain_bare.to_xml_tree().attrib
    assert plain_attrs.get("key", MISSING) == ""


# Run the round-trip checks only when this file is executed as a script.
if __name__ == "__main__":
    main()

Metadata

Metadata

Assignees

No one assigned

    Labels

    question: Further information is requested

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions