From 4426b05550a86a530466eed97aea6b15ec0e43ca Mon Sep 17 00:00:00 2001 From: Derek Wang Date: Mon, 30 Jun 2025 14:38:10 -0700 Subject: [PATCH] Fix mutable default error in MetricsConfig dataclasses (#3137) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Pull Request resolved: https://github.com/pytorch/torchrec/pull/3137 # Context Found a dataclass validation error: "mutable default ... is not allowed: use default_factory". The issue was that `DefaultMetricsConfig` and `EmptyMetricsConfig` were created as static instances holding mutable objects (lists/dicts), and were then used as defaults in dataclass fields. This violates Python's dataclass rules, since every instance would share the same mutable objects. See the Python documentation on mutable default values in dataclasses: https://docs.python.org/3/library/dataclasses.html#mutable-default-values # Changes Added factory functions that return a fresh config object on each call: * `DefaultMetricsConfig` → `_create_default_metrics_config()` * `EmptyMetricsConfig` → `_create_empty_metrics_config()` The module-level `DefaultMetricsConfig` and `EmptyMetricsConfig` names are kept and are now built by calling these factories. Updated the dataclass field in `TrainingAppConfig` to use `field(default_factory=_create_empty_metrics_config)` instead of the static instance. Now each dataclass instance gets its own separate config object, fixing the mutable default error.
Reviewed By: TroyGarden Differential Revision: D77263018 --- torchrec/metrics/metrics_config.py | 43 +++++++++++++++++------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/torchrec/metrics/metrics_config.py b/torchrec/metrics/metrics_config.py index 91039ead3..9883c52cf 100644 --- a/torchrec/metrics/metrics_config.py +++ b/torchrec/metrics/metrics_config.py @@ -197,25 +197,32 @@ class MetricsConfig: ) -DefaultMetricsConfig = MetricsConfig( - rec_tasks=[DefaultTaskInfo], - rec_metrics={ - RecMetricEnum.NE: RecMetricDef( - rec_tasks=[DefaultTaskInfo], window_size=_DEFAULT_WINDOW_SIZE - ), - }, - throughput_metric=ThroughputDef(), - state_metrics=[], -) +def _create_default_metrics_config() -> MetricsConfig: + return MetricsConfig( + rec_tasks=[DefaultTaskInfo], + rec_metrics={ + RecMetricEnum.NE: RecMetricDef( + rec_tasks=[DefaultTaskInfo], window_size=_DEFAULT_WINDOW_SIZE + ), + }, + throughput_metric=ThroughputDef(), + state_metrics=[], + ) -# Explicitly specifying the empty fields to avoid any mistakes cased by simply -# relying on the Python default values, e.g., MetricConfig(). -EmptyMetricsConfig = MetricsConfig( - rec_tasks=[], - rec_metrics={}, - throughput_metric=None, - state_metrics=[], -) + +def _create_empty_metrics_config() -> MetricsConfig: + # Explicitly specifying the empty fields to avoid any mistakes caused by simply + # relying on the Python default values, e.g., MetricsConfig(). + return MetricsConfig( + rec_tasks=[], + rec_metrics={}, + throughput_metric=None, + state_metrics=[], + ) + + +DefaultMetricsConfig: MetricsConfig = _create_default_metrics_config() +EmptyMetricsConfig: MetricsConfig = _create_empty_metrics_config() @dataclass