Please Fix dependency issues? #5612

Status: Open · wants to merge 16 commits into base: main
51 changes: 15 additions & 36 deletions README.md
@@ -12,9 +12,21 @@

--------------------------------------------------------------------------------

-Fairseq(-py) is a sequence modeling toolkit that allows researchers and
-developers to train custom models for translation, summarization, language
-modeling and other text generation tasks.
+# Fairseq
+
+Fairseq(-py) is a sequence modeling toolkit that allows researchers and developers to train custom models for translation, summarization, language modeling and other text generation tasks.
+
+## New in Version 0.13.0
+- Added support for PyTorch 2.6+ with safe globals handling
+- Updated dependency requirements for modern Python environments
+- Improved Windows compatibility
+- Added explicit fairscale dependency
+
+## Requirements and Installation
+* [PyTorch](http://pytorch.org/) version >= 2.6.0
+* Python version >= 3.8
+* For training new models, you'll also need an NVIDIA GPU and [NCCL](https://github.com/NVIDIA/nccl)
+* **For faster training** install NVIDIA's [apex](https://github.com/NVIDIA/apex) library with the `--cuda_ext` and `--deprecated_fused_adam` options

We provide reference implementations of various sequence modeling papers:

Expand Down Expand Up @@ -146,39 +158,6 @@ en2de.translate('Hello world', beam=5)
and [RoBERTa](https://pytorch.org/hub/pytorch_fairseq_roberta/) for more examples.

@@ -146,39 +158,6 @@

-# Requirements and Installation
-
-* [PyTorch](http://pytorch.org/) version >= 1.10.0
-* Python version >= 3.8
-* For training new models, you'll also need an NVIDIA GPU and [NCCL](https://github.com/NVIDIA/nccl)
-* **To install fairseq** and develop locally:
-
-``` bash
-git clone https://github.com/pytorch/fairseq
-cd fairseq
-pip install --editable ./
-
-# on MacOS:
-# CFLAGS="-stdlib=libc++" pip install --editable ./
-
-# to install the latest stable release (0.10.x)
-# pip install fairseq
-```
-
-* **For faster training** install NVIDIA's [apex](https://github.com/NVIDIA/apex) library:
-
-``` bash
-git clone https://github.com/NVIDIA/apex
-cd apex
-pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" \
-  --global-option="--deprecated_fused_adam" --global-option="--xentropy" \
-  --global-option="--fast_multihead_attn" ./
-```
-
-* **For large datasets** install [PyArrow](https://arrow.apache.org/docs/python/install.html#using-pip): `pip install pyarrow`
-* If you use Docker make sure to increase the shared memory size either with `--ipc=host` or `--shm-size`
-as command line options to `nvidia-docker run` .

# Getting Started

The [full documentation](https://fairseq.readthedocs.io/) contains instructions
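For context on the "safe globals handling" bullet above: PyTorch 2.6 changed the default of `torch.load`'s `weights_only` argument to `True`, which rejects pickled non-tensor objects such as the `argparse.Namespace` instances stored inside older fairseq checkpoints. A minimal sketch of the two workarounds, assuming a trusted checkpoint at the hypothetical path `checkpoint.pt` (a real fairseq checkpoint may need more classes allowlisted than shown):

```python
from argparse import Namespace

import torch

# Option 1: opt out of weights_only; only acceptable for trusted checkpoints,
# since unpickling can execute arbitrary code
state = torch.load("checkpoint.pt", map_location="cpu", weights_only=False)

# Option 2: keep the weights_only=True default (PyTorch 2.6+) and allowlist
# the classes the checkpoint is known to contain (API available since 2.4)
torch.serialization.add_safe_globals([Namespace])
state = torch.load("checkpoint.pt", map_location="cpu")
```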
7 changes: 7 additions & 0 deletions examples/MMPT/mmpt/utils/load_config.py
@@ -59,6 +59,13 @@ def recursive_config(config_path):
        includes = config.includes
        config.pop("includes")
        base_config = recursive_config(includes)
+
+        # Filter out any None or MISSING ("???") values from config before
+        # merging; accessing a MISSING key directly raises, so test with
+        # OmegaConf.is_missing first
+        if isinstance(config, omegaconf.DictConfig):
+            for key in list(config.keys()):
+                if OmegaConf.is_missing(config, key) or config[key] is None:
+                    config.pop(key)
+
        config = OmegaConf.merge(base_config, config)
    return config

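Why the new filter matters: `recursive_config` merges an including config over its base, and with newer omegaconf a `None` (or MISSING, `"???"`) value in the child no longer falls through to the base value; against a structured base it can fail validation outright. The same filter is added in `fairseq/dataclass/initialize.py` below. A minimal sketch of the failure mode and the workaround, assuming omegaconf 2.1+:

```python
from dataclasses import dataclass

from omegaconf import OmegaConf
from omegaconf.errors import ValidationError

@dataclass
class OptimizerConfig:
    lr: float = 0.1

base = OmegaConf.structured(OptimizerConfig)
override = OmegaConf.create({"lr": None})  # e.g. a key left unset in a YAML include

try:
    OmegaConf.merge(base, override)
except ValidationError:
    print("merge rejected: None is not valid for the non-Optional field 'lr'")

# Dropping None/MISSING keys before merging, as the patch does, keeps the base value
for key in [k for k in list(override.keys())
            if OmegaConf.is_missing(override, k) or override[key] is None]:
    override.pop(key)
print(OmegaConf.merge(base, override).lr)  # 0.1
```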
22 changes: 14 additions & 8 deletions examples/speech_recognition/kaldi/kaldi_initializer.py
@@ -669,7 +669,7 @@ def initalize_kaldi(cfg: KaldiInitializerConfig) -> Path:
    return hlg_graph


-@hydra.main(config_path=config_path, config_name="kaldi_initializer")
+@hydra.main(version_base=None, config_path=config_path, config_name="kaldi_initializer")
def cli_main(cfg: KaldiInitializerConfig) -> None:
    container = OmegaConf.to_container(cfg, resolve=True, enum_to_str=True)
    cfg = OmegaConf.create(container)
@@ -683,13 +683,19 @@ def cli_main(cfg: KaldiInitializerConfig) -> None:
    logging.basicConfig(level=logging.INFO)

    try:
-        from hydra._internal.utils import (
-            get_args,
-        )  # pylint: disable=import-outside-toplevel
-
-        cfg_name = get_args().config_name or "kaldi_initializer"
-    except ImportError:
-        logger.warning("Failed to get config name from hydra args")
+        import sys
+
+        # Parse --config-name from sys.argv rather than relying on the
+        # private hydra._internal.utils.get_args helper
+        cfg_name = "kaldi_initializer"
+        for i, arg in enumerate(sys.argv):
+            if arg == "--config-name" and i + 1 < len(sys.argv):
+                cfg_name = sys.argv[i + 1]
+                break
+            elif arg.startswith("--config-name="):
+                cfg_name = arg.split("=", 1)[1]
+                break
+    except Exception:
+        logger.warning("Failed to get config name from command line arguments")
        cfg_name = "kaldi_initializer"

    cs = ConfigStore.instance()
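A note on the `version_base=None` additions in this and the following files: Hydra 1.2 introduced the `version_base` parameter, and omitting it triggers a deprecation warning, while `None` preserves pre-1.1 behavior such as the legacy working-directory layout. Hydra releases before 1.2 reject the keyword entirely, so code that still had to support them could feature-detect it. A minimal sketch under that assumption (`conf/infer.yaml` is a hypothetical config, not part of this PR):

```python
import hydra
from omegaconf import DictConfig, OmegaConf

try:
    # Hydra >= 1.2: version_base=None keeps legacy behavior without the warning
    _main = hydra.main(version_base=None, config_path="conf", config_name="infer")
except TypeError:
    # Hydra < 1.2 does not accept the version_base keyword
    _main = hydra.main(config_path="conf", config_name="infer")

@_main
def main(cfg: DictConfig) -> None:
    print(OmegaConf.to_yaml(cfg))

if __name__ == "__main__":
    main()
```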
22 changes: 14 additions & 8 deletions examples/speech_recognition/new/infer.py
@@ -440,7 +440,7 @@ def main(cfg: InferConfig) -> float:
    return wer


-@hydra.main(config_path=config_path, config_name="infer")
+@hydra.main(version_base=None, config_path=config_path, config_name="infer")
def hydra_main(cfg: InferConfig) -> Union[float, Tuple[float, Optional[float]]]:
    container = OmegaConf.to_container(cfg, resolve=True, enum_to_str=True)
    cfg = OmegaConf.create(container)
@@ -478,13 +478,19 @@ def hydra_main(cfg: InferConfig) -> Union[float, Tuple[float, Optional[float]]]:

def cli_main() -> None:
    try:
-        from hydra._internal.utils import (
-            get_args,
-        )  # pylint: disable=import-outside-toplevel
-
-        cfg_name = get_args().config_name or "infer"
-    except ImportError:
-        logger.warning("Failed to get config name from hydra args")
+        import sys
+
+        # Parse --config-name from sys.argv rather than relying on the
+        # private hydra._internal.utils.get_args helper
+        cfg_name = "infer"
+        for i, arg in enumerate(sys.argv):
+            if arg == "--config-name" and i + 1 < len(sys.argv):
+                cfg_name = sys.argv[i + 1]
+                break
+            elif arg.startswith("--config-name="):
+                cfg_name = arg.split("=", 1)[1]
+                break
+    except Exception:
+        logger.warning("Failed to get config name from command line arguments")
        cfg_name = "infer"

    cs = ConfigStore.instance()
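The same `sys.argv` fallback now appears in three `cli_main` functions (here, in `kaldi_initializer.py` above, and in `w2vu_generate.py` below); it could live in a single shared helper. A standalone sketch of that refactor (`parse_config_name` is a hypothetical name, not part of this PR), showing the two flag spellings it accepts:

```python
import sys
from typing import List

def parse_config_name(argv: List[str], default: str) -> str:
    """Return the value of --config-name from argv, or default if absent."""
    for i, arg in enumerate(argv):
        if arg == "--config-name" and i + 1 < len(argv):
            return argv[i + 1]
        if arg.startswith("--config-name="):
            return arg.split("=", 1)[1]
    return default

assert parse_config_name(["infer.py", "--config-name", "fast"], "infer") == "fast"
assert parse_config_name(["infer.py", "--config-name=fast"], "infer") == "fast"
assert parse_config_name(["infer.py"], "infer") == "infer"
```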
17 changes: 13 additions & 4 deletions examples/wav2vec/unsupervised/w2vu_generate.py
@@ -672,6 +672,7 @@ def main(cfg: UnsupGenerateConfig, model=None):


@hydra.main(
+    version_base=None,
    config_path=os.path.join("../../..", "fairseq", "config"), config_name="config"
)
def hydra_main(cfg):
@@ -698,11 +699,19 @@ def hydra_main(cfg):

def cli_main():
    try:
-        from hydra._internal.utils import get_args
-
-        cfg_name = get_args().config_name or "config"
+        import sys
+
+        # Parse --config-name from sys.argv rather than relying on the
+        # private hydra._internal.utils.get_args helper
+        cfg_name = "config"
+        for i, arg in enumerate(sys.argv):
+            if arg == "--config-name" and i + 1 < len(sys.argv):
+                cfg_name = sys.argv[i + 1]
+                break
+            elif arg.startswith("--config-name="):
+                cfg_name = arg.split("=", 1)[1]
+                break
    except:
-        logger.warning("Failed to get config name from hydra args")
+        logger.warning("Failed to get config name from command line arguments")
        cfg_name = "config"

    cs = ConfigStore.instance()
49 changes: 31 additions & 18 deletions fairseq/checkpoint_utils.py
@@ -337,30 +337,33 @@ def load_checkpoint_to_cpu(path, arg_overrides=None, load_on_all_ranks=False):
    local_path = PathManager.get_local_path(path)

    with open(local_path, "rb") as f:
-        state = torch.load(f, map_location=torch.device("cpu"))
+        import inspect
+
+        # Pass weights_only=False where torch.load supports the parameter
+        # (default True from PyTorch 2.6) so checkpoints containing pickled
+        # non-tensor objects still load
+        if "weights_only" in inspect.signature(torch.load).parameters:
+            state = torch.load(f, map_location=torch.device("cpu"), weights_only=False)
+        else:
+            state = torch.load(f, map_location=torch.device("cpu"))

if "args" in state and state["args"] is not None and arg_overrides is not None:
args = state["args"]
for arg_name, arg_val in arg_overrides.items():
setattr(args, arg_name, arg_val)

if "cfg" in state and state["cfg"] is not None:

# hack to be able to set Namespace in dict config. this should be removed when we update to newer
# omegaconf version that supports object flags, or when we migrate all existing models
# Use proper object flags approach for omegaconf 2.1+
from omegaconf import __version__ as oc_version
from omegaconf import _utils

if oc_version < "2.2":

if oc_version >= "2.1.0":
# OmegaConf 2.1+ can handle this with allow_objects flag
state["cfg"] = OmegaConf.create(state["cfg"], flags={"allow_objects": True})
else:
# Fallback for older versions using the hacky approach
from omegaconf import _utils
old_primitive = _utils.is_primitive_type
_utils.is_primitive_type = lambda _: True

state["cfg"] = OmegaConf.create(state["cfg"])

_utils.is_primitive_type = old_primitive
OmegaConf.set_struct(state["cfg"], True)
else:
state["cfg"] = OmegaConf.create(state["cfg"], flags={"allow_objects": True})

OmegaConf.set_struct(state["cfg"], True)

if arg_overrides is not None:
overwrite_args_by_name(state["cfg"], arg_overrides)
@@ -906,12 +909,22 @@ def load_ema_from_checkpoint(fpath):
    new_state = None

    with PathManager.open(fpath, "rb") as f:
-        new_state = torch.load(
-            f,
-            map_location=(
-                lambda s, _: torch.serialization.default_restore_location(s, "cpu")
-            ),
-        )
+        import inspect
+
+        # Same weights_only handling as in load_checkpoint_to_cpu above
+        if "weights_only" in inspect.signature(torch.load).parameters:
+            new_state = torch.load(
+                f,
+                map_location=(
+                    lambda s, _: torch.serialization.default_restore_location(s, "cpu")
+                ),
+                weights_only=False,
+            )
+        else:
+            new_state = torch.load(
+                f,
+                map_location=(
+                    lambda s, _: torch.serialization.default_restore_location(s, "cpu")
+                ),
+            )

    # EMA model is stored in a separate "extra state"
    model_params = new_state["extra_state"]["ema"]
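For background on the `allow_objects` branch above: OmegaConf normally refuses to store values that are not primitive types, which old checkpoints violate when their `cfg` embeds objects such as `argparse.Namespace`; the flag (supported since omegaconf 2.1) lets those values through. A minimal sketch, with a hypothetical checkpoint fragment:

```python
from argparse import Namespace

from omegaconf import OmegaConf
from omegaconf.errors import UnsupportedValueType

payload = {"task": Namespace(data="/path/to/data")}  # hypothetical cfg fragment

try:
    OmegaConf.create(payload)
except UnsupportedValueType:
    print("rejected: Namespace is not a primitive type")

cfg = OmegaConf.create(payload, flags={"allow_objects": True})
print(cfg.task.data)  # /path/to/data
```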
6 changes: 6 additions & 0 deletions fairseq/dataclass/initialize.py
@@ -58,4 +58,10 @@ def add_defaults(cfg: DictConfig) -> None:
            dc = REGISTRIES[k]["dataclass_registry"].get(name)

            if dc is not None:
+                # Filter out any None or MISSING ("???") values before merging
+                if OmegaConf.is_config(field_cfg):
+                    for key in list(field_cfg.keys()):
+                        if OmegaConf.is_missing(field_cfg, key) or field_cfg[key] is None:
+                            field_cfg.pop(key)
+
                cfg[k] = merge_with_parent(dc, field_cfg)