Merge branch 'develop' into improve/on-prem-wrapper

hbredin · web-flow · commit e49a686e1717 · 2025-11-19T11:47:33.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,10 +2,12 @@
 
 ## next
 
+- BREAKING(util): make `Binarize.__call__` return `string` tracks (instead of `int`) [@benniekiss](https://github.com/benniekiss/)
 - feat(cli): add option to apply pipeline on a directory of audio files
+- feat(pipeline): add `preload` option to base `Pipeline.__call__` to force preloading audio in memory ([@antoinelaurent](https://github.com/antoinelaurent/))
+- feat(pipeline): add `Pipeline.cuda()` convenience method [@tkanarsky](https://github.com/tkanarsky/)
 - improve(util): make `permutate` faster thanks to vectorized cost function
-- BREAKING(util): make `Binarize.__call__` return `string` tracks (instead of `int`) [@benniekiss](https://github.com/benniekiss/)
-- improve(pyannoteAI): update on-premise wrapper to return both regular and exclusive diarization
+- improve(pyannoteAI): update pyannoteAI wrapper to return both regular and exclusive diarization
 
 ## Version 4.0.1 (2025-10-10)
 
diff --git a/src/pyannote/audio/core/pipeline.py b/src/pyannote/audio/core/pipeline.py
@@ -21,6 +21,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+from __future__ import annotations
 import os
 import warnings
 from collections import OrderedDict
@@ -406,7 +407,23 @@ def classes(self) -> List | Iterator:
         """
         raise NotImplementedError()
 
-    def __call__(self, file: AudioFile, **kwargs):
+    def __call__(self, file: AudioFile, preload: bool = False, **kwargs):
+        """Validate file, (optionally) load it in memory, then process it
+
+        Parameters
+        ----------
+        file : AudioFile
+            File to process
+        preload : bool, optional
+            Whether to preload waveform before applying the pipeline.
+        kwargs : keyword arguments, optional
+            Additional keyword arguments passed to `self.apply(...)`
+
+        Returns
+        -------
+        output : Any
+            Whatever `self.apply(...)` returns
+        """
         fix_reproducibility(getattr(self, "device", torch.device("cpu")))
 
         if not self.instantiated:
@@ -432,16 +449,35 @@ def __call__(self, file: AudioFile, **kwargs):
 
         file = Audio.validate_file(file)
 
+        # check if the instance has preprocessors and wrap the file if so
         if hasattr(self, "preprocessors"):
             file = ProtocolFile(file, lazy=self.preprocessors)
 
+        # pre-load the audio in memory if requested
+        if preload:
+            # raise error if `waveform` is already in memory (or will be via a preprocessor)
+            if (
+                "waveform" in getattr(self, "preprocessors", dict())
+                or "waveform" in file
+            ):
+                raise ValueError(
+                    "Cannot preload audio: `waveform` key is already available or will be via a preprocessor."
+                )
+
+            # load waveform in memory (and keep track of its original sample rate)
+            file["waveform"], file["sample_rate"] = Audio()(file)
+
+            # the above line already took care of channel selection,
+            # therefore we remove the `channel` key from the file
+            file.pop("channel", None)
+
         # send file duration to telemetry as well as
         # requested number of speakers in case of diarization
         track_pipeline_apply(self, file, **kwargs)
 
         return self.apply(file, **kwargs)
 
-    def to(self, device: torch.device):
+    def to(self, device: torch.device) -> Pipeline:
         """Send pipeline to `device`"""
 
         if not isinstance(device, torch.device):
@@ -462,3 +498,14 @@ def to(self, device: torch.device):
         self.device = device
 
         return self
+
+    def cuda(self, device: torch.device | int | None = None) -> Pipeline:
+        """Send pipeline to (optionally specified) cuda device"""
+        if device is None:
+            return self.to(torch.device("cuda"))
+        elif isinstance(device, int):
+            return self.to(torch.device("cuda", device))
+        else:
+            if device.type != "cuda":
+                raise ValueError("Expected CUDA device. Use `Pipeline.to(device)` for other devices.")
+            return self.to(device)