Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion pyannote/audio/pipelines/speech_separation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@
from typing import Callable, Optional, Text, Tuple, Union

import numpy as np
from scipy.ndimage import binary_dilation, binary_closing
import torch
Comment on lines +34 to 35
Copy link

Copilot AI Aug 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

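[nitpick] The import reorganization places the `from scipy.ndimage import ...` line between `import numpy as np` and `import torch`, breaking the file's existing convention of listing plain `import` statements before `from ... import` statements. Consider maintaining consistent import ordering for better code organization.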

Suggested change
from scipy.ndimage import binary_dilation, binary_closing
import torch
import torch
from scipy.ndimage import binary_dilation, binary_closing

Copilot uses AI. Check for mistakes.
from einops import rearrange
from pyannote.core import Annotation, SlidingWindow, SlidingWindowFeature
from pyannote.metrics.diarization import GreedyDiarizationErrorRate
from pyannote.pipeline.parameter import Categorical, ParamDict, Uniform
from scipy.ndimage import binary_dilation

from pyannote.audio import Audio, Inference, Model, Pipeline
from pyannote.audio.core.io import AudioFile
Expand Down Expand Up @@ -163,12 +163,14 @@ def __init__(

if self._segmentation.model.specifications[0].powerset:
self.segmentation = ParamDict(
min_duration_on=Uniform(0.0, 1.0),
min_duration_off=Uniform(0.0, 1.0),
)

else:
self.segmentation = ParamDict(
threshold=Uniform(0.1, 0.9),
min_duration_on=Uniform(0.0, 1.0),
min_duration_off=Uniform(0.0, 1.0),
)

Expand Down Expand Up @@ -602,6 +604,19 @@ def apply(
# shape: (num_speakers, )
discrete_diarization.data = discrete_diarization.data[:, active_speakers]
num_frames, num_speakers = discrete_diarization.data.shape

# filter out too short segments
min_frames_on = int(
self._segmentation.model.num_frames(
self.segmentation.min_duration_on * self._audio.sample_rate
)
)

if min_frames_on > 0:
discrete_diarization.data = binary_closing(
discrete_diarization.data, structure=np.array([[True] * min_frames_on]).T
)

Comment on lines +616 to +619
Copy link

Copilot AI Aug 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

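Creating a new numpy array inline with `np.array([[True] * min_frames_on]).T` on each call is inefficient and makes the `binary_closing` call harder to read. Consider building the structure array once, in a named variable, before passing it to `binary_closing`, so that it can be reused across calls.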

Suggested change
discrete_diarization.data = binary_closing(
discrete_diarization.data, structure=np.array([[True] * min_frames_on]).T
)
structure = np.array([[True] * min_frames_on]).T
discrete_diarization.data = binary_closing(
discrete_diarization.data, structure=structure
)

Copilot uses AI. Check for mistakes.
hook("discrete_diarization", discrete_diarization)

clustered_separations = self.reconstruct(separations, hard_clusters, count)
Expand Down