Skip to content

Commit 37172e9

Browse files
committed
Resemblyzer diarization test module // pre-release
1 parent f8220a0 commit 37172e9

File tree

1 file changed

+15
-13
lines changed

1 file changed

+15
-13
lines changed

src/utils/diarize_resemblyzer.py

Lines changed: 15 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,6 @@
7272
# Try webrtcvad; fall back to energy gate if missing.
7373
try:
7474
import webrtcvad
75-
7675
HAVE_WEBRTCVAD = True
7776
except Exception:
7877
webrtcvad = None
@@ -494,7 +493,7 @@ def _try_agglomerative_k(X, k):
494493
ac = AgglomerativeClustering(n_clusters=k, metric="cosine", linkage="average")
495494
except TypeError:
496495
ac = AgglomerativeClustering(n_clusters=k, affinity="cosine", linkage="average")
497-
# tiny jitter helps break ties
496+
# tiny jitter helps break ties / degeneracy
498497
Xj = (X + 1e-4 * np.random.randn(*X.shape)).astype(np.float32)
499498
return ac.fit_predict(Xj)
500499

@@ -639,7 +638,6 @@ def pick_labels(embeddings, method="bic", min_speakers=1, max_speakers=4,
639638

640639
# 4) optional collapse-to-1
641640
if not no_collapse and len(np.unique(labels)) > 1:
642-
# If massive imbalance AND poor structure, collapse.
643641
ncl = len(np.unique(labels))
644642
_, counts = np.unique(labels, return_counts=True)
645643
majority = counts.max() / float(n)
@@ -999,9 +997,7 @@ def main(
999997
else:
1000998
feat_for_cluster = X_win
1001999

1002-
# --- If user demands multi-speaker but CP produced weak evidence, fall back ---
1003-
too_few_regions = use_cp and (force_n is not None) and (feat_for_cluster.shape[0] < int(force_n) * 3)
1004-
1000+
# --- Primary clustering on regions/windows ---
10051001
lab_regions = pick_labels(
10061002
feat_for_cluster,
10071003
method=method,
@@ -1018,19 +1014,25 @@ def main(
10181014
no_collapse=no_collapse,
10191015
no_guard=no_guard,
10201016
)
1021-
too_imbalanced = (
1022-
use_cp and (force_n is not None) and len(np.unique(lab_regions)) >= 2 and _min_prop(lab_regions) < 0.10
1017+
1018+
# --- Generalized CP weakness detection → fallback to window-level ---
1019+
n_reg = feat_for_cluster.shape[0]
1020+
pmin = _min_prop(lab_regions) if getattr(lab_regions, "size", 0) else 1.0
1021+
uniq = len(np.unique(lab_regions)) if getattr(lab_regions, "size", 0) else 1
1022+
weak_cp = use_cp and (
1023+
n_reg < max(5, 2 * int(min_speakers)) # too few regions to trust
1024+
or uniq == 1 # collapsed partition
1025+
or (uniq >= 2 and pmin < 0.12) # severe imbalance
10231026
)
10241027

1025-
if too_few_regions or too_imbalanced:
1028+
if weak_cp:
10261029
logging.warning(
1027-
"CP produced weak evidence (regions=%d, min_prop=%.3f). Falling back to window-level clustering.",
1028-
feat_for_cluster.shape[0],
1029-
_min_prop(lab_regions),
1030+
"CP produced weak evidence (regions=%d, uniq=%d, min_prop=%.3f). Falling back to window-level clustering.",
1031+
n_reg, uniq, pmin
10301032
)
10311033
regions = [(i, i) for i in range(len(X_win))]
10321034
ts_regions = timestamps
1033-
feat_for_cluster = np.hstack([X_win, pitch_win]) if (use_pitch and pitch_win is not None) else X_win
1035+
feat_for_cluster = (np.hstack([X_win, pitch_win]) if (use_pitch and pitch_win is not None) else X_win)
10341036
lab_regions = pick_labels(
10351037
feat_for_cluster,
10361038
method=method,

0 commit comments

Comments
 (0)