7272# Try webrtcvad; fall back to energy gate if missing.
7373try :
7474 import webrtcvad
75-
7675 HAVE_WEBRTCVAD = True
7776except Exception :
7877 webrtcvad = None
@@ -494,7 +493,7 @@ def _try_agglomerative_k(X, k):
494493 ac = AgglomerativeClustering (n_clusters = k , metric = "cosine" , linkage = "average" )
495494 except TypeError :
496495 ac = AgglomerativeClustering (n_clusters = k , affinity = "cosine" , linkage = "average" )
497- # tiny jitter helps break ties
496+ # tiny jitter helps break ties / degeneracy
498497 Xj = (X + 1e-4 * np .random .randn (* X .shape )).astype (np .float32 )
499498 return ac .fit_predict (Xj )
500499
@@ -639,7 +638,6 @@ def pick_labels(embeddings, method="bic", min_speakers=1, max_speakers=4,
639638
640639 # 4) optional collapse-to-1
641640 if not no_collapse and len (np .unique (labels )) > 1 :
642- # If massive imbalance AND poor structure, collapse.
643641 ncl = len (np .unique (labels ))
644642 _ , counts = np .unique (labels , return_counts = True )
645643 majority = counts .max () / float (n )
@@ -999,9 +997,7 @@ def main(
999997 else :
1000998 feat_for_cluster = X_win
1001999
1002- # --- If user demands multi-speaker but CP produced weak evidence, fall back ---
1003- too_few_regions = use_cp and (force_n is not None ) and (feat_for_cluster .shape [0 ] < int (force_n ) * 3 )
1004-
1000+ # --- Primary clustering on regions/windows ---
10051001 lab_regions = pick_labels (
10061002 feat_for_cluster ,
10071003 method = method ,
@@ -1018,19 +1014,25 @@ def main(
10181014 no_collapse = no_collapse ,
10191015 no_guard = no_guard ,
10201016 )
1021- too_imbalanced = (
1022- use_cp and (force_n is not None ) and len (np .unique (lab_regions )) >= 2 and _min_prop (lab_regions ) < 0.10
1017+
1018+ # --- Generalized CP weakness detection → fallback to window-level ---
1019+ n_reg = feat_for_cluster .shape [0 ]
1020+ pmin = _min_prop (lab_regions ) if getattr (lab_regions , "size" , 0 ) else 1.0
1021+ uniq = len (np .unique (lab_regions )) if getattr (lab_regions , "size" , 0 ) else 1
1022+ weak_cp = use_cp and (
1023+ n_reg < max (5 , 2 * int (min_speakers )) # too few regions to trust
1024+ or uniq == 1 # collapsed partition
1025+ or (uniq >= 2 and pmin < 0.12 ) # severe imbalance
10231026 )
10241027
1025- if too_few_regions or too_imbalanced :
1028+ if weak_cp :
10261029 logging .warning (
1027- "CP produced weak evidence (regions=%d, min_prop=%.3f). Falling back to window-level clustering." ,
1028- feat_for_cluster .shape [0 ],
1029- _min_prop (lab_regions ),
1030+ "CP produced weak evidence (regions=%d, uniq=%d, min_prop=%.3f). Falling back to window-level clustering." ,
1031+ n_reg , uniq , pmin
10301032 )
10311033 regions = [(i , i ) for i in range (len (X_win ))]
10321034 ts_regions = timestamps
1033- feat_for_cluster = np .hstack ([X_win , pitch_win ]) if (use_pitch and pitch_win is not None ) else X_win
1035+ feat_for_cluster = ( np .hstack ([X_win , pitch_win ]) if (use_pitch and pitch_win is not None ) else X_win )
10341036 lab_regions = pick_labels (
10351037 feat_for_cluster ,
10361038 method = method ,
0 commit comments