@@ -47,11 +47,11 @@ def l2_normalize(X: np.ndarray):
4747 """
4848
4949 norm = np .sqrt (np .sum (X ** 2 , axis = 1 ))
50- norm [norm == 0 ] = 1.
50+ norm [norm == 0 ] = 1.0
5151 return (X .T / norm ).T
5252
5353
def dist_range(metric="euclidean", normalize=False):
    """Return range of possible distance between two vectors

    Parameters
    ----------
    metric : str, optional
        One of "euclidean", "sqeuclidean", "cosine" or "angular".
        Defaults to "euclidean".
    normalize : bool, optional
        When truthy, the bounded range is returned for "euclidean" and
        "sqeuclidean". Has no effect on "cosine" and "angular".
        Defaults to False.

    Returns
    -------
    lower, upper : (float, float)
        Range of possible distance.

    Raises
    ------
    NotImplementedError
        If `metric` is none of the supported names.
    """

    # Shared answer for the two Euclidean flavours when `normalize` is off.
    unbounded = (0.0, np.inf)

    if metric == "euclidean":
        return (0.0, 2.0) if normalize else unbounded

    if metric == "sqeuclidean":
        return (0.0, 4.0) if normalize else unbounded

    if metric == "cosine":
        return (0.0, 2.0)

    if metric == "angular":
        return (0.0, np.pi)

    raise NotImplementedError(f"dist_range does not support {metric} metric.")
8888
8989
9090def _pdist_func_1D (X , func ):
9191 """Helper function for pdist"""
9292
93- X = X .squeeze ()
94- n_items , = X .shape
93+ (n_items ,) = X .shape
94+
95+ if n_items < 2 :
96+ return np .array ([])
9597
9698 distances = []
9799
98100 for i in range (n_items - 1 ):
99- distance = func (X [i ], X [i + 1 :])
101+ distance = func (X [i ], X [i + 1 :])
100102 distances .append (distance )
101103
102104 return np .hstack (distances )
103105
104106
def pdist(fX, metric="euclidean", **kwargs):
    """Same as scipy.spatial.distance.pdist with support for additional metrics

    * 'angular': pairwise angular distance
    * 'equal':   pairwise equality check (only for 1-dimensional fX)
    * 'minimum': pairwise minimum (only for 1-dimensional fX)
    * 'maximum': pairwise maximum (only for 1-dimensional fX)
    * 'average': pairwise average (only for 1-dimensional fX)

    Parameters
    ----------
    fX : array-like
        Input items.
    metric : str or callable, optional
        One of the additional metrics listed above, or anything accepted by
        scipy.spatial.distance.pdist. Defaults to "euclidean".
    **kwargs
        Forwarded to scipy.spatial.distance.pdist (ignored by the
        1-dimensional metrics).

    Returns
    -------
    np.ndarray
        Pairwise values, in condensed form.

    Raises
    ------
    ValueError
        If a 1-dimensional-only metric is requested with an `fX` that is
        not 1-dimensional.
    """

    if metric == "angular":
        cosine = scipy.spatial.distance.pdist(fX, metric="cosine", **kwargs)
        # Clip before arccos so values nudged outside [-1, 1] do not give NaN.
        return np.arccos(np.clip(1.0 - cosine, -1.0, 1.0))

    # Metrics computed by applying an element-wise function to pairs of a
    # 1-dimensional input.
    pairwise_1d = {
        "equal": lambda x, X: x == X,
        "minimum": np.minimum,
        "maximum": np.maximum,
        "average": lambda x, X: 0.5 * (x + X),
    }

    func = pairwise_1d.get(metric) if isinstance(metric, str) else None
    if func is None:
        return scipy.spatial.distance.pdist(fX, metric=metric, **kwargs)

    # Explicit check instead of `assert`, which disappears under `python -O`.
    fX = np.asarray(fX)
    if fX.ndim != 1:
        raise ValueError(f"'{metric}' metric only supports 1-dimensional fX.")

    return _pdist_func_1D(fX, func)
134139
135140
136141def _cdist_func_1D (X_trn , X_tst , func ):
137142 """Helper function for cdist"""
138- X_trn = X_trn .squeeze ()
139- X_tst = X_tst .squeeze ()
140143 return np .vstack (func (x_trn , X_tst ) for x_trn in iter (X_trn ))
141144
142145
def cdist(fX_trn, fX_tst, metric="euclidean", **kwargs):
    """Same as scipy.spatial.distance.cdist with support for additional metrics

    * 'angular': pairwise angular distance
    * 'equal':   pairwise equality check (only for 1-dimensional fX)
    * 'minimum': pairwise minimum (only for 1-dimensional fX)
    * 'maximum': pairwise maximum (only for 1-dimensional fX)
    * 'average': pairwise average (only for 1-dimensional fX)

    Parameters
    ----------
    fX_trn, fX_tst : array-like
        The two collections of items to compare.
    metric : str or callable, optional
        One of the additional metrics listed above, or anything accepted by
        scipy.spatial.distance.cdist. Defaults to "euclidean".
    **kwargs
        Forwarded to scipy.spatial.distance.cdist (ignored by the
        1-dimensional metrics).

    Returns
    -------
    np.ndarray
        Pairwise values, one row per item of `fX_trn` and one column per
        item of `fX_tst`.

    Raises
    ------
    ValueError
        If a 1-dimensional-only metric is requested and either input is not
        1-dimensional.
    """

    if metric == "angular":
        cosine = scipy.spatial.distance.cdist(fX_trn, fX_tst, metric="cosine", **kwargs)
        # Clip before arccos so values nudged outside [-1, 1] do not give NaN.
        return np.arccos(np.clip(1.0 - cosine, -1.0, 1.0))

    # Metrics computed by applying an element-wise function to pairs of
    # 1-dimensional inputs.
    pairwise_1d = {
        "equal": lambda x_trn, X_tst: x_trn == X_tst,
        "minimum": np.minimum,
        "maximum": np.maximum,
        "average": lambda x_trn, X_tst: 0.5 * (x_trn + X_tst),
    }

    func = pairwise_1d.get(metric) if isinstance(metric, str) else None
    if func is None:
        return scipy.spatial.distance.cdist(fX_trn, fX_tst, metric=metric, **kwargs)

    # Explicit check instead of `assert`, which disappears under `python -O`.
    fX_trn, fX_tst = np.asarray(fX_trn), np.asarray(fX_tst)
    if fX_trn.ndim != 1 or fX_tst.ndim != 1:
        raise ValueError(
            f"'{metric}' metric only supports 1-dimensional fX_trn and fX_tst."
        )

    return _cdist_func_1D(fX_trn, fX_tst, func)
175188
176189
def to_condensed(n, i, j):
    """Compute index in condensed pdist matrix

    Maps a pair of distinct square-matrix indices to the position of that
    pair in the condensed (upper-triangular, row-major) layout. The order
    of `i` and `j` does not matter.

    Parameters
    ----------
    n : int
        Number of items (size of the square matrix).
    i, j : int or array-like of int
        Indices in the square matrix; `i` and `j` must differ element-wise.

    Returns
    -------
    k : np.int64 or np.ndarray of np.int64
        Index in the condensed matrix.

    Raises
    ------
    ValueError
        If `i` equals `j` for any pair.
    """
    i = np.asarray(i, dtype=np.int64)
    j = np.asarray(j, dtype=np.int64)
    if np.any(i == j):
        raise ValueError("i and j should be different.")

    # Order each pair so that row < col (upper triangle).
    row, col = np.minimum(i, j), np.maximum(i, j)

    # row * (row + 3) is always even, so integer division is exact and the
    # whole computation stays in int64 (no float round-trip).
    return np.int64(row * n - row * (row + 3) // 2 + col - 1)
206219
@@ -222,6 +235,6 @@ def to_squared(n, k):
222235
223236 """
224237 k = np .array (k )
225- i = np .int64 (n - np .sqrt (- 8 * k + 4 * n ** 2 - 4 * n + 1 )/ 2 - 1 / 2 )
226- j = np .int64 (i ** 2 / 2 - i * n + 3 * i / 2 + k + 1 )
238+ i = np .int64 (n - np .sqrt (- 8 * k + 4 * n ** 2 - 4 * n + 1 ) / 2 - 1 / 2 )
239+ j = np .int64 (i ** 2 / 2 - i * n + 3 * i / 2 + k + 1 )
227240 return i , j
0 commit comments