From 193e20b1316c679d66ca32abefd0b05163793ac2 Mon Sep 17 00:00:00 2001 From: mshzy <156560471+mshzy@users.noreply.github.com> Date: Tue, 2 Jun 2026 10:28:36 +0800 Subject: [PATCH] fix: 5 bug fixes across linear models, trees, KNN, GP, and Bayesian regression - LinearRegression: fix np.squeeze producing 0-d weights when N=1 (fixes #77) - DecisionTree: handle empty child splits to prevent ZeroDivisionError (fixes #58) - KNN: add epsilon to prevent division by zero when distance=0; raise ValueError instead of a generic Exception when X is not two-dimensional - BayesianRegression: replace np.linalg.inv with pinv for numerical stability - GP: replace bare except with ImportError for scipy import --- numpy_ml/linear_models/bayesian_regression.py | 10 +++++----- numpy_ml/linear_models/linear_regression.py | 2 +- numpy_ml/nonparametric/gp.py | 2 +- numpy_ml/nonparametric/knn.py | 7 ++++--- numpy_ml/trees/dt.py | 5 ++++- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/numpy_ml/linear_models/bayesian_regression.py b/numpy_ml/linear_models/bayesian_regression.py index b9fe9d8..6ba0ea7 100644 --- a/numpy_ml/linear_models/bayesian_regression.py +++ b/numpy_ml/linear_models/bayesian_regression.py @@ -111,7 +111,7 @@ def fit(self, X, y): # sigma I = np.eye(N) # noqa: E741 a = y - (X @ mu) - b = np.linalg.inv(X @ V @ X.T + I) + b = np.linalg.pinv(X @ V @ X.T + I) c = y - (X @ mu) shape = N + alpha @@ -122,8 +122,8 @@ def fit(self, X, y): sigma = scale / (shape - 1) # mean - V_inv = np.linalg.inv(V) - L = np.linalg.inv(V_inv + X.T @ X) + V_inv = np.linalg.pinv(V) + L = np.linalg.pinv(V_inv + X.T @ X) R = V_inv @ mu + X.T @ y mu = L @ R @@ -263,8 +263,8 @@ def fit(self, X, y): mu = self.mu sigma = self.sigma - V_inv = np.linalg.inv(V) - L = np.linalg.inv(V_inv + X.T @ X) + V_inv = np.linalg.pinv(V) + L = np.linalg.pinv(V_inv + X.T @ X) R = V_inv @ mu + X.T @ y mu = L @ R diff --git a/numpy_ml/linear_models/linear_regression.py b/numpy_ml/linear_models/linear_regression.py index b6cf5d9..ba7a266 100644 --- 
a/numpy_ml/linear_models/linear_regression.py +++ b/numpy_ml/linear_models/linear_regression.py @@ -197,7 +197,7 @@ def fit(self, X, y, weights=None): N = X.shape[0] weights = np.ones(N) if weights is None else np.atleast_1d(weights) - weights = np.squeeze(weights) if weights.size > 1 else weights + weights = np.atleast_1d(np.squeeze(weights)) err_str = f"weights must have shape ({N},) but got {weights.shape}" assert weights.shape == (N,), err_str diff --git a/numpy_ml/nonparametric/gp.py b/numpy_ml/nonparametric/gp.py index 0811126..55d06c4 100644 --- a/numpy_ml/nonparametric/gp.py +++ b/numpy_ml/nonparametric/gp.py @@ -5,7 +5,7 @@ try: _SCIPY = True from scipy.stats import norm -except: +except ImportError: _SCIPY = False warnings.warn( "Could not import scipy.stats. Confidence scores " diff --git a/numpy_ml/nonparametric/knn.py b/numpy_ml/nonparametric/knn.py index 8825229..d297204 100644 --- a/numpy_ml/nonparametric/knn.py +++ b/numpy_ml/nonparametric/knn.py @@ -56,7 +56,7 @@ def fit(self, X, y): Targets for the `N` rows in `X`. 
""" if X.ndim != 2: - raise Exception("X must be two-dimensional") + raise ValueError("X must be two-dimensional") self._ball_tree.fit(X, y) def predict(self, X): @@ -88,14 +88,15 @@ def predict(self, X): pred, _ = sorted(counts, key=lambda x: (-x[1], x[0]))[0] elif H["weights"] == "distance": best_score = -np.inf + eps = np.finfo(float).eps for label in set(targets): - scores = [1 / n.distance for n in nearest if n.val == label] + scores = [1 / max(n.distance, eps) for n in nearest if n.val == label] pred = label if np.sum(scores) > best_score else pred else: if H["weights"] == "uniform": pred = np.mean(targets) elif H["weights"] == "distance": - weights = [1 / n.distance for n in nearest] + weights = [1 / max(n.distance, np.finfo(float).eps) for n in nearest] pred = np.average(targets, weights=weights) predictions.append(pred) return np.array(predictions) diff --git a/numpy_ml/trees/dt.py b/numpy_ml/trees/dt.py index 3bd033c..b98b2a1 100644 --- a/numpy_ml/trees/dt.py +++ b/numpy_ml/trees/dt.py @@ -120,7 +120,10 @@ def predict_class_probs(self, X): return np.array([self._traverse(x, self.root, prob=True) for x in X]) def _grow(self, X, Y, cur_depth=0): - # if all labels are the same, return a leaf + # if all labels are the same, or node is empty, return a leaf + if len(Y) == 0: + prob = np.zeros(self.n_classes) if self.classifier else 0.0 + return Leaf(prob) if len(set(Y)) == 1: if self.classifier: prob = np.zeros(self.n_classes)