From 193e20b1316c679d66ca32abefd0b05163793ac2 Mon Sep 17 00:00:00 2001
From: mshzy <156560471+mshzy@users.noreply.github.com>
Date: Tue, 2 Jun 2026 10:28:36 +0800
Subject: [PATCH] fix: 5 bug fixes across linear models, trees, KNN, and
 Bayesian regression

- LinearRegression: fix np.squeeze producing 0-d weights when N=1 (fixes #77)
- DecisionTree: handle empty child splits to prevent ZeroDivisionError (fixes #58)
- KNN: clamp neighbor distances to machine epsilon to prevent division by zero
  when distance=0; raise ValueError instead of a bare Exception when X is not 2-D
- BayesianRegression: replace np.linalg.inv with pinv for numerical stability
- GP: replace the bare `except:` around the scipy import with `except ImportError:`
---
 numpy_ml/linear_models/bayesian_regression.py | 10 +++++-----
 numpy_ml/linear_models/linear_regression.py   |  2 +-
 numpy_ml/nonparametric/gp.py                  |  2 +-
 numpy_ml/nonparametric/knn.py                 |  7 ++++---
 numpy_ml/trees/dt.py                          |  5 ++++-
 5 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/numpy_ml/linear_models/bayesian_regression.py b/numpy_ml/linear_models/bayesian_regression.py
index b9fe9d8..6ba0ea7 100644
--- a/numpy_ml/linear_models/bayesian_regression.py
+++ b/numpy_ml/linear_models/bayesian_regression.py
@@ -111,7 +111,7 @@ def fit(self, X, y):
         # sigma
         I = np.eye(N)  # noqa: E741
         a = y - (X @ mu)
-        b = np.linalg.inv(X @ V @ X.T + I)
+        b = np.linalg.pinv(X @ V @ X.T + I)
         c = y - (X @ mu)
 
         shape = N + alpha
@@ -122,8 +122,8 @@ def fit(self, X, y):
         sigma = scale / (shape - 1)
 
         # mean
-        V_inv = np.linalg.inv(V)
-        L = np.linalg.inv(V_inv + X.T @ X)
+        V_inv = np.linalg.pinv(V)
+        L = np.linalg.pinv(V_inv + X.T @ X)
         R = V_inv @ mu + X.T @ y
 
         mu = L @ R
@@ -263,8 +263,8 @@ def fit(self, X, y):
         mu = self.mu
         sigma = self.sigma
 
-        V_inv = np.linalg.inv(V)
-        L = np.linalg.inv(V_inv + X.T @ X)
+        V_inv = np.linalg.pinv(V)
+        L = np.linalg.pinv(V_inv + X.T @ X)
         R = V_inv @ mu + X.T @ y
 
         mu = L @ R
diff --git a/numpy_ml/linear_models/linear_regression.py b/numpy_ml/linear_models/linear_regression.py
index b6cf5d9..ba7a266 100644
--- a/numpy_ml/linear_models/linear_regression.py
+++ b/numpy_ml/linear_models/linear_regression.py
@@ -197,7 +197,7 @@ def fit(self, X, y, weights=None):
         N = X.shape[0]
 
         weights = np.ones(N) if weights is None else np.atleast_1d(weights)
-        weights = np.squeeze(weights) if weights.size > 1 else weights
+        weights = np.atleast_1d(np.squeeze(weights)) if weights.size > 1 else weights
         err_str = f"weights must have shape ({N},) but got {weights.shape}"
         assert weights.shape == (N,), err_str
 
diff --git a/numpy_ml/nonparametric/gp.py b/numpy_ml/nonparametric/gp.py
index 0811126..55d06c4 100644
--- a/numpy_ml/nonparametric/gp.py
+++ b/numpy_ml/nonparametric/gp.py
@@ -5,7 +5,7 @@
 try:
     _SCIPY = True
     from scipy.stats import norm
-except:
+except ImportError:
     _SCIPY = False
     warnings.warn(
         "Could not import scipy.stats. Confidence scores "
diff --git a/numpy_ml/nonparametric/knn.py b/numpy_ml/nonparametric/knn.py
index 8825229..d297204 100644
--- a/numpy_ml/nonparametric/knn.py
+++ b/numpy_ml/nonparametric/knn.py
@@ -56,7 +56,7 @@ def fit(self, X, y):
             Targets for the `N` rows in `X`.
         """
         if X.ndim != 2:
-            raise Exception("X must be two-dimensional")
+            raise ValueError("X must be two-dimensional")
         self._ball_tree.fit(X, y)
 
     def predict(self, X):
@@ -88,14 +88,15 @@ def predict(self, X):
                     pred, _ = sorted(counts, key=lambda x: (-x[1], x[0]))[0]
                 elif H["weights"] == "distance":
                     best_score = -np.inf
+                    eps = np.finfo(float).eps
                     for label in set(targets):
-                        scores = [1 / n.distance for n in nearest if n.val == label]
+                        scores = [1 / max(n.distance, eps) for n in nearest if n.val == label]
                         pred = label if np.sum(scores) > best_score else pred
             else:
                 if H["weights"] == "uniform":
                     pred = np.mean(targets)
                 elif H["weights"] == "distance":
-                    weights = [1 / n.distance for n in nearest]
+                    weights = [1 / max(n.distance, np.finfo(float).eps) for n in nearest]
                     pred = np.average(targets, weights=weights)
             predictions.append(pred)
         return np.array(predictions)
diff --git a/numpy_ml/trees/dt.py b/numpy_ml/trees/dt.py
index 3bd033c..b98b2a1 100644
--- a/numpy_ml/trees/dt.py
+++ b/numpy_ml/trees/dt.py
@@ -120,7 +120,10 @@ def predict_class_probs(self, X):
         return np.array([self._traverse(x, self.root, prob=True) for x in X])
 
     def _grow(self, X, Y, cur_depth=0):
-        # if all labels are the same, return a leaf
+        # if all labels are the same, or node is empty, return a leaf
+        if len(Y) == 0:
+            prob = np.zeros(self.n_classes) if self.classifier else 0.0
+            return Leaf(prob)
         if len(set(Y)) == 1:
             if self.classifier:
                 prob = np.zeros(self.n_classes)