Commit ba62fed

blckmaxima authored and anntzer committed
Fix error with parentheses, and simplify computation of log-normalized Gaussian probabilities

- Fix the strict setting for variational HMMs
- Fix a bug in the lower-bound computation
- Simplify term1 of the variational Gaussian likelihood
- Update tests per the variational lower-bound change
- Back out the added strict parameter for the convergence monitor, and rely on warnings instead
1 parent 1935f9b · commit ba62fed

File tree: 4 files changed, +22 -30 lines

doc/source/tutorial.rst

Lines changed: 0 additions & 1 deletion
@@ -174,7 +174,6 @@ ConvergenceMonitor(
     history=[...],
     iter=15,
     n_iter=100,
-    strict=False,
     tol=0.01,
     verbose=False,
 )
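
With strict gone, the ConvergenceMonitor repr in the tutorial loses one field, and a non-increasing log probability now surfaces as a warning rather than a ValueError. A minimal sketch of inspecting the monitor after fitting; the model and data are illustrative, not from this commit:

    import numpy as np
    from hmmlearn import hmm

    rng = np.random.default_rng(42)
    X = rng.normal(size=(500, 1))

    model = hmm.GaussianHMM(n_components=2, n_iter=20)
    model.fit(X)
    print(model.monitor_)            # repr no longer shows strict=...
    print(model.monitor_.converged)  # True once tol or n_iter is reached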

lib/hmmlearn/base.py

Lines changed: 10 additions & 16 deletions
@@ -55,7 +55,7 @@ class ConvergenceMonitor:
 
     _template = "{iter:>10d} {log_prob:>16.8f} {delta:>+16.8f}"
 
-    def __init__(self, tol, n_iter, verbose, *, strict=False):
+    def __init__(self, tol, n_iter, verbose):
         """
         Parameters
         ----------
@@ -67,14 +67,10 @@ def __init__(self, tol, n_iter, verbose, *, strict=False):
             Maximum number of iterations to perform.
         verbose : bool
             Whether per-iteration convergence reports are printed.
-        strict : bool
-            Whether to enforce that the values reported are strictly
-            increasing.
         """
         self.tol = tol
         self.n_iter = n_iter
         self.verbose = verbose
-        self.strict = strict
         self.history = deque()
         self.iter = 0
@@ -110,9 +106,14 @@ def report(self, log_prob):
             message = self._template.format(
                 iter=self.iter + 1, log_prob=log_prob, delta=delta)
             print(message, file=sys.stderr)
-        if self.strict and self.history and log_prob < self.history[-1]:
-            raise ValueError(f"Model is not converging. Current: {log_prob}"
-                             f" is not greater than {self.history[-1]}.")
+
+        # Allow for some wiggle room based on precision.
+        precision = np.finfo(float).eps ** (1/2)
+        if self.history and (log_prob - self.history[-1]) < -precision:
+            delta = log_prob - self.history[-1]
+            _log.warning(f"Model is not converging. Current: {log_prob}"
+                         f" is not greater than {self.history[-1]}."
+                         f" Delta is {delta}")
 
         self.history.append(log_prob)
         self.iter += 1
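
The new threshold is the square root of double-precision machine epsilon, about 1.49e-8: decreases smaller than that are treated as floating-point slop, while larger ones trigger the warning. A standalone sketch of the check (is_regression is a hypothetical helper, not part of the library):

    import numpy as np

    precision = np.finfo(float).eps ** (1/2)   # ~1.49e-08

    def is_regression(prev, cur):
        # Mirrors the new condition in report(): only decreases larger
        # than the slack count as a convergence failure.
        return (cur - prev) < -precision

    print(is_regression(-100.0, -100.0 - 1e-12))  # False: within noise
    print(is_regression(-100.0, -100.5))          # True: real decrease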

@@ -1020,15 +1021,8 @@ def __init__(self, n_components=1,
 
         self.startprob_prior = startprob_prior
         self.transmat_prior = transmat_prior
-        # For the case of updating all model components
-        # at each iteration, we can be strict with the convergence
-        # monitory - we know that the Lower Bound will improve
-        # at each iteration. (https://arxiv.org/abs/1601.00670)
-        # During testing we did not see non-strict improvements due to
-        # floating point slop, though that doesn't mean they couldn't
-        # happy in the future.
         self.monitor_ = ConvergenceMonitor(
-            self.tol, self.n_iter, self.verbose, strict=(params == "ste"))
+            self.tol, self.n_iter, self.verbose)
 
     def _init(self, X, lengths=None):
         """

lib/hmmlearn/tests/test_variational_gaussian.py

Lines changed: 2 additions & 6 deletions
@@ -56,10 +56,6 @@ def test_random_fit(self, implementation, params='stmc', n_features=3,
             covariance_type=self.covariance_type,
             implementation=implementation)
 
-        # Depending on the random seed, the model may converge rather quickly,
-        # and throw an assertion in this test, as the function we call
-        # computes each iteration independently by calling fit() `n_iter`
-        # times.
         assert_log_likelihood_increasing(model, X, lengths, n_iter=10)
 
     @pytest.mark.parametrize("implementation", ["scaling", "log"])
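
The deleted comment described why this test could trip on fast-converging seeds: the helper drives EM one iteration at a time by calling fit() repeatedly. A sketch of that pattern, inferred from the comment rather than taken from the helper's actual source (assert_ll_increasing is hypothetical):

    import numpy as np

    def assert_ll_increasing(model, X, lengths, n_iter):
        # Run one EM iteration per fit() call, keeping parameters
        # between calls, and require the scores to be non-decreasing.
        model.n_iter = 1
        model.init_params = ""
        scores = []
        for _ in range(n_iter):
            model.fit(X, lengths)
            scores.append(model.score(X, lengths))
        assert np.all(np.diff(scores) >= 0), scores
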
@@ -344,7 +340,7 @@ def test_initialization(self, implementation):
 
 
 class TestSpherical(_TestGaussian):
-    test_fit_mcgrory_titterington1d_mean = 1.40653312
+    test_fit_mcgrory_titterington1d_mean = 1.4105851867634462
     covariance_type = "spherical"
 
     def new_for_init(self, implementation):
@@ -429,7 +425,7 @@ def test_initialization(self, implementation):
 
 
 class TestDiagonal(_TestGaussian):
-    test_fit_mcgrory_titterington1d_mean = 1.40653312
+    test_fit_mcgrory_titterington1d_mean = 1.410585186763446
     covariance_type = "diag"
 
     def new_for_init(self, implementation):

lib/hmmlearn/vhmm.py

Lines changed: 10 additions & 7 deletions
@@ -647,21 +647,24 @@ def _compute_subnorm_log_likelihood(self, X):
         # In general, things are neater if we pretend the covariance is
         # full / tied. Or, we could treat each case separately, and reduce
         # the number of operations. That's left for the future :-)
-        term1 = np.zeros_like(self.dof_posterior_, dtype=float)
-        for d in range(1, self.n_features+1):
-            term1 += special.digamma(.5 * self.dof_posterior_ + 1 - d)
+        nf = self.n_features
+
+        term1 = special.digamma(
+            .5 * (self.dof_posterior_ - np.arange(0, nf)[:, None])
+        ).sum(axis=0)
+
         scale_posterior_ = self.scale_posterior_
         if self.covariance_type in ("diag", "spherical"):
             scale_posterior_ = fill_covars(self.scale_posterior_,
                 self.covariance_type, self.n_components, self.n_features)
         W_k = np.linalg.inv(scale_posterior_)
-        term1 += self.n_features * np.log(2) + _utils.logdet(W_k)
-        term1 /= 2
+        term1 += nf * np.log(2) + _utils.logdet(W_k)
+        term1 /= 2.
 
         # We ignore the constant that is typically excluded in the literature
         # term2 = self.n_features * log(2 * M_PI) / 2
         term2 = 0
-        term3 = self.n_features / self.beta_posterior_
+        term3 = nf / self.beta_posterior_
 
         # (X - Means) * W_k * (X-Means)^T * self.dof_posterior_
         delta = (X - self.means_posterior_[:, None])
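
The vectorized term1 computes the multivariate digamma sum, sum over d = 1..n_features of digamma((dof + 1 - d) / 2), with the parentheses fixed so the offset is halved along with the degrees of freedom (the old code computed digamma(.5 * dof + 1 - d)). A quick standalone check that the corrected loop and the vectorized form agree; the values are illustrative:

    import numpy as np
    from scipy import special

    nf = 3                          # n_features
    dof = np.array([5., 8., 12.])   # dof_posterior_, one per component

    # Corrected loop form: digamma(.5 * (dof + 1 - d)) for d = 1..nf.
    loop = np.zeros_like(dof)
    for d in range(1, nf + 1):
        loop += special.digamma(.5 * (dof + 1 - d))

    # Vectorized form from the diff.
    vec = special.digamma(.5 * (dof - np.arange(nf)[:, None])).sum(axis=0)

    assert np.allclose(loop, vec)
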
@@ -784,7 +787,7 @@ def _compute_lower_bound(self, log_prob):
         if self.covariance_type != "full":
             scale_posterior_ = fill_covars(self.scale_posterior_,
                 self.covariance_type, self.n_components, self.n_features)
-            scale_prior_ = fill_covars(self.scale_posterior_,
+            scale_prior_ = fill_covars(self.scale_prior_,
                 self.covariance_type, self.n_components, self.n_features)
 
         W_k = np.linalg.inv(scale_posterior_)
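
This last hunk is the lower-bound bug named in the commit message: the prior scale matrices were filled from scale_posterior_ instead of scale_prior_, so the Wishart prior entered the bound with the wrong values. Because both arrays share a shape, nothing ever crashed; a minimal illustration of why the mistake is shape-silent, with hypothetical shapes for 2 components and 3 features:

    import numpy as np

    scale_prior = np.broadcast_to(np.eye(3), (2, 3, 3)).copy()
    scale_posterior = 2.5 * scale_prior
    # Swapping one for the other passes every shape check.
    assert scale_prior.shape == scale_posterior.shape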
