Commit 1935f9b

Initialize startprob/transmat of VI HMMs with Dirichlet-sampled estimates, similar to the EM HMM. (hmmlearn#506)

* Initialize startprob_posterior and transmat_posterior of the Variational HMMs similarly to the EM models (using a Dirichlet distribution).
* Bump the number of random initializations to demonstrate learning the best model in the Variational Inference example.
* Reduce the variational Gaussian tests, with no loss of coverage.
* Add a note about the variational Gaussian test, and set the random seed.
* Improve readability with consistency in the test code.
1 parent 6f23f82 commit 1935f9b
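
The gist of the change: the Variational HMMs keep a uniform Dirichlet prior over the start and transition probabilities, but the posterior is now seeded from a Dirichlet draw (scaled to the same pseudo-count mass) instead of being a copy of the prior, so different random_state values start the optimization from different points, much like the EM models. Below is a minimal, self-contained NumPy sketch of that seeding; the values of nc and lengths are illustrative and not taken from the diff.

import numpy as np
from sklearn.utils import check_random_state

nc = 4               # number of hidden states (illustrative)
lengths = [500]      # one observed sequence of 500 samples (illustrative)
rs = check_random_state(0)

# Priors stay uniform, exactly as before.
startprob_prior = np.full(nc, 1 / nc)
transmat_prior = np.full((nc, nc), 1 / nc)

# Posteriors are seeded from Dirichlet draws instead of copies of the prior,
# so each random_state explores a different starting point while keeping the
# same total pseudo-count mass.
startprob_posterior = rs.dirichlet(np.full(nc, 1 / nc)) * len(lengths)
transmat_posterior = rs.dirichlet(np.full(nc, 1 / nc), size=nc)
transmat_posterior *= sum(lengths) / nc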

File tree

5 files changed: +39 −51 lines


examples/plot_variational_inference.py

Lines changed: 3 additions & 1 deletion
@@ -66,7 +66,9 @@ def gaussian_hinton_diagram(startprob, transmat, means,
 rs = check_random_state(2022)
 sample_length = 500
 num_samples = 1
-num_inits = 1
+# With random initialization, it takes a few tries to find the
+# best solution
+num_inits = 5
 num_states = np.arange(1, 7)
 verbose = False
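
The comment added above refers to the example's restart strategy: fit several randomly initialized models and keep the best one. A hedged sketch of such a loop follows; the function name is made up, and ranking fits by score() on the variational model is an assumption rather than code from this commit.

import numpy as np
from hmmlearn import vhmm

def fit_best_variational_hmm(X, lengths, n_components, num_inits=5, seed=2022):
    # Fit num_inits randomly initialized variational HMMs and keep the one
    # that scores best on the training data (illustrative sketch only).
    best_model, best_score = None, -np.inf
    for i in range(num_inits):
        model = vhmm.VariationalGaussianHMM(
            n_components, n_iter=500, random_state=seed + i)
        model.fit(X, lengths)
        score = model.score(X, lengths)
        if score > best_score:
            best_model, best_score = model, score
    return best_model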

lib/hmmlearn/base.py

Lines changed: 10 additions & 7 deletions
@@ -1043,17 +1043,19 @@ def _init(self, X, lengths=None):
             these should be ``n_samples``.
         """
         self._check_and_set_n_features(X)
-        uniform_prior = 1 / self.n_components
-        # We could consider random initialization here as well
+        nc = self.n_components
+        uniform_prior = 1 / nc
+        random_state = check_random_state(self.random_state)
         if (self._needs_init("s", "startprob_posterior_")
                 or self._needs_init("s", "startprob_prior_")):
             if self.startprob_prior is None:
                 startprob_init = uniform_prior
             else:
                 startprob_init = self.startprob_prior

-            self.startprob_prior_ = np.full(self.n_components, startprob_init)
-            self.startprob_posterior_ = self.startprob_prior_ * len(lengths)
+            self.startprob_prior_ = np.full(nc, startprob_init)
+            self.startprob_posterior_ = random_state.dirichlet(
+                np.full(nc, uniform_prior)) * len(lengths)

         if (self._needs_init("t", "transmat_posterior_")
                 or self._needs_init("t", "transmat_prior_")):
@@ -1062,9 +1064,10 @@ def _init(self, X, lengths=None):
             else:
                 transmat_init = self.transmat_prior
             self.transmat_prior_ = np.full(
-                (self.n_components, self.n_components), transmat_init)
-            self.transmat_posterior_ = (
-                self.transmat_prior_ * sum(lengths) / self.n_components)
+                (nc, nc), transmat_init)
+            self.transmat_posterior_ = random_state.dirichlet(
+                np.full(nc, uniform_prior), size=nc)
+            self.transmat_posterior_ *= sum(lengths) / nc

         n_fit_scalars_per_param = self._get_n_fit_scalars_per_param()
         if n_fit_scalars_per_param is not None:
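
A quick way to convince yourself that the new initialization only randomizes how the posterior mass is spread, not how much of it there is: the scaled Dirichlet draws sum to the same totals the old uniform scheme used. A standalone check with made-up values for nc and lengths, not part of the patch:

import numpy as np
from sklearn.utils import check_random_state

nc, lengths = 4, [100, 100, 100]   # illustrative values
rs = check_random_state(42)

startprob_posterior = rs.dirichlet(np.full(nc, 1 / nc)) * len(lengths)
# A Dirichlet draw sums to 1, so the posterior still sums to len(lengths),
# exactly like the old uniform initialization.
assert np.isclose(startprob_posterior.sum(), len(lengths))

transmat_posterior = rs.dirichlet(np.full(nc, 1 / nc), size=nc)
transmat_posterior *= sum(lengths) / nc
# Each row is its own Dirichlet draw, so every row still carries
# sum(lengths) / nc pseudo-counts; only their spread is random.
assert np.allclose(transmat_posterior.sum(axis=1), sum(lengths) / nc)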

lib/hmmlearn/tests/__init__.py

Lines changed: 9 additions & 0 deletions
@@ -85,3 +85,12 @@ def compare_variational_and_em_models(variational, em, sequences, lengths):
     vi_obs, vi_states = variational.sample(100, random_state=42)
     assert np.all(em_obs == vi_obs)
     assert np.all(em_states == vi_states)
+
+
+def vi_uniform_startprob_and_transmat(model, lengths):
+    nc = model.n_components
+    model.startprob_prior_ = np.full(nc, 1/nc)
+    model.startprob_posterior_ = np.full(nc, 1/nc) * len(lengths)
+    model.transmat_prior_ = np.full((nc, nc), 1/nc)
+    model.transmat_posterior_ = np.full((nc, nc), 1/nc) * sum(lengths)
+    return model

lib/hmmlearn/tests/test_variational_categorical.py

Lines changed: 4 additions & 2 deletions
@@ -4,7 +4,8 @@

 from hmmlearn import hmm, vhmm
 from . import (
-    assert_log_likelihood_increasing, compare_variational_and_em_models)
+    assert_log_likelihood_increasing, compare_variational_and_em_models,
+    vi_uniform_startprob_and_transmat)


 class TestVariationalCategorical:
@@ -218,8 +219,9 @@ def test_fit_and_compare_with_em(self, implementation):
         sequences, lengths = self.get_from_one_beal(7, 100, 1984)
         model = vhmm.VariationalCategoricalHMM(
             4, n_iter=500, random_state=1984,
+            init_params="e",
             implementation=implementation)
-
+        vi_uniform_startprob_and_transmat(model, lengths)
         model.fit(sequences, lengths)

         # The 1st hidden state will be "unused"

lib/hmmlearn/tests/test_variational_gaussian.py

Lines changed: 13 additions & 41 deletions
@@ -5,7 +5,7 @@
 from hmmlearn import hmm, vhmm
 from . import (
     assert_log_likelihood_increasing, compare_variational_and_em_models,
-    make_covar_matrix, normalized)
+    make_covar_matrix, normalized, vi_uniform_startprob_and_transmat)


 def get_mcgrory_titterington():
@@ -21,20 +21,6 @@ def get_mcgrory_titterington():
     return m1


-def get_mcgrory_titterington2d():
-    """ A subtle variation on the 1D Case..."""
-    m1 = hmm.GaussianHMM(4, init_params="", covariance_type="tied")
-    m1.n_features = 4
-    m1.startprob_ = np.array([1/4., 1/4., 1/4., 1/4.])
-    m1.transmat_ = np.array([[0.2, 0.2, 0.3, 0.3],
-                             [0.3, 0.2, 0.2, 0.3],
-                             [0.2, 0.3, 0.3, 0.2],
-                             [0.3, 0.3, 0.2, 0.2]])
-    m1.means_ = np.array([[-1.5, -1.5], [0, 0], [1.5, 1.5], [3., 3]])
-    m1.covars_ = np.sqrt([[0.25, 0], [0, .25]])
-    return m1
-
-
 def get_sequences(length, N, model, rs=None):
     sequences = []
     lengths = []
@@ -55,34 +41,41 @@ def test_random_fit(self, implementation, params='stmc', n_features=3,
                         n_components=3, **kwargs):
         h = hmm.GaussianHMM(n_components, self.covariance_type,
                             implementation=implementation, init_params="")
-        rs = check_random_state(None)
+        rs = check_random_state(1)
         h.startprob_ = normalized(rs.rand(n_components))
         h.transmat_ = normalized(
             rs.rand(n_components, n_components), axis=1)
         h.means_ = rs.randint(-20, 20, (n_components, n_features))
         h.covars_ = make_covar_matrix(
             self.covariance_type, n_components, n_features, random_state=rs)
-
-        lengths = [200] * 20
+        lengths = [200] * 5
         X, _state_sequence = h.sample(sum(lengths), random_state=rs)
         # Now learn a model
         model = vhmm.VariationalGaussianHMM(
-            n_components, n_iter=1000, tol=1e-9, random_state=rs,
+            n_components, n_iter=50, tol=1e-9, random_state=rs,
            covariance_type=self.covariance_type,
            implementation=implementation)
-        assert_log_likelihood_increasing(model, X, lengths, n_iter=100)
+
+        # Depending on the random seed, the model may converge rather quickly,
+        # and throw an assertion in this test, as the function we call
+        # computes each iteration independently by calling fit() `n_iter`
+        # times.
+        assert_log_likelihood_increasing(model, X, lengths, n_iter=10)

     @pytest.mark.parametrize("implementation", ["scaling", "log"])
     def test_fit_mcgrory_titterington1d(self, implementation):
         random_state = check_random_state(234234)
+        # Setup to assure convergence

         sequences, lengths = get_sequences(500, 1,
                                            model=get_mcgrory_titterington(),
                                            rs=random_state)
         model = vhmm.VariationalGaussianHMM(
             5, n_iter=1000, tol=1e-9, random_state=random_state,
+            init_params="mc",
             covariance_type=self.covariance_type,
             implementation=implementation)
+        vi_uniform_startprob_and_transmat(model, lengths)
         model.fit(sequences, lengths)
         # Perform one check that we are converging to the right answer
         assert (model.means_posterior_[-1][0]
@@ -101,27 +94,6 @@ def test_fit_mcgrory_titterington1d(self, implementation):

         compare_variational_and_em_models(model, em_hmm, sequences, lengths)

-    @pytest.mark.parametrize("implementation", ["scaling", "log"])
-    def test_fit_mcgrory_titterington2d(self, implementation):
-        sequences, lengths = get_sequences(100, 1,
-                                           model=get_mcgrory_titterington2d())
-
-        model = vhmm.VariationalGaussianHMM(
-            5, n_iter=1000, tol=1e-9, random_state=None,
-            covariance_type=self.covariance_type,
-            implementation=implementation)
-        model.fit(sequences, lengths)
-
-        em_hmm = hmm.GaussianHMM(n_components=model.n_components,
-                                 implementation=implementation,
-                                 covariance_type=self.covariance_type)
-        em_hmm.startprob_ = model.startprob_
-        em_hmm.transmat_ = model.transmat_
-        em_hmm.means_ = model.means_posterior_
-        em_hmm.covars_ = model._covars_
-
-        compare_variational_and_em_models(model, em_hmm, sequences, lengths)
-
     @pytest.mark.parametrize("implementation", ["scaling", "log"])
     def test_common_initialization(self, implementation):
         sequences, lengths = get_sequences(50, 10,
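
The comment added to test_random_fit above explains why its iteration budget was reduced: the helper it calls performs the iterations one fit() call at a time and expects the log likelihood to keep increasing, which becomes brittle once the model has already converged. Below is a loose, standalone analogue of that monotonicity idea, not the hmmlearn test helper itself; the synthetic data, model settings, and tolerance are made up, and score() on the variational model is assumed to be available.

import numpy as np
from hmmlearn import vhmm

rng = np.random.default_rng(0)
X = np.concatenate([rng.normal(-2, 1, (100, 1)), rng.normal(2, 1, (100, 1))])
lengths = [200]

scores = []
for n_iter in range(1, 6):
    # Refit from scratch with a growing iteration budget.
    model = vhmm.VariationalGaussianHMM(2, n_iter=n_iter, random_state=7)
    model.fit(X, lengths)
    scores.append(model.score(X, lengths))

# Require the training score to be (weakly) increasing.  With some seeds this
# can trip even though the fit is fine, which is exactly the brittleness the
# updated test works around by keeping n_iter small.
assert np.all(np.diff(scores) >= -1e-6)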
