From ea80914499c99a004b42a708c0bcf2747e243869 Mon Sep 17 00:00:00 2001 From: Aaron Jaech Date: Fri, 25 Jun 2021 23:53:13 -0700 Subject: [PATCH] bugfix: sort eigenvalues when computing pca vectors Summary: this fixes a bug reported on GitHub: https://github.com/facebookresearch/fastText/issues/1199 Differential Revision: D29413727 fbshipit-source-id: e930d55bf525b223843f944847c84e18cd6894b6 --- python/fasttext_module/fasttext/util/util.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/fasttext_module/fasttext/util/util.py b/python/fasttext_module/fasttext/util/util.py index 7218b3671..d7b17a866 100644 --- a/python/fasttext_module/fasttext/util/util.py +++ b/python/fasttext_module/fasttext/util/util.py @@ -103,8 +103,9 @@ def _reduce_matrix(X_orig, dim, eigv): X = X_orig[:mapping_size] X = X - X.mean(axis=0, dtype=np.float32) C = np.divide(np.matmul(X.T, X), X.shape[0] - 1, dtype=np.float32) - _, U = np.linalg.eig(C) - eigv = U[:, :dim] + V, U = np.linalg.eig(C) + ind = list(V.argsort()[-dim:]) # find the indices of the top k eigenvalues + eigv = U[:, ind] X_reduced = np.matmul(X_orig, eigv)