From 55549a1746cb0c0eb2aa79c9d60c71481d4c5c55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ma=C4=8Duda?= <lukas.macuda@gmail.com>
Date: Fri, 7 Dec 2018 09:49:38 +0100
Subject: [PATCH 1/7] Allow registering stopwords files outside of the lib
 directory

---
 Readme.md    |  2 +-
 lib/lda.js   | 17 +++++++++++-
 package.json |  2 +-
 test4.js     | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 95 insertions(+), 3 deletions(-)
 create mode 100644 test4.js

diff --git a/Readme.md b/Readme.md
index 6d8e727..9c4538b 100644
--- a/Readme.md
+++ b/Readme.md
@@ -95,7 +95,7 @@ result = lda(documents, 2, 5, ['de']);
 result = lda(documents, 2, 5, ['en', 'de']);
 ```
 
-To add a new language-specific stop-words list, create a file /lda/lib/stopwords_XX.js where XX is the id for the language. For example, a French stop-words list could be named "stopwords_fr.js". The contents of the file should follow the format of an [existing](https://github.com/primaryobjects/lda/blob/master/lib/stopwords_en.js) stop-words list. The format is, as follows:
+To add a new language-specific stop-words list, register a file to the specific language. For example, a French stop-words register the language `lda.registerStopWords('fr', '/path/to/the/french/stopwords.js')`. The contents of the file should follow the format of an [existing](https://github.com/primaryobjects/lda/blob/master/lib/stopwords_en.js) stop-words list. The format is, as follows:
 
 ```javascript
 exports.stop_words = [
diff --git a/lib/lda.js b/lib/lda.js
index 081e854..4d795ab 100644
--- a/lib/lda.js
+++ b/lib/lda.js
@@ -1,5 +1,11 @@
 var stem = require('stem-porter');
 
+var STOP_WORDS_MAP = {
+    en: './stopwords_en.js',
+    de: './stopwords_de.js',
+    es: './stopwords_es.js',
+};
+
 //
 // Based on javascript implementation https://github.com/awaisathar/lda.js
 // Original code based on http://www.arbylon.net/projects/LdaGibbsSampler.java
@@ -22,7 +28,11 @@ var process = function(sentences, numberOfTopics, numberOfTermsPerTopic, languag
       var stopwords = new Array();
 
       languages.forEach(function(value) {
-          var stopwordsLang = require('./stopwords_' + value + ".js");
+          var path = STOP_WORDS_MAP[value];
+          if (!path) {
+              return;
+          }
+          var stopwordsLang = require(path);
           stopwords = stopwords.concat(stopwordsLang.stop_words);
       });
 
@@ -99,6 +109,11 @@ var process = function(sentences, numberOfTopics, numberOfTermsPerTopic, languag
     return result;
 }
 
+process.registerStopwords = function(language, path) {
+    STOP_WORDS_MAP[language] = path;
+    return this;
+};
+
 function makeArray(x) {
     var a = new Array();    
     for (var i=0;i<x;i++)  {
diff --git a/package.json b/package.json
index 4218389..c97b6a1 100644
--- a/package.json
+++ b/package.json
@@ -13,7 +13,7 @@
   },
   "main": "./lib",
   "dependencies": {
-  "stem-porter": "*"
+    "stem-porter": "*"
   },
   "engines": {
     "node": ">= 0.8.x"
diff --git a/test4.js b/test4.js
new file mode 100644
index 0000000..150fc59
--- /dev/null
+++ b/test4.js
@@ -0,0 +1,77 @@
+const lda = require('./lib/lda');
+const path = require('path');
+
+lda.registerStopwords('en_override', path.resolve(__dirname, './lib/stopwords_en.js'));
+
+const collection = [
+  [
+    'Ruby slippers are pretty and fun.',
+    'Long walks in the park are fun.',
+    '',
+    'Slippers are soft on your feet.'
+  ],
+  [
+    'Ruby slippers are pretty and fun.',
+    'Long walks in the park are fun.',
+    null,
+    'Slippers are soft on your feet.'
+  ],
+  [
+    '',
+    'Ruby slippers are pretty and fun.',
+    'Long walks in the park are fun.',
+    'Slippers are soft on your feet.'
+  ],
+  [
+    null,
+    'Ruby slippers are pretty and fun.',
+    'Long walks in the park are fun.',
+    'Slippers are soft on your feet.'
+  ],
+  [
+    'Ruby slippers are pretty and fun.',
+    'Long walks in the park are fun.',
+    'Slippers are soft on your feet.',
+    ''
+  ],
+  [
+    'Ruby slippers are pretty and fun.',
+    'Long walks in the park are fun.',
+    'Slippers are soft on your feet.',
+    null
+  ]
+];
+
+var probabilities = [];
+
+collection.forEach((documents, i) => {
+  const results = lda(documents, 3, 2, ['en_override'], null, null, 123);
+
+  // Save the probabilities for each group. The values should be the same, since we're using the same random seed.
+  const groupProbs = [];
+  results.forEach(group => {
+    group.forEach(row => {
+      groupProbs.push(row.probability);
+    });
+  });
+
+  // Store the entire group in an array.
+  probabilities.push(groupProbs);
+
+  //console.log('\nSet ' + (i + 1));
+  //console.log(results);
+});
+
+var success = true;
+
+// Verify the probabilities for each group are the same, even with empty and null values in the docs.
+probabilities.forEach((group, i) => {
+  if (group[0] !== 0.15 || group[1] !== 0.14 || group[2] !== 0.16 || group[3] !== 0.15 || group[4] !== 0.16 || group[5] !== 0.14) {
+    console.log('Failed expected values for group ' + i);
+    success = false;
+  }
+});
+
+if (success) {
+  console.log('\nResult OK.');
+}
\ No newline at end of file

From b85cd49536bafdde23e67c3f968b37903ea11894 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ma=C4=8Duda?= <lukas.macuda@gmail.com>
Date: Fri, 7 Dec 2018 09:53:56 +0100
Subject: [PATCH 2/7] update of the readme

---
 Readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Readme.md b/Readme.md
index 9c4538b..1a3f214 100644
--- a/Readme.md
+++ b/Readme.md
@@ -95,7 +95,7 @@ result = lda(documents, 2, 5, ['de']);
 result = lda(documents, 2, 5, ['en', 'de']);
 ```
 
-To add a new language-specific stop-words list, register a file to the specific language. For example, a French stop-words register the language `lda.registerStopWords('fr', '/path/to/the/french/stopwords.js')`. The contents of the file should follow the format of an [existing](https://github.com/primaryobjects/lda/blob/master/lib/stopwords_en.js) stop-words list. The format is, as follows:
+To add a new language-specific stop-words list, register a file for the specific language. For example, a French stop-words register the language `lda.registerStopWords('fr', '/path/to/the/french/stopwords.js')`. The contents of the file should follow the format of an [existing](https://github.com/primaryobjects/lda/blob/master/lib/stopwords_en.js) stop-words list. The format is, as follows:
 
 ```javascript
 exports.stop_words = [

From a22a8f298f7f0663da163c4df12af4e8f1bdda54 Mon Sep 17 00:00:00 2001
From: Kory Becker <kbecker@primaryobjects.com>
Date: Mon, 15 Jul 2019 15:12:45 -0400
Subject: [PATCH 3/7] Updated documentation for languages.

---
 Readme.md    | 14 ++++++---
 package.json |  2 +-
 test5.js     | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 96 insertions(+), 5 deletions(-)
 create mode 100644 test5.js

diff --git a/Readme.md b/Readme.md
index 1a3f214..b3ac774 100644
--- a/Readme.md
+++ b/Readme.md
@@ -69,20 +69,20 @@ var result = lda(documents, 2, 5);
 for (var i in result) {
 	var row = result[i];
 	console.log('Topic ' + (parseInt(i) + 1));
-	
+
 	// For each term.
 	for (var j in row) {
 		var term = row[j];
 		console.log(term.term + ' (' + term.probability + '%)');
 	}
-	
+
 	console.log('');
 }
 ```
 
 ## Additional Languages
 
-LDA uses [stop-words](https://en.wikipedia.org/wiki/Stop_words) to ignore common terms in the text (for example: this, that, it, we). By default, the stop-words list uses English. To use additional languages, you can specify an array of language ids, as follows: 
+LDA uses [stop-words](https://en.wikipedia.org/wiki/Stop_words) to ignore common terms in the text (for example: this, that, it, we). By default, the stop-words list uses English. To use additional languages, you can specify an array of language ids, as follows:
 
 ```javascript
 // Use English (this is the default).
@@ -95,7 +95,13 @@ result = lda(documents, 2, 5, ['de']);
 result = lda(documents, 2, 5, ['en', 'de']);
 ```
 
-To add a new language-specific stop-words list, register a file for the specific language. For example, a French stop-words register the language `lda.registerStopWords('fr', '/path/to/the/french/stopwords.js')`. The contents of the file should follow the format of an [existing](https://github.com/primaryobjects/lda/blob/master/lib/stopwords_en.js) stop-words list. The format is, as follows:
+To add a new language-specific stop-words list, register a file for the specific language. For example, to register a French stop-words list use the following code.
+
+```js
+lda.registerStopWords('fr', '/path/to/the/french/stopwords.js')
+```
+
+The contents of the file should follow the format of an [existing](https://github.com/primaryobjects/lda/blob/master/lib/stopwords_en.js) stop-words list. The format is shown below.
 
 ```javascript
 exports.stop_words = [
diff --git a/package.json b/package.json
index c97b6a1..07ca2e6 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "lda",
-  "version": "0.2.0",
+  "version": "0.3.0",
   "description": "LDA topic modeling for node.js.",
   "author": {
     "name": "Kory Becker",
diff --git a/test5.js b/test5.js
new file mode 100644
index 0000000..76a97ed
--- /dev/null
+++ b/test5.js
@@ -0,0 +1,85 @@
+var lda = require('./lib/lda');
+
+var text = 'Hola, tu estas muy ocupada hoy? Esta bonita afuera, pero hace un poco de calor hoy. Tu tienes algo? Tu quieres ir a el banco? Tu puedes comprar algo aqui con tu dinero.';
+var documents = text.match( /[^\.!\?]+[\.!\?]+/g );
+
+var result_en = lda(documents, 2, 5, ['en'], null, null, 123);
+var result_es = lda(documents, 2, 5, ['es'], null, null, 123);
+var result_multi = lda(documents, 2, 5, ['invalid1', 'en', 'es', 'invalid2'], null, null, 123);
+
+var findTerm = function(term, topics) {
+  for (var i in topics) {
+    var row = topics[i];
+    console.log('Topic ' + (parseInt(i) + 1));
+
+    // For each term.
+    for (var j in row) {
+      var aterm = row[j];
+      console.log(aterm.term + ' (' + aterm.probability + '%)');
+
+      if (aterm.term === term) {
+        console.log('*** Found ' + term);
+        return term;
+      }
+    }
+
+    console.log('');
+  }
+
+  return null;
+};
+
+// For each topic.
+var success = true;
+var target_term = 'tu'; // Stop-words term that should be removed when using the designated stop-words list (i.e., spanish).
+
+// Look for the stop-word in the resulting topics using English and Spanish. The term should exist in English, but not in Spanish.
+console.log('Using English stop-words.');
+var result1 = findTerm(target_term, result_en);
+if (!result1) {
+  console.log('\nFailed English stop-words check. Failed to find expected stop-word: "' + target_term + '" as a topic.')
+  success = false;
+}
+
+console.log('\nUsing Spanish stop-words.');
+var result2 = findTerm(target_term, result_es)
+if (result2) {
+  console.log('\nFailed Spanish stop-words check. Found stop-word: "' + target_term + '" as a topic, when it should have been removed.')
+  success = false;
+}
+
+console.log('\nUsing English and Spanish stop-words.');
+var result3 = findTerm(target_term, result_multi);
+if (result3) {
+  console.log('\nFailed English, Spanish, invalid stop-words check. Found stop-word: "' + target_term + '" as a topic, when it should have been removed.')
+  success = false;
+}
+
+// Confirm the probabilities are equal when using the Spanish stop-words list and a list containing Spanish and invalid stop-word paths.
+const groupProbs1 = [];
+result_es.forEach(group => {
+  group.forEach(row => {
+    groupProbs1.push(row.probability);
+  });
+});
+
+const groupProbs2 = [];
+result_multi.forEach(group => {
+  group.forEach(row => {
+    groupProbs2.push(row.probability);
+  });
+});
+
+for (var i=0; i<groupProbs1.length; i++) {
+  if (groupProbs1[i] !== groupProbs2[i]) {
+    console.log('\nFailed probability check for Spanish stop-words multi list.')
+    console.log(groupProbs1[i]);
+    console.log(groupProbs2[i]);
+    success = false;
+    break;
+  }
+}
+
+if (success) {
+  console.log('\nResult OK.');
+}

From 6d13df93507e95398721f53c145888393b3165cf Mon Sep 17 00:00:00 2001
From: Kory Becker <kbecker@primaryobjects.com>
Date: Mon, 15 Jul 2019 15:20:29 -0400
Subject: [PATCH 4/7] Cleanup.

---
 test5.js | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/test5.js b/test5.js
index 76a97ed..30a0aaf 100644
--- a/test5.js
+++ b/test5.js
@@ -38,21 +38,21 @@ console.log('Using English stop-words.');
 var result1 = findTerm(target_term, result_en);
 if (!result1) {
   console.log('\nFailed English stop-words check. Failed to find expected stop-word: "' + target_term + '" as a topic.')
-  success = false;
+  return;
 }
 
 console.log('\nUsing Spanish stop-words.');
 var result2 = findTerm(target_term, result_es)
 if (result2) {
   console.log('\nFailed Spanish stop-words check. Found stop-word: "' + target_term + '" as a topic, when it should have been removed.')
-  success = false;
+  return;
 }
 
 console.log('\nUsing English and Spanish stop-words.');
 var result3 = findTerm(target_term, result_multi);
 if (result3) {
   console.log('\nFailed English, Spanish, invalid stop-words check. Found stop-word: "' + target_term + '" as a topic, when it should have been removed.')
-  success = false;
+  return;
 }
 
 // Confirm the probabilities are equal when using the Spanish stop-words list and a list containing Spanish and invalid stop-word paths.
@@ -75,8 +75,7 @@ for (var i=0; i<groupProbs1.length; i++) {
     console.log('\nFailed probability check for Spanish stop-words multi list.')
     console.log(groupProbs1[i]);
     console.log(groupProbs2[i]);
-    success = false;
-    break;
+    return;
   }
 }
 

From 8d6a4beb1dfc5905bd977973bf440ef38557a84b Mon Sep 17 00:00:00 2001
From: Kory Becker <kbecker@primaryobjects.com>
Date: Mon, 15 Jul 2019 15:23:13 -0400
Subject: [PATCH 5/7] Wording.

---
 test5.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test5.js b/test5.js
index 30a0aaf..c533393 100644
--- a/test5.js
+++ b/test5.js
@@ -51,7 +51,7 @@ if (result2) {
 console.log('\nUsing English and Spanish stop-words.');
 var result3 = findTerm(target_term, result_multi);
 if (result3) {
-  console.log('\nFailed English, Spanish, invalid stop-words check. Found stop-word: "' + target_term + '" as a topic, when it should have been removed.')
+  console.log('\nFailed Multiple stop-words check. Found stop-word: "' + target_term + '" as a topic, when it should have been removed.')
   return;
 }
 

From 6ea16fb15a88ad84fda179b5f052f49adb609222 Mon Sep 17 00:00:00 2001
From: Kory Becker <kbecker@primaryobjects.com>
Date: Mon, 15 Jul 2019 21:33:23 -0400
Subject: [PATCH 6/7] Added backwards compatibility for languages already
 existing with filename stopwords_xx.js.

---
 Readme.md  |   2 +-
 lib/lda.js |  62 ++++++++++++++++++--------------
 test5.js   |  14 ++++----
 test6.js   | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 147 insertions(+), 34 deletions(-)
 create mode 100644 test6.js

diff --git a/Readme.md b/Readme.md
index b3ac774..72ca6ba 100644
--- a/Readme.md
+++ b/Readme.md
@@ -98,7 +98,7 @@ result = lda(documents, 2, 5, ['en', 'de']);
 To add a new language-specific stop-words list, register a file for the specific language. For example, to register a French stop-words list use the following code.
 
 ```js
-lda.registerStopWords('fr', '/path/to/the/french/stopwords.js')
+lda.registerStopwords('fr', '/path/to/the/french/stopwords.js')
 ```
 
 The contents of the file should follow the format of an [existing](https://github.com/primaryobjects/lda/blob/master/lib/stopwords_en.js) stop-words list. The format is shown below.
diff --git a/lib/lda.js b/lib/lda.js
index 4d795ab..4f19228 100644
--- a/lib/lda.js
+++ b/lib/lda.js
@@ -28,11 +28,21 @@ var process = function(sentences, numberOfTopics, numberOfTermsPerTopic, languag
       var stopwords = new Array();
 
       languages.forEach(function(value) {
+          var stopwordsLang;
+
           var path = STOP_WORDS_MAP[value];
           if (!path) {
-              return;
+              // No registered path for this language: fall back to the legacy ./stopwords_<lang>.js file in lib.
+              try {
+                  stopwordsLang = require('./stopwords_' + value + ".js");
+              }
+              catch (e) { // named binding on purpose: a bare "catch {" is a SyntaxError before Node 10 (engines: >= 0.8.x)
+                  console.log('Warning: Ignoring invalid stop-word list "' + value + '". Please register your stop-words file using: lda.registerStopwords(\'' + value + '\', \'/path/to/stopwords_' + value + '.js\')');
+                  return;
+              }
           }
-          var stopwordsLang = require(path);
+
+          stopwordsLang = stopwordsLang || require(path);
           stopwords = stopwords.concat(stopwordsLang.stop_words);
       });
 
@@ -47,15 +57,15 @@ var process = function(sentences, numberOfTopics, numberOfTermsPerTopic, languag
               var w=words[wc].toLowerCase().replace(/[^a-z\'A-Z0-9\u00C0-\u00ff ]+/g, '');
               var wStemmed = stem(w);
               if (w=="" || !wStemmed || w.length==1 || stopwords.indexOf(w.replace("'", "")) > -1 || stopwords.indexOf(wStemmed) > -1 || w.indexOf("http")==0) continue;
-              if (f[wStemmed]) { 
+              if (f[wStemmed]) {
                   f[wStemmed]=f[wStemmed]+1;
-              } 
-              else if(wStemmed) { 
-                  f[wStemmed]=1; 
+              }
+              else if(wStemmed) {
+                  f[wStemmed]=1;
                   vocab.push(wStemmed);
                   vocabOrig[wStemmed] = w;
               };
-              
+
               documents[i].push(vocab.indexOf(wStemmed));
           }
       }
@@ -88,14 +98,14 @@ var process = function(sentences, numberOfTopics, numberOfTermsPerTopic, languag
 
           //console.log('Topic ' + (k + 1));
           var row = [];
-          
+
           for (var t = 0; t < topTerms; t++) {
               var topicTerm=things[t].split("_")[2];
               var prob=parseInt(things[t].split("_")[0]*100);
               if (prob<2) continue;
-              
+
               //console.log('Top Term: ' + topicTerm + ' (' + prob + '%)');
-              
+
               var term = {};
               term.term = topicTerm;
               term.probability = parseFloat(things[t].split("_")[0]);
@@ -105,7 +115,7 @@ var process = function(sentences, numberOfTopics, numberOfTermsPerTopic, languag
           result.push(row);
       }
     }
-    
+
     return result;
 }
 
@@ -115,7 +125,7 @@ process.registerStopwords = function(language, path) {
 };
 
 function makeArray(x) {
-    var a = new Array();    
+    var a = new Array();
     for (var i=0;i<x;i++)  {
         a[i]=0;
     }
@@ -123,7 +133,7 @@ function makeArray(x) {
 }
 
 function make2DArray(x,y) {
-    var a = new Array();    
+    var a = new Array();
     for (var i=0;i<x;i++)  {
         a[i]=new Array();
         for (var j=0;j<y;j++)
@@ -133,7 +143,7 @@ function make2DArray(x,y) {
 }
 
 var lda = new function() {
-    var documents,z,nw,nd,nwsum,ndsum,thetasum,phisum,V,K,alpha,beta; 
+    var documents,z,nw,nd,nwsum,ndsum,thetasum,phisum,V,K,alpha,beta;
     var THIN_INTERVAL = 20;
     var BURN_IN = 100;
     var ITERATIONS = 1000;
@@ -150,21 +160,21 @@ var lda = new function() {
         this.documents = docs;
         this.V = v;
         this.dispcol=0;
-        this.numstats=0; 
+        this.numstats=0;
     }
     this.initialState = function (K) {
         var i;
         var M = this.documents.length;
-        this.nw = make2DArray(this.V,K); 
-        this.nd = make2DArray(M,K); 
-        this.nwsum = makeArray(K); 
+        this.nw = make2DArray(this.V,K);
+        this.nd = make2DArray(M,K);
+        this.nwsum = makeArray(K);
         this.ndsum = makeArray(M);
         this.z = new Array();   for (i=0;i<M;i++) this.z[i]=new Array();
         for (var m = 0; m < M; m++) {
                 var N = this.documents[m].length;
                 this.z[m] = new Array();
                 for (var n = 0; n < N; n++) {
-                    var topic = parseInt(""+(this.getRandom() * K));                 
+                    var topic = parseInt(""+(this.getRandom() * K));
                     this.z[m][n] = topic;
                     this.nw[this.documents[m][n]][topic]++;
                     this.nd[m][topic]++;
@@ -173,7 +183,7 @@ var lda = new function() {
                 this.ndsum[m] = N;
         }
     }
-    
+
     this.gibbs = function (K,alpha,beta) {
         var i;
         this.K = K;
@@ -205,17 +215,17 @@ var lda = new function() {
             }
             if ((i > this.BURN_IN) && (this.SAMPLE_LAG > 0) && (i % this.SAMPLE_LAG == 0)) {
                 this.updateParams();
-                //document.write("|");                
+                //document.write("|");
                 if (i % this.THIN_INTERVAL != 0)
                     this.dispcol++;
             }
             if (this.dispcol >= 100) {
-                //document.write("*<br/>");                
+                //document.write("*<br/>");
                 this.dispcol = 0;
             }
         }
     }
-    
+
     this.sampleFullConditional = function(m,n) {
         var topic = this.z[m][n];
         this.nw[this.documents[m][n]][topic]--;
@@ -241,7 +251,7 @@ var lda = new function() {
         this.ndsum[m]++;
         return topic;
     }
-    
+
     this.updateParams =function () {
         for (var m = 0; m < this.documents.length; m++) {
             for (var k = 0; k < this.K; k++) {
@@ -255,7 +265,7 @@ var lda = new function() {
         }
         this.numstats++;
     }
-    
+
     this.getTheta = function() {
         var theta = new Array(); for(var i=0;i<this.documents.length;i++) theta[i] = new Array();
         if (this.SAMPLE_LAG > 0) {
@@ -273,7 +283,7 @@ var lda = new function() {
         }
         return theta;
     }
-    
+
     this.getPhi = function () {
         var phi = new Array(); for(var i=0;i<this.K;i++) phi[i] = new Array();
         if (this.SAMPLE_LAG > 0) {
diff --git a/test5.js b/test5.js
index c533393..f91cc36 100644
--- a/test5.js
+++ b/test5.js
@@ -10,15 +10,15 @@ var result_multi = lda(documents, 2, 5, ['invalid1', 'en', 'es', 'invalid2'], nu
 var findTerm = function(term, topics) {
   for (var i in topics) {
     var row = topics[i];
-    console.log('Topic ' + (parseInt(i) + 1));
+    //console.log('Topic ' + (parseInt(i) + 1));
 
     // For each term.
     for (var j in row) {
       var aterm = row[j];
-      console.log(aterm.term + ' (' + aterm.probability + '%)');
+      //console.log(aterm.term + ' (' + aterm.probability + '%)');
 
       if (aterm.term === term) {
-        console.log('*** Found ' + term);
+        console.log('Found "' + term + '"');
         return term;
       }
     }
@@ -37,21 +37,21 @@ var target_term = 'tu'; // Stop-words term that should be removed when using the
 console.log('Using English stop-words.');
 var result1 = findTerm(target_term, result_en);
 if (!result1) {
-  console.log('\nFailed English stop-words check. Failed to find expected stop-word: "' + target_term + '" as a topic.')
+  console.log('\nFailed English stop-words check! Failed to find expected stop-word: "' + target_term + '" as a topic.')
   return;
 }
 
 console.log('\nUsing Spanish stop-words.');
 var result2 = findTerm(target_term, result_es)
 if (result2) {
-  console.log('\nFailed Spanish stop-words check. Found stop-word: "' + target_term + '" as a topic, when it should have been removed.')
+  console.log('\nFailed Spanish stop-words check! Found stop-word: "' + target_term + '" as a topic, when it should have been removed.')
   return;
 }
 
 console.log('\nUsing English and Spanish stop-words.');
 var result3 = findTerm(target_term, result_multi);
 if (result3) {
-  console.log('\nFailed Multiple stop-words check. Found stop-word: "' + target_term + '" as a topic, when it should have been removed.')
+  console.log('\nFailed Multiple stop-words check! Found stop-word: "' + target_term + '" as a topic, when it should have been removed.')
   return;
 }
 
@@ -72,7 +72,7 @@ result_multi.forEach(group => {
 
 for (var i=0; i<groupProbs1.length; i++) {
   if (groupProbs1[i] !== groupProbs2[i]) {
-    console.log('\nFailed probability check for Spanish stop-words multi list.')
+    console.log('\nFailed probability check for Spanish stop-words multi list!')
     console.log(groupProbs1[i]);
     console.log(groupProbs2[i]);
     return;
diff --git a/test6.js b/test6.js
new file mode 100644
index 0000000..533255c
--- /dev/null
+++ b/test6.js
@@ -0,0 +1,103 @@
+var lda = require('./lib/lda');
+const path = require('path');
+const fs = require('fs');
+
+const filePath = './custom_lang.js';
+
+// Create a custom stop-words file.
+fs.writeFileSync(filePath, 'exports.stop_words = ["ignore"];');
+
+var findTerm = function(term, topics) {
+  for (var i in topics) {
+    var row = topics[i];
+
+    // For each term.
+    for (var j in row) {
+      var aterm = row[j];
+      if (aterm.term === term) {
+        console.log('Found "' + term + '"');
+        return term;
+      }
+    }
+
+    console.log('');
+  }
+
+  return null;
+};
+
+var text = 'Ignore the stop words to ignore within this ignore text and test the ignore feature.';
+var target_term = 'ignore'; // Stop-words term that should be removed when using the designated stop-words list.
+var documents = text.match( /[^\.!\?]+[\.!\?]+/g );
+var results;
+var success1 = false;
+var success2 = false;
+var success3 = false;
+
+console.log('Test 1: Run lda with the default stop-words list. Ignore warning.');
+
+results = lda(documents, 2, 5, ['custom_lang'], null, null, 123);
+
+// Look for the stop-word in the resulting topics.
+result = findTerm(target_term, results);
+if (result) {
+  success1 = true;
+}
+else {
+  console.log('\nFailed Test 1 stop-words check! Error finding stop-word "' + target_term + '" as a topic.')
+  fs.unlinkSync(filePath);
+  return;
+}
+
+console.log('\nTest 2: Run lda with a default stop-words list copied into the lib folder.');
+
+// Copy the language file to a default file in the lib folder.
+const copyPath = './lib/stopwords_' + filePath.replace('./', '');
+fs.copyFileSync(filePath, copyPath);
+
+results = lda(documents, 2, 5, ['custom_lang'], null, null, 123);
+
+// Look for the stop-word in the resulting topics. It should not be found.
+var result = findTerm(target_term, results);
+if (result) {
+  console.log('\nFailed Test 2 stop-words check! Found stop-word "' + target_term + '" as a topic.')
+  fs.unlinkSync(copyPath);
+  fs.unlinkSync(filePath);
+  return;
+}
+else {
+  success2 = true;
+}
+
+// Cleanup.
+fs.unlinkSync(copyPath);
+
+console.log('\nTest 3: Register the custom stop-words list.');
+lda.registerStopwords('custom_lang', path.resolve(__dirname, filePath));
+results = lda(documents, 2, 5, ['custom_lang'], null, null, 123);
+
+// Look for the stop-word in the resulting topics. It should not be found.
+result = findTerm(target_term, results);
+if (result) {
+  console.log('\nFailed Test 3 stop-words check! Found stop-word "' + target_term + '" as a topic.')
+  fs.unlinkSync(filePath);
+  return;
+}
+else {
+  success3 = true;
+}
+
+// Cleanup.
+fs.unlinkSync(filePath);
+
+if (success1) {
+  console.log('Result 1 OK.');
+}
+
+if (success2) {
+  console.log('Result 2 OK.');
+}
+
+if (success3) {
+  console.log('Result 3 OK.');
+}

From 821cc146d041e83dc0e6bc7db17c2a9fd936d5c5 Mon Sep 17 00:00:00 2001
From: Kory Becker <kbecker@primaryobjects.com>
Date: Mon, 15 Jul 2019 21:35:27 -0400
Subject: [PATCH 7/7] Added context.

---
 test6.js | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test6.js b/test6.js
index 533255c..4e686c6 100644
--- a/test6.js
+++ b/test6.js
@@ -34,7 +34,7 @@ var success1 = false;
 var success2 = false;
 var success3 = false;
 
-console.log('Test 1: Run lda with the default stop-words list. Ignore warning.');
+console.log('Test 1: Run lda without a custom stop-words list. Ignore warning.');
 
 results = lda(documents, 2, 5, ['custom_lang'], null, null, 123);
 
@@ -49,7 +49,7 @@ else {
   return;
 }
 
-console.log('\nTest 2: Run lda with a default stop-words list copied into the lib folder.');
+console.log('\nTest 2: Run lda with a custom stop-words list copied into the lib folder.');
 
 // Copy the language file to a default file in the lib folder.
 const copyPath = './lib/stopwords_' + filePath.replace('./', '');
@@ -72,7 +72,7 @@ else {
 // Cleanup.
 fs.unlinkSync(copyPath);
 
-console.log('\nTest 3: Register the custom stop-words list.');
+console.log('\nTest 3: Register a custom stop-words list programmatically.');
 lda.registerStopwords('custom_lang', path.resolve(__dirname, filePath));
 results = lda(documents, 2, 5, ['custom_lang'], null, null, 123);