apanimesh061
diff --git a/‎plugin.iml
Lines changed: 4 additions & 4 deletions b/‎plugin.iml
Lines changed: 4 additions & 4 deletions
diff --git a/‎pom.xml
Lines changed: 2 additions & 2 deletions b/‎pom.xml
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/main/checkstyle/checkstyle.xml
Lines changed: 5 additions & 7 deletions b/‎src/main/checkstyle/checkstyle.xml
Lines changed: 5 additions & 7 deletions
diff --git a/‎src/main/java/com/vader/sentiment/analyzer/SentimentAnalyzer.java
Lines changed: 172 additions & 157 deletions b/‎src/main/java/com/vader/sentiment/analyzer/SentimentAnalyzer.java
Lines changed: 172 additions & 157 deletions
diff --git a/‎src/main/java/com/vader/sentiment/processor/InputAnalyzer.java
Lines changed: 14 additions & 16 deletions b/‎src/main/java/com/vader/sentiment/processor/InputAnalyzer.java
Lines changed: 14 additions & 16 deletions
diff --git a/‎src/main/java/com/vader/sentiment/processor/InputAnalyzerInterface.java
Lines changed: 10 additions & 6 deletions b/‎src/main/java/com/vader/sentiment/processor/InputAnalyzerInterface.java
Lines changed: 10 additions & 6 deletions
diff --git a/‎src/main/java/com/vader/sentiment/processor/TextProperties.java
Lines changed: 66 additions & 55 deletions b/‎src/main/java/com/vader/sentiment/processor/TextProperties.java
Lines changed: 66 additions & 55 deletions
@@ -5,7 +5,7 @@
       <configuration sdkName="Python 2.7.11 (E:\Miniconda2\python.exe)" />
     </facet>
   </component>
-  <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8" inherit-compiler-output="false">
+  <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
     <output url="file://$MODULE_DIR$/target/classes" />
     <output-test url="file://$MODULE_DIR$/target/test-classes" />
     <content url="file://$MODULE_DIR$">
@@ -19,10 +19,10 @@
     <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
     <orderEntry type="library" name="Python 2.7.11 (E:\Miniconda2\python.exe) interpreter library" level="application" />
-    <orderEntry type="library" name="Maven: org.apache.lucene:lucene-analyzers-common:6.4.1" level="project" />
-    <orderEntry type="library" name="Maven: org.apache.lucene:lucene-core:6.4.1" level="project" />
+    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.lucene:lucene-analyzers-common:6.4.1" level="project" />
+    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.lucene:lucene-core:6.4.1" level="project" />
     <orderEntry type="library" name="Maven: commons-lang:commons-lang:2.6" level="project" />
-    <orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" />
+    <orderEntry type="library" scope="PROVIDED" name="Maven: log4j:log4j:1.2.17" level="project" />
     <orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.12" level="project" />
     <orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />
   </component>
 
@@ -153,7 +153,7 @@
             <groupId>org.apache.lucene</groupId>
             <artifactId>lucene-analyzers-common</artifactId>
             <version>6.4.1</version>
-            <!--<scope>provided</scope>-->
+            <scope>provided</scope>
         </dependency>
         <dependency>
             <groupId>commons-lang</groupId>
@@ -164,7 +164,7 @@
             <groupId>log4j</groupId>
             <artifactId>log4j</artifactId>
             <version>1.2.17</version>
-            <!--<scope>provided</scope>-->
+            <scope>provided</scope>
         </dependency>
         <dependency>
             <groupId>junit</groupId>
 
@@ -32,14 +32,11 @@
         <property name="acceptOnMatch" value="false"/>
     </module>
     <module name="SuppressionCommentFilter">
-        <!--
-          Use suppressions.xml for suppressions, this is only example.
-          checkFormat will prevent suppression comments from being valid.
-        -->
-        <property name="checkFormat" value="IGNORETHIS"/>
-        <property name="offCommentFormat" value="CSOFF\: .*"/>
-        <property name="onCommentFormat" value="CSON\: .*"/>
+        <property name="offCommentFormat" value="CHECKSTYLE.OFF\: ([\w\|]+)"/>
+        <property name="onCommentFormat" value="CHECKSTYLE.ON\: ([\w\|]+)"/>
+        <property name="checkFormat" value="$1"/>
     </module>
+
     <!--<module name="SuppressionFilter">-->
         <!--<property name="file" value="${checkstyle.suppressions.file}"/>-->
     <!--</module>-->
@@ -261,6 +258,7 @@
         <module name="RequireThis"/>
         <module name="ReturnCount">
             <property name="maxForVoid" value="0"/>
+            <property name="max" value="5"/>
         </module>
         <module name="SimplifyBooleanExpression"/>
         <module name="SimplifyBooleanReturn"/>
 
@@ -30,11 +30,11 @@
 import java.util.List;
 
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.miscellaneous.LengthFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.Tokenizer;
 
 /**
  * This class defines a Lucene analyzer that is applied on the input string in
@@ -46,7 +46,6 @@
 class InputAnalyzer implements InputAnalyzerInterface {
     /**
      * This function applies a Lucene analyzer that splits a string into a tokens.
-     * <p>
      * Here we are using two types of Lucene {@link Tokenizer}s:
      * 1. {@link WhitespaceTokenizer} which tokenizes from the white spaces
      * 2. {@link StandardTokenizer} which tokenizes from white space as well as removed any punctuations
@@ -57,15 +56,20 @@ class InputAnalyzer implements InputAnalyzerInterface {
      * @throws IOException if Lucene's analyzer encounters any error
      */
     private List<String> tokenize(String inputString, boolean removePunctuation) throws IOException {
-        StringReader reader = new StringReader(inputString);
-        Tokenizer currentTokenizer = (removePunctuation) ? new StandardTokenizer() : new WhitespaceTokenizer();
+        final StringReader reader = new StringReader(inputString);
+        final Tokenizer currentTokenizer;
+        if (removePunctuation) {
+            currentTokenizer = new StandardTokenizer();
+        } else {
+            currentTokenizer = new WhitespaceTokenizer();
+        }
         currentTokenizer.setReader(reader);
 
-        TokenStream tokenStream = new LengthFilter(currentTokenizer, 2, Integer.MAX_VALUE);
+        final TokenStream tokenStream = new LengthFilter(currentTokenizer, 2, Integer.MAX_VALUE);
         final CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
         tokenStream.reset();
 
-        ArrayList<String> tokenizedString = new ArrayList<>();
+        final List<String> tokenizedString = new ArrayList<>();
         while (tokenStream.incrementToken()) {
             tokenizedString.add(charTermAttribute.toString());
         }
@@ -77,25 +81,19 @@ private List<String> tokenize(String inputString, boolean removePunctuation) thr
     }
 
     /**
-     * This is {@link InputAnalyzer#tokenize(String, boolean)} with removePunctuation set as false. So, this
-     * method performs tokenization without removing punctuations.
+     * Implementation of {@link InputAnalyzerInterface#defaultSplit(String)}.
      *
-     * @param inputString The input string to be pre-processed with Lucene tokenizer
-     * @return tokens
-     * @throws IOException if Lucene's analyzer encounters any error
+     * {@inheritDoc}
      */
     @Override
     public List<String> defaultSplit(String inputString) throws IOException {
         return tokenize(inputString, false);
     }
 
     /**
-     * This is {@link InputAnalyzer#tokenize(String, boolean)} with removePunctuation set as false. So, this
-     * method performs tokenization without removing punctuations.
+     * Implementation of {@link InputAnalyzerInterface#removePunctuation(String)}.
      *
-     * @param inputString The input string to be pre-processed with Lucene tokenizer
-     * @return tokens
-     * @throws IOException if Lucene's analyzer encounters any error
+     * {@inheritDoc}
      */
     @Override
     public List<String> removePunctuation(String inputString) throws IOException {
 
@@ -33,18 +33,22 @@
  */
 interface InputAnalyzerInterface {
     /**
+     * This is {@link InputAnalyzer#tokenize(String, boolean)} with removePunctuation set as false. So, this
+     * method performs tokenization without removing punctuations.
      *
-     * @param inputString
-     * @return
-     * @throws IOException
+     * @param inputString The input string to be pre-processed with Lucene tokenizer
+     * @return tokens
+     * @throws IOException if Lucene's analyzer encounters any error
      */
     List<String> defaultSplit(String inputString) throws IOException;
 
     /**
+     * This is {@link InputAnalyzer#tokenize(String, boolean)} with removePunctuation set as true. So, this
+     * method performs tokenization and also removes punctuations.
      *
-     * @param inputString
-     * @return
-     * @throws IOException
+     * @param inputString The input string to be pre-processed with Lucene tokenizer
+     * @return tokens
+     * @throws IOException if Lucene's analyzer encounters any error
      */
     List<String> removePunctuation(String inputString) throws IOException;
 }
@@ -24,24 +24,52 @@
 
 package com.vader.sentiment.processor;
 
-import com.vader.sentiment.util.Utils;
-
 import java.io.IOException;
 import java.util.Collections;
 import java.util.List;
 
+import com.vader.sentiment.util.Utils;
+
 /**
  * The TextProperties class implements the pre-processing steps of the input string for sentiment analysis.
- * It utilizes the Lucene analyzers
+ * It utilizes the Lucene analyzer to perform processing on the input string.
  *
  * @author Animesh Pandey
  *         Created on 4/10/2016.
  */
-public class TextProperties {
+public final class TextProperties {
+    /**
+     * String whose properties will be extracted.
+     */
     private String inputText;
+
+    /**
+     * List of tokens and emoticons extracted from the {@link TextProperties#inputText}.
+     */
     private List<String> wordsAndEmoticons;
+
+    /**
+     * List of tokens extracted from the {@link TextProperties#inputText}.
+     * Emoticons are removed here.
+     */
     private List<String> wordsOnly;
-    private boolean isCapDIff;
+
+    /**
+     * Flag that specifies whether the current string has yelling words.
+     */
+    private boolean isCapDiff;
+
+    /**
+     * Parameterized constructor accepting the input string that will be processed.
+     *
+     * @param inputText the input string
+     * @throws IOException if there is an issue with the lucene analyzers
+     */
+    public TextProperties(String inputText) throws IOException {
+        this.inputText = inputText;
+        setWordsAndEmoticons();
+        setCapDiff(isAllCapDifferential());
+    }
 
     /**
      * This method tokenizes the input string, preserving the punctuation marks using
@@ -52,61 +80,43 @@ public class TextProperties {
     private void setWordsAndEmoticons() throws IOException {
         setWordsOnly();
 
-        List<String> wordsAndEmoticonsList = new InputAnalyzer().defaultSplit(inputText);
+        final List<String> wordsAndEmoticonsList = new InputAnalyzer().defaultSplit(inputText);
         for (String currentWord : wordsOnly) {
             for (String currentPunc : Utils.PUNCTUATION_LIST) {
-                String pWord = currentWord + currentPunc;
-                Integer pWordCount = Collections.frequency(wordsAndEmoticonsList, pWord);
-                while (pWordCount > 0) {
-                    int index = wordsAndEmoticonsList.indexOf(pWord);
-                    wordsAndEmoticonsList.remove(pWord);
+                final String wordPunct = currentWord + currentPunc;
+                Integer wordPunctCount = Collections.frequency(wordsAndEmoticonsList, wordPunct);
+                while (wordPunctCount > 0) {
+                    final int index = wordsAndEmoticonsList.indexOf(wordPunct);
+                    wordsAndEmoticonsList.remove(wordPunct);
                     wordsAndEmoticonsList.add(index, currentWord);
-                    pWordCount = Collections.frequency(wordsAndEmoticonsList, pWord);
+                    wordPunctCount = Collections.frequency(wordsAndEmoticonsList, wordPunct);
                 }
 
-                String wordP = currentPunc + currentWord;
-                Integer wordPCount = Collections.frequency(wordsAndEmoticonsList, wordP);
-                while (wordPCount > 0) {
-                    int index = wordsAndEmoticonsList.indexOf(wordP);
-                    wordsAndEmoticonsList.remove(wordP);
+                final String punctWord = currentPunc + currentWord;
+                Integer punctWordCount = Collections.frequency(wordsAndEmoticonsList, punctWord);
+                while (punctWordCount > 0) {
+                    final int index = wordsAndEmoticonsList.indexOf(punctWord);
+                    wordsAndEmoticonsList.remove(punctWord);
                     wordsAndEmoticonsList.add(index, currentWord);
-                    wordPCount = Collections.frequency(wordsAndEmoticonsList, wordP);
+                    punctWordCount = Collections.frequency(wordsAndEmoticonsList, punctWord);
                 }
             }
         }
         this.wordsAndEmoticons = wordsAndEmoticonsList;
     }
 
     /**
-     * This method tokenizes the input string, removing the special characters as well
+     * This method tokenizes the input string, removing the special characters as well.
      *
-     * @throws IOException
+     * @throws IOException if there is an error while using Lucene analyzers.
      * @see InputAnalyzer#removePunctuation(String)
      */
     private void setWordsOnly() throws IOException {
         this.wordsOnly = new InputAnalyzer().removePunctuation(inputText);
     }
 
-    private void setCapDiff(boolean capDIff) {
-        isCapDIff = capDIff;
-    }
-
-    /**
-     * @return True iff the input has yelling words i.e. all caps in the tokens, but all the token should not be
-     * in upper case.
-     * e.g. [GET, THE, HELL, OUT] returns false
-     * [GET, the, HELL, OUT] returns true
-     * [get, the, hell, out] returns false
-     */
-    private boolean isAllCapDifferential() {
-        int countAllCaps = 0;
-        for (String s : wordsAndEmoticons) {
-            if (Utils.isUpper(s)) {
-                countAllCaps++;
-            }
-        }
-        int capDifferential = wordsAndEmoticons.size() - countAllCaps;
-        return (0 < capDifferential) && (capDifferential < wordsAndEmoticons.size());
+    private void setCapDiff(boolean capDiff) {
+        this.isCapDiff = capDiff;
     }
 
     public List<String> getWordsAndEmoticons() {
@@ -118,25 +128,26 @@ public List<String> getWordsOnly() {
     }
 
     public boolean isCapDiff() {
-        return isCapDIff;
+        return isCapDiff;
     }
 
     /**
+     * Returns true iff the input has yelling words, i.e. some of the tokens are in all caps but not every
+     * token is in upper case.
+     * e.g. [GET, THE, HELL, OUT] returns false
+     * [GET, the, HELL, OUT] returns true
+     * [get, the, hell, out] returns false
      *
-     * @param inputText
-     * @throws IOException
+     * @return true if some, but not all, of the tokens are in upper case
      */
-    public TextProperties(String inputText) throws IOException {
-        this.inputText = inputText;
-        setWordsAndEmoticons();
-        setCapDiff(isAllCapDifferential());
-    }
-
-    public static void main(String[] args) throws IOException {
-        String input = "The plot was good, but the characters are uncompelling and the dialog is not great. :( :(";
-        TextProperties properties = new TextProperties(input);
-        System.out.println(properties.getWordsOnly());
-        System.out.println(properties.getWordsAndEmoticons());
-        System.out.println(properties.isCapDiff());
+    private boolean isAllCapDifferential() {
+        int countAllCaps = 0;
+        for (String token : wordsAndEmoticons) {
+            if (Utils.isUpper(token)) {
+                countAllCaps++;
+            }
+        }
+        final int capDifferential = wordsAndEmoticons.size() - countAllCaps;
+        return (0 < capDifferential) && (capDifferential < wordsAndEmoticons.size());
     }
 }