diff --git a/pom.xml b/pom.xml index 8169ff7..b63f333 100644 --- a/pom.xml +++ b/pom.xml @@ -12,6 +12,10 @@ UTF-8 + + 7 + 7 + @@ -26,5 +30,15 @@ 4.7 test + + org.slf4j + slf4j-api + 1.7.5 + + + org.slf4j + slf4j-log4j12 + 1.7.5 + 
diff --git a/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
new file mode 100644
index 0000000..7f5fb3f
--- /dev/null
+++ b/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
@@ -0,0 +1,165 @@
+package nearsoft.academy.bigdata.recommendation;
+
+import com.google.common.collect.BiMap;
+import com.google.common.collect.HashBiMap;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
+import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
+import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * Parses a gzipped review dump, counts its reviews, products and users, and
+ * serves user-based recommendations from the ratings it extracted.
+ *
+ * <p>String ids from the dump are mapped to numeric ids (users from 1,
+ * products from 0) and every rating is written to {@code reviews.csv} in the
+ * working directory as {@code user,product,score}; that file backs the data
+ * model used for recommendations. Instances are not thread-safe.
+ */
+public class MovieRecommender {
+
+    private static final String CSV_FILE_PATH = "reviews.csv";
+    // Field markers include the separating space so substring() yields the bare value.
+    private static final String PRODUCT_PREFIX = "product/productId: ";
+    private static final String USER_PREFIX = "review/userId: ";
+    private static final String SCORE_PREFIX = "review/score: ";
+    private static final int MAX_RECOMMENDATIONS = 3;
+
+    private long totalUsers = 0, totalProducts = 0, totalReviews = 0;
+    public BiMap<String, Long> hashUsers = HashBiMap.create();
+    public BiMap<String, Long> hashProducts = HashBiMap.create();
+    private File newCSV;
+    // Built lazily on the first recommendation request; dropped whenever the data is re-parsed.
+    private UserBasedRecommender recommender;
+
+    /**
+     * Parses {@code resourceFile} immediately.
+     *
+     * @param resourceFile path to the gzipped review dump
+     * @throws IOException if the dump cannot be read or the CSV cannot be written
+     * @throws TasteException never thrown by this implementation; kept for source compatibility
+     */
+    public MovieRecommender(String resourceFile) throws IOException, TasteException {
+        // Call the private helper, not the overridable getTotals(), from the constructor.
+        loadReviews(resourceFile);
+    }
+
+    /** @return number of distinct products seen in the dump */
+    public long getTotalProducts() {
+        return totalProducts;
+    }
+
+    /** @return number of product entries (one per review) seen in the dump */
+    public long getTotalReviews() {
+        return totalReviews;
+    }
+
+    /** @return number of distinct users seen in the dump */
+    public long getTotalUsers() {
+        return totalUsers;
+    }
+
+    /**
+     * Re-parses {@code resourceFile}, replacing all previously collected totals,
+     * id mappings and the generated CSV.
+     *
+     * @param resourceFile path to the gzipped review dump
+     * @throws IOException if the dump cannot be read or the CSV cannot be written
+     * @throws TasteException never thrown by this implementation; kept for source compatibility
+     */
+    public void getTotals(String resourceFile) throws IOException, TasteException {
+        loadReviews(resourceFile);
+    }
+
+    /** Streams the dump once, updating the counters and id maps and writing one CSV row per score. */
+    private void loadReviews(String resourceFile) throws IOException {
+        // Start clean so a second parse cannot inflate the totals or mix ids from two runs.
+        totalUsers = 0;
+        totalProducts = 0;
+        totalReviews = 0;
+        hashUsers.clear();
+        hashProducts.clear();
+        recommender = null;
+
+        newCSV = new File(CSV_FILE_PATH);
+        Files.deleteIfExists(newCSV.toPath());
+
+        String userId = null;
+        String productId = null;
+        // try-with-resources closes both streams even when parsing fails midway.
+        try (BufferedReader in = new BufferedReader(new InputStreamReader(
+                     new GZIPInputStream(new FileInputStream(resourceFile)), StandardCharsets.UTF_8));
+             BufferedWriter out = Files.newBufferedWriter(newCSV.toPath(), StandardCharsets.UTF_8)) {
+            String line;
+            while ((line = in.readLine()) != null) {
+                // startsWith, not contains: free-form text lines must never be taken for a field.
+                if (line.startsWith(PRODUCT_PREFIX)) {
+                    totalReviews++;
+                    productId = line.substring(PRODUCT_PREFIX.length());
+                    if (!hashProducts.containsKey(productId)) {
+                        hashProducts.put(productId, totalProducts);
+                        totalProducts++;
+                    }
+                } else if (line.startsWith(USER_PREFIX)) {
+                    userId = line.substring(USER_PREFIX.length());
+                    if (!hashUsers.containsKey(userId)) {
+                        totalUsers++;
+                        hashUsers.put(userId, totalUsers);
+                    }
+                } else if (line.startsWith(SCORE_PREFIX) && userId != null && productId != null) {
+                    // Plain write(): printf would treat any '%' in the data as a format specifier.
+                    out.write(hashUsers.get(userId) + "," + hashProducts.get(productId) + ","
+                            + line.substring(SCORE_PREFIX.length()) + "\n");
+                }
+            }
+        }
+    }
+
+    /**
+     * Recommends up to three products for a user.
+     *
+     * @param userId user id exactly as it appears in the dump
+     * @return product ids as they appear in the dump; empty if the user is unknown
+     * @throws IOException if the generated CSV cannot be loaded
+     * @throws TasteException if the recommender fails
+     */
+    public List<String> getRecommendationsForUser(String userId) throws IOException, TasteException {
+        List<String> recommendationsUser = new ArrayList<>();
+        Long internalUserId = hashUsers.get(userId);
+        if (internalUserId == null) {
+            // Unknown user: unboxing null below would throw a NullPointerException.
+            return recommendationsUser;
+        }
+        List<RecommendedItem> recommendations =
+                getRecommender().recommend(internalUserId, MAX_RECOMMENDATIONS);
+        for (RecommendedItem recommendation : recommendations) {
+            recommendationsUser.add(hashProducts.inverse().get(recommendation.getItemID()));
+        }
+        return recommendationsUser;
+    }
+
+    /** Builds the recommender from the CSV on first use and reuses it for later calls. */
+    private UserBasedRecommender getRecommender() throws IOException, TasteException {
+        if (recommender == null) {
+            DataModel model = new FileDataModel(newCSV);
+            UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
+            UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model);
+            recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);
+        }
+        return recommender;
+    }
+
+}
diff --git 
a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 0d0b1fe..62b9791 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -11,14 +11,13 @@ import static org.junit.matchers.JUnitMatchers.hasItem; public class MovieRecommenderTest { + @Test public void testDataInfo() throws IOException, TasteException { - //download movies.txt.gz from - // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); - assertEquals(7911684, recommender.getTotalReviews()); - assertEquals(253059, recommender.getTotalProducts()); - assertEquals(889176, recommender.getTotalUsers()); + /* Dataset: http://snap.stanford.edu/data/web-Movies.html - pass its location with -Dmovies.file=..., otherwise movies.txt.gz in the working directory is used. */ MovieRecommender recommender = new MovieRecommender(System.getProperty("movies.file", "movies.txt.gz")); + assertEquals(7911684, recommender.getTotalReviews()); + assertEquals(253059, recommender.getTotalProducts()); + assertEquals(889176, recommender.getTotalUsers()); List recommendations = recommender.getRecommendationsForUser("A141HP4LYPWMSR"); assertThat(recommendations, hasItem("B0002O7Y8U")); @@ -26,5 +25,4 @@ public void testDataInfo() throws IOException, TasteException { assertThat(recommendations, hasItem("B000063W82")); } - }