From b2acd793cc30b9f5e1004d7b77f8026a77dbfa99 Mon Sep 17 00:00:00 2001 From: Andree Amaro Date: Mon, 7 Oct 2019 15:09:56 -0600 Subject: [PATCH 1/3] Adding implementation for movie recommender using the TDD technique including a gitignore file for the idea folder, the cvs resulting file and the gz file used as input --- .gitignore | 6 + pom.xml | 61 ++++++++-- .../recommendation/MovieRecommender.java | 113 ++++++++++++++++++ .../recommendation/MovieRecommenderTest.java | 8 +- 4 files changed, 172 insertions(+), 16 deletions(-) create mode 100644 .gitignore create mode 100644 src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6b8d121 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.idea +.DS_Store +*.csv +target +*.gz + diff --git a/pom.xml b/pom.xml index 8169ff7..1f6e02c 100644 --- a/pom.xml +++ b/pom.xml @@ -15,16 +15,51 @@ - - org.apache.mahout - mahout-core - 0.9 - - - junit - junit - 4.7 - test - - - + + org.apache.mahout + mahout-core + 0.9 + + + + junit + junit + 4.7 + test + + + org.slf4j + slf4j-simple + 1.7.12 + + + org.slf4j + slf4j-jdk14 + 1.5.6 + test + + + + org.slf4j + slf4j-jdk14 + 1.5.6 + test + + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.0 + + 1.8 + 1.8 + + + + + \ No newline at end of file diff --git a/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java new file mode 100644 index 0000000..b715044 --- /dev/null +++ b/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -0,0 +1,113 @@ +package nearsoft.academy.bigdata.recommendation; + + + +import com.google.common.collect.BiMap; +import com.google.common.collect.HashBiMap; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; +import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; +import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; +import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.*; +import java.util.zip.GZIPInputStream; + +public class MovieRecommender { + + private int totalReviews = 0; + private int totalProducts = 0; + private int totalUsers = 0; + private float score = 0.0f; + private BiMap productsHash = HashBiMap.create(); + private BiMap usersHash = HashBiMap.create(); + + public MovieRecommender(String path) throws IOException { + writeFile(path); + } + + private void writeFile(String path) throws IOException { + boolean bandera = true; + Long thisProduct = 0l; + Long thisUser = 0l; + Files.deleteIfExists(Paths.get("Result.csv")); + File result = new File("Result.csv"); + InputStream fileReader = new GZIPInputStream(new FileInputStream(path)); + BufferedReader br = new BufferedReader(new InputStreamReader(fileReader)); + FileWriter fileWriter = new FileWriter(result); + BufferedWriter bw = new BufferedWriter(fileWriter); + String line; + String[] sp; + Long identificadorUsuario = 0l; + Long identificadorProducto = 0l; + String key, value; + while((line = br.readLine()) != null) { + if (line.length() >= 0) { + sp = line.split(" "); + key = sp[0]; + if (key.equals("product/productId:")) { + value = sp[1]; + if (!productsHash.containsKey(value)){ + productsHash.put(value,identificadorProducto++); + thisProduct = productsHash.get(value); + this.totalProducts++; + }else{ + thisProduct = productsHash.get(value); + } + }else if (key.equals("review/userId:")){ + value = sp[1]; + if (!usersHash.containsKey(value)){ + usersHash.put(value, identificadorUsuario++); + this.totalUsers++; + } + thisUser = usersHash.get(value); + }else if (key.equals("review/score:")){ + String score = sp[1]; + bw.write(thisUser + "," + thisProduct + "," + score + "\n"); + this.totalReviews ++; + } + } + } + br.close(); + bw.close(); + } + + public int getTotalReviews() { + return this.totalReviews; + } + + public int getTotalProducts() { + return this.totalProducts; + } + + public int getTotalUsers() { + return this.totalUsers; + } + + public List getRecommendationsForUser(String id) throws IOException, TasteException { + DataModel model = new FileDataModel(new File("result.csv")); + UserSimilarity similarity = new PearsonCorrelationSimilarity(model); + UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); + UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); + + Long userLongId = usersHash.get(id); + + List recommendations = recommender.recommend(userLongId, 3); + List recommendationsAsStr = new ArrayList<>(); + productsHash.inverse(); + + + for (RecommendedItem recommendation: recommendations) { + recommendationsAsStr.add(productsHash.inverse().get(recommendation.getItemID())); + } + return recommendationsAsStr; + } +} diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 0d0b1fe..5ca1bf1 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -13,9 +13,11 @@ public class MovieRecommenderTest { @Test public void testDataInfo() throws IOException, TasteException { - //download movies.txt.gz from + //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); + String test = System.getProperty("user.dir")+"/src/test/java/nearsoft/academy/bigdata/recommendation/movies.txt.gz"; + System.out.println("test = " + test); + MovieRecommender recommender = new MovieRecommender(test); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers()); @@ -27,4 +29,4 @@ public void testDataInfo() throws IOException, TasteException { } -} +} \ No newline at end of file From a4c7f4769993e50743146237aaaa61c770590032 Mon Sep 17 00:00:00 2001 From: Andree Amaro Date: Wed, 9 Oct 2019 10:02:39 -0600 Subject: [PATCH 2/3] Removing sout for the path to the .gz file --- .../academy/bigdata/recommendation/MovieRecommenderTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 5ca1bf1..8742e40 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -16,7 +16,6 @@ public void testDataInfo() throws IOException, TasteException { //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html String test = System.getProperty("user.dir")+"/src/test/java/nearsoft/academy/bigdata/recommendation/movies.txt.gz"; - System.out.println("test = " + test); MovieRecommender recommender = new MovieRecommender(test); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); From e9d0f8d55098dbfdc440d9405f12d88356fd8aed Mon Sep 17 00:00:00 2001 From: Andree Amaro Date: Wed, 9 Oct 2019 10:09:21 -0600 Subject: [PATCH 3/3] Removing .gitignore file --- .gitignore | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 .gitignore diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 6b8d121..0000000 --- a/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -.idea -.DS_Store -*.csv -target -*.gz -