Skip to content

Optimised clone list creation #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Programs/build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

<target name="test" depends="buildTest">
<mkdir dir="reports"/>
<junit fork="true" printsummary="yes" haltonfailure="yes">
<junit fork="true" printsummary="on" showoutput="yes" haltonfailure="no">
<classpath>
<pathelement location="${bin}"/>
<pathelement location="${junit.path}"/>
Expand Down
1 change: 1 addition & 0 deletions Programs/runner.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Launches notebooks.SccOutputAnalyzer with assertions enabled (-ea), the
# parallel garbage collector, and a 6G initial / 20G maximum heap.
# Input files and the output directory are passed as --key=value arguments.
java -ea -XX:+UseParallelGC \
    -cp bin:external/json-simple-1.1.1.jar \
    -Xms6G -Xmx20G \
    notebooks.SccOutputAnalyzer \
    --repro_file=/home/maka4186/notebook_disk/notebook-number_repo.csv \
    --stats_file=/home/maka4186/notebook_disk/SourcererCC_output/files.stats \
    --pair_file=/home/maka4186/notebook_disk/SourcererCC_output/clone.pairs.only.numbers \
    --output_dir=OutputSCC2
56 changes: 34 additions & 22 deletions Programs/src/notebooks/Analyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;
import java.io.*;

public class Analyzer {
protected String outputDir = ".";
Expand All @@ -14,26 +14,38 @@ public class Analyzer {
* @param fileName Name of file with mapping from notebook number to repro
* @return The map from notebook name to repro
*/
protected static Map<String, String> createReproMap(String fileName)
        throws FileNotFoundException {
    // Each input line is expected to look like "<notebook number>,<repro>".
    // The number is turned into the key "nb_<number>.ipynb"; the second
    // column becomes the value. Lines that cannot be used are reported on
    // stderr and skipped instead of aborting the whole load.
    Map<String, String> result = new HashMap<String, String>();
    // try-with-resources closes the scanner even when processing a line fails.
    try (Scanner scanner = new Scanner(new File(fileName))) {
        while (scanner.hasNextLine()) {
            String line = scanner.nextLine();
            String[] subStrings = line.split(",");
            try {
                int notebookNumber = Integer.parseInt(subStrings[0]);
                // split() drops trailing empty fields, so "7" and "7," both
                // yield a single element; indexing [1] would throw.
                if (subStrings.length < 2) {
                    System.err.println("Line '" + line
                            + "' in repro file lacks a repro name and is excluded from mapping!");
                    continue;
                }
                String notebookName = "nb_" + notebookNumber + ".ipynb";
                String reproName = subStrings[1];
                result.put(notebookName, reproName);
            } catch (NumberFormatException e) {
                System.err.println("Notebook numbers in repro file must be integers! Notebook with \"number\" '"
                        + subStrings[0] + "' is excluded from mapping!");
            }
        }
    }
    return result;
}
protected static Map<String, String> createReproMap(String fileName)
        throws FileNotFoundException {
    // Each input line is expected to look like "<notebook number>,<repro>".
    // The number is turned into the key "nb_<number>.ipynb"; the second
    // column becomes the value. Lines that cannot be used are reported on
    // stderr and skipped.
    Map<String, String> result = new HashMap<String, String>();
    // try-with-resources closes the reader on every exit path.
    try (BufferedReader input = new BufferedReader(new FileReader(new File(fileName)))) {
        String line;
        while ((line = input.readLine()) != null) {
            String[] subStrings = line.split(",");
            try {
                int notebookNumber = Integer.parseInt(subStrings[0]);
                // split() drops trailing empty fields, so "7" and "7," both
                // yield a single element; indexing [1] would throw.
                if (subStrings.length < 2) {
                    System.err.println("Line '" + line
                            + "' in repro file lacks a repro name and is excluded from mapping!");
                    continue;
                }
                String notebookName = "nb_" + notebookNumber + ".ipynb";
                String reproName = subStrings[1];
                result.put(notebookName, reproName);
            } catch (NumberFormatException e) {
                System.err.println("Notebook numbers in repro file must be integers! Notebook with \"number\" '"
                        + subStrings[0] + "' is excluded from mapping!");
            }
        }
    } catch (FileNotFoundException e) {
        // FileNotFoundException is an IOException; without this clause the
        // handler below would swallow it and callers would silently get an
        // empty map despite the declared throws clause.
        throw e;
    } catch (IOException e) {
        // A read failure part-way through is reported; the entries read so
        // far are still returned.
        e.printStackTrace(System.err);
    }
    return result;
}

/**
* Get the part of arg located after the (first) '=' sign. If the '=' is
Expand All @@ -48,4 +60,4 @@ protected String getValueFromArgument(String arg) {
return arg.substring(eqIndex + 1);
}
}
}
}
74 changes: 45 additions & 29 deletions Programs/src/notebooks/CloneFileWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import java.util.concurrent.Future;

public class CloneFileWriter {
private LocalDateTime startTime = LocalDateTime.now();
private String outputDir;

public CloneFileWriter(String outputDir) {
Expand Down Expand Up @@ -50,7 +51,7 @@ public void write(Map<Notebook, SnippetCode[]> file2hashes,
}

private void printFile2hashes(Map<Notebook, SnippetCode[]> file2hashes) throws IOException {
Writer writer = new FileWriter(outputDir + "/file2hashesA" + LocalDateTime.now() + ".csv");
Writer writer = new FileWriter(outputDir + "/file2hashesA" + startTime + ".csv");
writer.write(file2hashesHeader());
for (Notebook notebook: file2hashes.keySet()) {
writer.write(notebook.getName());
Expand All @@ -64,7 +65,7 @@ private void printFile2hashes(Map<Notebook, SnippetCode[]> file2hashes) throws I
}

private void printHash2files(Map<SnippetCode, List<Snippet>> hash2files) throws IOException {
Writer writer = new FileWriter(outputDir + "/hash2filesA" + LocalDateTime.now() + ".csv");
Writer writer = new FileWriter(outputDir + "/hash2filesA" + startTime + ".csv");
writer.write(hash2filesHeader());
for (SnippetCode code: hash2files.keySet()) {
writer.write(code.getHash() + ", " + code.getLOC());
Expand All @@ -78,7 +79,7 @@ private void printHash2files(Map<SnippetCode, List<Snippet>> hash2files) throws

private void printCloneFrequencies(Map<Notebook, SnippetCode[]> file2hashes,
Map<SnippetCode, List<Snippet>> hash2files) throws IOException {
Writer writer = new FileWriter(outputDir + "/cloneFrequency" + LocalDateTime.now() + ".csv");
Writer writer = new FileWriter(outputDir + "/cloneFrequency" + startTime + ".csv");
writer.write(cloneFrequencyHeader());
for (Notebook notebook: file2hashes.keySet()) {
int numClones = 0, numUnique = 0, numClonesNE = 0;
Expand Down Expand Up @@ -146,32 +147,47 @@ private void printCloneFrequencies(Map<Notebook, SnippetCode[]> file2hashes,
* @param hash2files Mapping from snippets to position in notebooks
* @param NUM_CONNECTIONS Maximum number of notebooks to print connection information for
*/
private void printConnectionsFile(Map<Notebook, SnippetCode[]> file2hashes,
        Map<SnippetCode, List<Snippet>> hash2files, final int NUM_CONNECTIONS) throws IOException {
    // Writes the connections CSV: a header followed by one entry per
    // selected notebook. Notebooks are shuffled first, so when
    // NUM_CONNECTIONS is smaller than the number of notebooks a random
    // subset is printed. Each entry is produced by a ConnectionsLineBuilder
    // task run through ThreadExecutor.
    //
    // try-with-resources guarantees the file is flushed and closed even if
    // task creation, execution, or a write throws.
    try (Writer writer = new FileWriter(outputDir + "/connections" + LocalDateTime.now() + ".csv")) {
        writer.write(connectionsHeader());
        List<Notebook> notebooks = new ArrayList<Notebook>(file2hashes.keySet());
        Collections.shuffle(notebooks);
        int connectionsToPrint = Math.min(NUM_CONNECTIONS, file2hashes.size());
        List<Callable<String>> tasks = new ArrayList<Callable<String>>(connectionsToPrint);
        for (int i = 0; i < connectionsToPrint; i++) {
            // Only every 10000th task is flagged for a heartbeat.
            boolean heartBeat = 0 == i % 10000;
            tasks.add(new ConnectionsLineBuilder(notebooks.get(i), file2hashes, hash2files, heartBeat));
        }
        List<Future<String>> result = ThreadExecutor.getInstance().invokeAll(tasks);
        for (int i = 0; i < connectionsToPrint; i++) {
            try {
                writer.write(result.get(i).get());
            } catch (InterruptedException e) {
                System.err.println("Printing of connections for notebook " + notebooks.get(i).getName()
                        + " was interrupted! " + e.getMessage());
                // Restore the interrupt flag for callers and stop: every
                // further get() would fail immediately with the flag set.
                Thread.currentThread().interrupt();
                break;
            } catch (ExecutionException e) {
                System.err.println("Printing connections for notebook "
                        + notebooks.get(i).getName() + " failed!" + e.toString());
            }
        }
    }
}
private void printConnectionsFile(final Map<Notebook, SnippetCode[]> file2hashes,
        final Map<SnippetCode, List<Snippet>> hash2files,
        final int NUM_CONNECTIONS) throws IOException {
    // Creates the connections CSV (header only at present) and runs a
    // ConnectionsLineBuilder for each of the first
    // min(NUM_CONNECTIONS, file2hashes.size()) notebooks, striped across a
    // fixed number of worker tasks: worker k handles indices k, k+W, k+2W...
    //
    // NOTE(review): the value returned by ConnectionsLineBuilder.call() is
    // discarded here, so no per-notebook line reaches the file. Presumably
    // the builder now records its results elsewhere; confirm that this is
    // intended. Notebooks are also no longer shuffled before selection.

    // Number of worker tasks; also the stride each worker advances by.
    final int numWorkers = 8;
    final boolean heartBeat = true;

    // try-with-resources flushes and closes the file; previously the writer
    // was never closed, so even the header could be lost.
    try (Writer writer = new FileWriter(outputDir + "/connections" + startTime + ".csv")) {
        writer.write(connectionsHeader());
        final List<Notebook> notebooks = new ArrayList<Notebook>(file2hashes.keySet());
        final int connectionsToPrint = Math.min(NUM_CONNECTIONS, file2hashes.size());

        // Sized for the workers, not for the (much larger) notebook count.
        List<Callable<Void>> tasks = new ArrayList<Callable<Void>>(numWorkers);
        for (int i = 0; i < numWorkers; i++) {
            final int start = i;
            tasks.add(() -> {
                for (int j = start; j < connectionsToPrint; j += numWorkers) {
                    new ConnectionsLineBuilder(notebooks.get(j), file2hashes, hash2files, heartBeat).call();
                }
                return null;
            });
        }

        List<Future<Void>> result = ThreadExecutor.getInstance().invokeAll(tasks);
        // Inspect every future so a failure inside a worker is reported
        // instead of vanishing silently.
        for (int i = 0; i < result.size(); i++) {
            try {
                result.get(i).get();
            } catch (InterruptedException e) {
                System.err.println("Computing connections in worker " + i
                        + " was interrupted! " + e.getMessage());
                // Restore the interrupt flag for callers and stop waiting.
                Thread.currentThread().interrupt();
                break;
            } catch (ExecutionException e) {
                System.err.println("Computing connections in worker " + i
                        + " failed!" + e.toString());
            }
        }
    }
}

/**
* Look in clones to decide whether snippet is a clone or a unique snippet
Expand Down
137 changes: 137 additions & 0 deletions Programs/src/notebooks/NotebookFile.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package notebooks;

import java.util.function.Consumer;

/**
 * Per-notebook record of connection counters, kept in a static lookup table
 * indexed by an integer id.
 *
 * Two families of counters are tracked, each split into "empty" and
 * non-empty connections: one where intra/inter refers to the same/different
 * notebook, and one where it refers to the same/different repo.
 */
class NotebookFile {
    /** Fixed-capacity lookup table; a slot stays null until registered. */
    private static NotebookFile[] directory = new NotebookFile[2800000]; /// FIXME

    /** Value passed to the constructor; used for repo lookup and file name. */
    private final int id;

    /// Connections between snippets where inter/intra denotes different/same notebooks
    private int intraConnections = 0;
    private int intraEmptyConnections = 0;
    private int interConnections = 0;
    private int interEmptyConnections = 0;

    /// Connections between snippets where inter/intra denotes different/same repo
    private int intraRepoConnections = 0;
    private int intraRepoEmptyConnections = 0;
    private int interRepoConnections = 0;
    private int interRepoEmptyConnections = 0;

    public NotebookFile(int id) {
        this.id = id;
    }

    // ---------------------------------------------------------------
    // Static directory access
    // ---------------------------------------------------------------

    /**
     * Stores a new NotebookFile built from notebookNumber in slot id,
     * unless that slot is already occupied (then nothing happens).
     */
    public static void register(int id, int notebookNumber) {
        if (NotebookFile.directory[id] == null) {
            NotebookFile.directory[id] = new NotebookFile(notebookNumber);
        }
    }

    /** Applies f to every registered (non-null) entry, in slot order. */
    public static void forAll(Consumer<NotebookFile> f) {
        final NotebookFile[] table = directory;
        for (int slot = 0; slot < table.length; slot++) {
            final NotebookFile entry = table[slot];
            if (entry == null) {
                continue;
            }
            f.accept(entry);
        }
    }

    /**
     * Returns the entry stored in slot id. Range and presence are verified
     * with assertions only, so they are checked only when assertions are on.
     */
    public static NotebookFile getById(int id) {
        assert (0 <= id && id < NotebookFile.directory.length) : "Illegal repo id " + id;
        assert (NotebookFile.directory[id] != null) : "Tried to lookup non-existing NotebookFile (id=" + id + ")";

        final NotebookFile found = NotebookFile.directory[id];
        return found;
    }

    // ---------------------------------------------------------------
    // Identity
    // ---------------------------------------------------------------

    /** Looks up the repository via Repository.getByNotebookNumber(id). */
    public Repository getRepo() {
        return Repository.getByNotebookNumber(this.id);
    }

    /** Returns "nb_" + id + ".ipynb". */
    public String fileName() {
        return "nb_" + this.id + ".ipynb";
    }

    // ---------------------------------------------------------------
    // Notebook-level connection counters
    // ---------------------------------------------------------------

    /** Adds one intra connection; see {@link #addIntraConnections(int, boolean)}. */
    public void addIntraConnections(boolean empty) {
        addIntraConnections(1, empty);
    }

    /** Adds one inter connection; see {@link #addInterConnections(int, boolean)}. */
    public void addInterConnections(boolean empty) {
        addInterConnections(1, empty);
    }

    /** Adds value to the empty or non-empty intra counter, chosen by empty. */
    public void addIntraConnections(int value, boolean empty) {
        if (!empty) {
            intraConnections += value;
        } else {
            intraEmptyConnections += value;
        }
    }

    /** Adds value to the empty or non-empty inter counter, chosen by empty. */
    public void addInterConnections(int value, boolean empty) {
        if (!empty) {
            interConnections += value;
        } else {
            interEmptyConnections += value;
        }
    }

    public int intraConnections() {
        return this.intraConnections;
    }

    public int intraEmptyConnections() {
        return this.intraEmptyConnections;
    }

    public int interConnections() {
        return this.interConnections;
    }

    public int interEmptyConnections() {
        return this.interEmptyConnections;
    }

    // ---------------------------------------------------------------
    // Repo-level connection counters
    // ---------------------------------------------------------------

    /** Adds one intra-repo connection; see {@link #addIntraRepoConnections(int, boolean)}. */
    public void addIntraRepoConnections(boolean empty) {
        addIntraRepoConnections(1, empty);
    }

    /** Adds one inter-repo connection; see {@link #addInterRepoConnections(int, boolean)}. */
    public void addInterRepoConnections(boolean empty) {
        addInterRepoConnections(1, empty);
    }

    /** Adds value to the empty or non-empty intra-repo counter, chosen by empty. */
    public void addIntraRepoConnections(int value, boolean empty) {
        if (!empty) {
            intraRepoConnections += value;
        } else {
            intraRepoEmptyConnections += value;
        }
    }

    /** Adds value to the empty or non-empty inter-repo counter, chosen by empty. */
    public void addInterRepoConnections(int value, boolean empty) {
        if (!empty) {
            interRepoConnections += value;
        } else {
            interRepoEmptyConnections += value;
        }
    }

    public int intraRepoConnections() {
        return this.intraRepoConnections;
    }

    public int intraRepoEmptyConnections() {
        return this.intraRepoEmptyConnections;
    }

    public int interRepoConnections() {
        return this.interRepoConnections;
    }

    public int interRepoEmptyConnections() {
        return this.interRepoEmptyConnections;
    }
}
Loading