Score ready

apiad · apiad · commit b9cf11e81aed · 2018-02-13T11:17:45.000+01:00
diff --git a/.gitignore b/.gitignore
@@ -103,3 +103,5 @@ venv.bak/
 
 
 # End of https://www.gitignore.io/api/python
+
+submit/scores.txt
diff --git a/score.py b/score.py
@@ -4,41 +4,142 @@
 import sys
 import os
 import random
-
-
-import os
-import sys
 import pprint
 import collections
+
 from tools import (get_span,
                    read_input,
                    read_phrases,
-                   read_links,
                    read_labels,
+                   read_links,
                    compare_phrases,
-                   compare_links,
-                   compare_labels)
+                   compare_labels,
+                   compare_links)
+
+
+def evaluate(gold_A, gold_B, gold_C, submit_A, submit_B, submit_C):
+    result_A = compare_phrases(gold_A, submit_A)
+    result_B = compare_labels(gold_B, submit_B, result_A['mapping'])
+    result_C = compare_links(gold_C, submit_C, result_A['mapping'])
+
+    return dict(
+        correct_A=len(result_A['correct']),
+        correct_B=len(result_B['correct']),
+        correct_C=len(result_C['correct']),
+        partial_A=len(result_A['partial']),
+        missing_A=len(result_A['missing']),
+        missing_C=len(result_C['missing']),
+        spurious_A=len(result_A['spurious']),
+        spurious_C=len(result_C['spurious']),
+        incorrect_B=len(result_B['incorrect']),
+    )
+
+def evaluate_1(name, gold, submit):
+    gold_input = read_input(os.path.join(gold, 'input_%s' % name))
+
+    gold_A = read_phrases(os.path.join(gold, 'output_A_%s' % name))
+    gold_B = read_labels(os.path.join(gold, 'output_B_%s' % name))
+    gold_C = read_links(os.path.join(gold, 'output_C_%s' % name))
+
+    submit_A = read_phrases(os.path.join(submit, 'scenario1-ABC', 'output_A_%s' % name))
+    submit_B = read_labels(os.path.join(submit, 'scenario1-ABC', 'output_B_%s' % name))
+    submit_C = read_links(os.path.join(submit, 'scenario1-ABC', 'output_C_%s' % name))
+
+    return evaluate(gold_A, gold_B, gold_C, submit_A, submit_B, submit_C)
+
+
+def evaluate_2(name, gold, submit):
+    gold_input = read_input(os.path.join(gold, 'input_%s' % name))
+
+    gold_A = read_phrases(os.path.join(gold, 'output_A_%s' % name))
+    gold_B = read_labels(os.path.join(gold, 'output_B_%s' % name))
+    gold_C = read_links(os.path.join(gold, 'output_C_%s' % name))
+
+    submit_A = gold_A
+    submit_B = read_labels(os.path.join(submit, 'scenario2-BC', 'output_B_%s' % name))
+    submit_C = read_links(os.path.join(submit, 'scenario2-BC', 'output_C_%s' % name))
+
+    return evaluate(gold_A, gold_B, gold_C, submit_A, submit_B, submit_C)
 
-from os.path import abspath, join, exists
 
+def evaluate_3(name, gold, submit):
+    """Score scenario 3 for the document ``name``.
+
+    The gold phrases (A) and gold labels (B) stand in for the submitted
+    ones; only the links (C) are read from ``<submit>/scenario3-C``.
+    Returns the counts produced by ``evaluate``.
+    """
+    # The input file is read here, but ``gold_input`` is not used below.
+    gold_input = read_input(os.path.join(gold, 'input_%s' % name))
 
-def evaluate_1(fname, gold, submit):
-    pass
+    gold_A = read_phrases(os.path.join(gold, 'output_A_%s' % name))
+    gold_B = read_labels(os.path.join(gold, 'output_B_%s' % name))
+    gold_C = read_links(os.path.join(gold, 'output_C_%s' % name))
+
+    # Phrases and labels are given in this scenario, so gold is reused.
+    submit_A = gold_A
+    submit_B = gold_B
+    submit_C = read_links(os.path.join(submit, 'scenario3-C', 'output_C_%s' % name))
+
+    return evaluate(gold_A, gold_B, gold_C, submit_A, submit_B, submit_C)
+
+
+def update(dict_1, dict_2):
+    for k,v in dict_1.items():
+        dict_2[k] += v
 
 
 if __name__ == '__main__':
     gold = sys.argv[1] if len(sys.argv) > 1 else 'gold'
     submit = sys.argv[2] if len(sys.argv) > 2 else 'submit'
 
+    totals1 = collections.defaultdict(lambda: 0)
+    totals2 = collections.defaultdict(lambda: 0)
+    totals3 = collections.defaultdict(lambda: 0)
+
     for fname in os.listdir(gold):
-        if fname.endswith('_input.txt'):
-            scenario1 = evaluate_1(fname, gold, submit)
-            scenario2 = evaluate_2(fname, gold, submit)
-            scenario3 = evaluate_3(fname, gold, submit)
+        if fname.startswith('input_'):
+            name = fname[6:]
+
+            scenario1 = evaluate_1(name, gold, submit)
+            update(scenario1, totals1)
+
+            scenario2 = evaluate_2(name, gold, submit)
+            update(scenario2, totals2)
+
+            scenario3 = evaluate_3(name, gold, submit)
+            update(scenario3, totals3)
+
+    pprint.pprint(('Scenario 1', totals1))
+    pprint.pprint(('Scenario 2', totals2))
+    pprint.pprint(('Scenario 3', totals3))
+
+    correct_1 = sum([totals1['correct_A'], totals1['correct_B'], totals1['correct_C'], 0.5 * totals1['partial_A']])
+    subtotal_1 = sum([totals1['partial_A'], totals1['correct_A'], totals1['correct_B'], totals1['incorrect_B'], totals1['correct_C']])
+
+    abc_prec = correct_1 / sum([subtotal_1, totals1['spurious_A'], totals1['spurious_C']])
+    abc_rec = correct_1 / sum([subtotal_1, totals1['missing_A'], totals1['missing_C']])
+    abc_f1 = 2 * abc_prec * abc_rec / ( abc_prec + abc_rec )
+
+    correct_2 = sum([totals2['correct_B'], totals2['correct_C']])
+    subtotal_2 = sum([totals2['correct_B'], totals2['incorrect_B'], totals2['correct_C']])
+
+    bc_prec = correct_2 / sum([subtotal_2, totals2['spurious_C']])
+    bc_rec = correct_2 / sum([subtotal_2, totals2['missing_C']])
+    bc_f1 = 2 * bc_prec * bc_rec / ( bc_prec + bc_rec )
+
+    correct_3 = totals3['correct_C']
+    subtotal_3 = totals3['correct_C']
+
+    c_prec = correct_3 / sum([subtotal_3, totals2['spurious_C']])
+    c_rec = correct_3 / sum([subtotal_3, totals2['missing_C']])
+    c_f1 = 2 * c_prec * c_rec / ( c_prec + c_rec )
+
+    macro = sum([abc_f1, bc_f1, c_f1]) / 3
+
+    with open(os.path.join(submit, 'scores.txt'), 'w') as fp:
+        fp.write('abc_prec:%.5f\n'% abc_prec)
+        fp.write('abc_rec:%.5f\n' % abc_rec)
+        fp.write('abc_f1:%.5f\n'  % abc_f1)
+
+        fp.write('bc_prec:%.5f\n' % bc_prec)
+        fp.write('bc_rec:%.5f\n'  % bc_rec)
+        fp.write('bc_f1:%.5f\n'   % bc_f1)
 
-    with open(os.path.join(sys.argv[2], 'scores.txt'), 'wb') as fp:
-        for label in "abc bc c".split():
-            for val in "f1 prec rec".split():
-                fp.write('%s_%s:%.5f\n' % (label, val, random.uniform(0,1)))
+        fp.write('c_prec:%.5f\n'  % c_prec)
+        fp.write('c_rec:%.5f\n'   % c_rec)
+        fp.write('c_f1:%.5f\n'    % c_f1)
 
-        fp.write('macro:%.5f\n' % random.uniform(0,1))
+        fp.write('macro:%.5f\n'   % macro)

Original file line number	Diff line number	Diff line change
`@@ -103,3 +103,5 @@ venv.bak/`
`103`	`103`
`104`	`104`
`105`	`105`	`# End of https://www.gitignore.io/api/python`
	`106`	`+`
	`107`	`+submit/scores.txt`