Skip to content
This repository was archived by the owner on May 30, 2025. It is now read-only.

Commit b9cf11e

Browse files
committed
Score ready
1 parent 9c29390 commit b9cf11e

File tree

2 files changed

+122
-19
lines changed

2 files changed

+122
-19
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,5 @@ venv.bak/
103103

104104

105105
# End of https://www.gitignore.io/api/python
106+
107+
submit/scores.txt

score.py

Lines changed: 120 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,41 +4,142 @@
44
import sys
55
import os
66
import random
7-
8-
9-
import os
10-
import sys
117
import pprint
128
import collections
9+
1310
from tools import (get_span,
1411
read_input,
1512
read_phrases,
16-
read_links,
1713
read_labels,
14+
read_links,
1815
compare_phrases,
19-
compare_links,
20-
compare_labels)
16+
compare_labels,
17+
compare_links)
18+
19+
20+
def evaluate(gold_A, gold_B, gold_C, submit_A, submit_B, submit_C):
    """Compare one submission against gold for subtasks A, B and C.

    The phrase comparison (A) runs first; its ``'mapping'`` entry is handed
    to both the label (B) and the link (C) comparison.

    Returns a dict of integer counts, each being the ``len()`` of the
    corresponding entry ('correct', 'partial', 'missing', 'spurious',
    'incorrect') in the result of the matching ``compare_*`` helper.
    """
    phrases = compare_phrases(gold_A, submit_A)
    mapping = phrases['mapping']
    labels = compare_labels(gold_B, submit_B, mapping)
    links = compare_links(gold_C, submit_C, mapping)

    return {
        'correct_A': len(phrases['correct']),
        'correct_B': len(labels['correct']),
        'correct_C': len(links['correct']),
        'partial_A': len(phrases['partial']),
        'missing_A': len(phrases['missing']),
        'missing_C': len(links['missing']),
        'spurious_A': len(phrases['spurious']),
        'spurious_C': len(links['spurious']),
        'incorrect_B': len(labels['incorrect']),
    }
36+
37+
def evaluate_1(name, gold, submit):
    """Score scenario 1 for the document ``name``.

    Gold files are read from the ``gold`` directory and all three submitted
    outputs (A, B, C) from ``<submit>/scenario1-ABC``. Returns the count
    dict produced by ``evaluate``.
    """
    # The input file is still read, although its content is not used here.
    read_input(os.path.join(gold, 'input_%s' % name))

    file_A = 'output_A_%s' % name
    file_B = 'output_B_%s' % name
    file_C = 'output_C_%s' % name

    expected_A = read_phrases(os.path.join(gold, file_A))
    expected_B = read_labels(os.path.join(gold, file_B))
    expected_C = read_links(os.path.join(gold, file_C))

    actual_A = read_phrases(os.path.join(submit, 'scenario1-ABC', file_A))
    actual_B = read_labels(os.path.join(submit, 'scenario1-ABC', file_B))
    actual_C = read_links(os.path.join(submit, 'scenario1-ABC', file_C))

    return evaluate(expected_A, expected_B, expected_C,
                    actual_A, actual_B, actual_C)
49+
50+
51+
def evaluate_2(name, gold, submit):
    """Score scenario 2 for the document ``name``.

    The gold phrases stand in for the submitted phrases; labels (B) and
    links (C) are read from ``<submit>/scenario2-BC``. Returns the count
    dict produced by ``evaluate``.
    """
    # The input file is still read, although its content is not used here.
    read_input(os.path.join(gold, 'input_%s' % name))

    file_B = 'output_B_%s' % name
    file_C = 'output_C_%s' % name

    expected_A = read_phrases(os.path.join(gold, 'output_A_%s' % name))
    expected_B = read_labels(os.path.join(gold, file_B))
    expected_C = read_links(os.path.join(gold, file_C))

    actual_B = read_labels(os.path.join(submit, 'scenario2-BC', file_B))
    actual_C = read_links(os.path.join(submit, 'scenario2-BC', file_C))

    # Phrases are given in this scenario, so gold A is passed on both sides.
    return evaluate(expected_A, expected_B, expected_C,
                    expected_A, actual_B, actual_C)
2163

22-
from os.path import abspath, join, exists
2364

65+
def evaluate_3(name, gold, submit):
    """Score scenario 3 for the document ``name``.

    Gold phrases and gold labels stand in for the submitted ones; only the
    links (C) are read from ``<submit>/scenario3-C``. Returns the count
    dict produced by ``evaluate``.
    """
    # The input file is still read, although its content is not used here.
    read_input(os.path.join(gold, 'input_%s' % name))

    file_C = 'output_C_%s' % name

    expected_A = read_phrases(os.path.join(gold, 'output_A_%s' % name))
    expected_B = read_labels(os.path.join(gold, 'output_B_%s' % name))
    expected_C = read_links(os.path.join(gold, file_C))

    actual_C = read_links(os.path.join(submit, 'scenario3-C', file_C))

    # Phrases and labels are given here, so gold A and B are passed on both sides.
    return evaluate(expected_A, expected_B, expected_C,
                    expected_A, expected_B, actual_C)
77+
78+
79+
def update(dict_1, dict_2):
    """Add every value of ``dict_1`` onto the same key in ``dict_2``.

    Note the argument order: ``dict_1`` is the source and is left untouched,
    ``dict_2`` is the accumulator and is modified in place.

    Keys missing from ``dict_2`` are created. A ``defaultdict`` target keeps
    using its own default factory; a plain dict target starts the key at the
    incoming value instead of raising ``KeyError``.

    Returns None.
    """
    for key, value in dict_1.items():
        try:
            dict_2[key] += value
        except KeyError:
            # Plain dict without this key yet: start from the incoming value.
            dict_2[key] = value
2782

2883

2984
def precision_recall_f1(correct, subtotal, spurious, missing):
    """Return ``(precision, recall, f1)`` as floats.

    precision = correct / (subtotal + spurious)
    recall    = correct / (subtotal + missing)
    f1        = harmonic mean of precision and recall

    Any ratio whose denominator is zero is reported as 0.0 instead of
    raising ZeroDivisionError, so an empty or fully wrong submission still
    yields a score file. The numerator is converted to float so integer
    counts are not truncated by Python 2 style integer division.
    """
    def ratio(numerator, denominator):
        return float(numerator) / denominator if denominator else 0.0

    precision = ratio(correct, subtotal + spurious)
    recall = ratio(correct, subtotal + missing)
    f1 = ratio(2 * precision * recall, precision + recall)
    return precision, recall, f1


if __name__ == '__main__':
    # Usage: score.py [gold_dir [submit_dir]]
    gold = sys.argv[1] if len(sys.argv) > 1 else 'gold'
    submit = sys.argv[2] if len(sys.argv) > 2 else 'submit'

    # Per-scenario counters accumulated over every document.
    totals1 = collections.defaultdict(lambda: 0)
    totals2 = collections.defaultdict(lambda: 0)
    totals3 = collections.defaultdict(lambda: 0)

    for fname in os.listdir(gold):
        if fname.startswith('input_'):
            # Strip the 'input_' prefix to get the document name.
            name = fname[6:]

            scenario1 = evaluate_1(name, gold, submit)
            update(scenario1, totals1)

            scenario2 = evaluate_2(name, gold, submit)
            update(scenario2, totals2)

            scenario3 = evaluate_3(name, gold, submit)
            update(scenario3, totals3)

    pprint.pprint(('Scenario 1', totals1))
    pprint.pprint(('Scenario 2', totals2))
    pprint.pprint(('Scenario 3', totals3))

    # Scenario 1 (A+B+C): a partial phrase match counts as half a hit.
    correct_1 = sum([totals1['correct_A'], totals1['correct_B'],
                     totals1['correct_C'], 0.5 * totals1['partial_A']])
    subtotal_1 = sum([totals1['partial_A'], totals1['correct_A'],
                      totals1['correct_B'], totals1['incorrect_B'],
                      totals1['correct_C']])
    abc_prec, abc_rec, abc_f1 = precision_recall_f1(
        correct_1, subtotal_1,
        totals1['spurious_A'] + totals1['spurious_C'],
        totals1['missing_A'] + totals1['missing_C'])

    # Scenario 2 (B+C).
    correct_2 = sum([totals2['correct_B'], totals2['correct_C']])
    subtotal_2 = sum([totals2['correct_B'], totals2['incorrect_B'],
                      totals2['correct_C']])
    bc_prec, bc_rec, bc_f1 = precision_recall_f1(
        correct_2, subtotal_2,
        totals2['spurious_C'], totals2['missing_C'])

    # Scenario 3 (C only). The spurious/missing counts must come from
    # totals3; the previous code read them from totals2 by mistake.
    correct_3 = totals3['correct_C']
    subtotal_3 = totals3['correct_C']
    c_prec, c_rec, c_f1 = precision_recall_f1(
        correct_3, subtotal_3,
        totals3['spurious_C'], totals3['missing_C'])

    macro = sum([abc_f1, bc_f1, c_f1]) / 3

    with open(os.path.join(submit, 'scores.txt'), 'w') as fp:
        fp.write('abc_prec:%.5f\n' % abc_prec)
        fp.write('abc_rec:%.5f\n' % abc_rec)
        fp.write('abc_f1:%.5f\n' % abc_f1)

        fp.write('bc_prec:%.5f\n' % bc_prec)
        fp.write('bc_rec:%.5f\n' % bc_rec)
        fp.write('bc_f1:%.5f\n' % bc_f1)

        fp.write('c_prec:%.5f\n' % c_prec)
        fp.write('c_rec:%.5f\n' % c_rec)
        fp.write('c_f1:%.5f\n' % c_f1)

        fp.write('macro:%.5f\n' % macro)

0 commit comments

Comments
 (0)