|
4 | 4 | import sys
|
5 | 5 | import os
|
6 | 6 | import random
|
7 |
| - |
8 |
| - |
9 |
| -import os |
10 |
| -import sys |
11 | 7 | import pprint
|
12 | 8 | import collections
|
| 9 | + |
13 | 10 | from tools import (get_span,
|
14 | 11 | read_input,
|
15 | 12 | read_phrases,
|
16 |
| - read_links, |
17 | 13 | read_labels,
|
| 14 | + read_links, |
18 | 15 | compare_phrases,
|
19 |
| - compare_links, |
20 |
| - compare_labels) |
| 16 | + compare_labels, |
| 17 | + compare_links) |
| 18 | + |
| 19 | + |
def evaluate(gold_A, gold_B, gold_C, submit_A, submit_B, submit_C):
    """Compare one submission against the gold data and return raw counts.

    Phrases (A) are compared first; the ``'mapping'`` entry of that result
    is then handed to the label (B) and link (C) comparisons.

    Returns a dict of integer counts -- the sizes of the ``'correct'``,
    ``'partial'``, ``'missing'``, ``'spurious'`` and ``'incorrect'``
    collections reported by the ``compare_*`` helpers -- keyed as
    ``<category>_<task>`` (e.g. ``correct_A``, ``incorrect_B``).
    """
    phrases = compare_phrases(gold_A, submit_A)
    mapping = phrases['mapping']
    labels = compare_labels(gold_B, submit_B, mapping)
    links = compare_links(gold_C, submit_C, mapping)

    return {
        'correct_A': len(phrases['correct']),
        'correct_B': len(labels['correct']),
        'correct_C': len(links['correct']),
        'partial_A': len(phrases['partial']),
        'missing_A': len(phrases['missing']),
        'missing_C': len(links['missing']),
        'spurious_A': len(phrases['spurious']),
        'spurious_C': len(links['spurious']),
        'incorrect_B': len(labels['incorrect']),
    }
| 36 | + |
def evaluate_1(name, gold, submit):
    """Score scenario 1 for one document: A, B and C all come from the submission.

    name: document suffix shared by the ``input_``/``output_X_`` files.
    gold: directory holding the gold files.
    submit: submission root; files are read from its ``scenario1-ABC`` folder.

    Returns the count dict produced by ``evaluate``.
    """
    # The parsed input is not used below; the call is kept so that any error
    # read_input raises for this document still surfaces here.
    read_input(os.path.join(gold, 'input_%s' % name))

    reference_phrases = read_phrases(os.path.join(gold, 'output_A_%s' % name))
    reference_labels = read_labels(os.path.join(gold, 'output_B_%s' % name))
    reference_links = read_links(os.path.join(gold, 'output_C_%s' % name))

    scenario_dir = os.path.join(submit, 'scenario1-ABC')
    answer_phrases = read_phrases(os.path.join(scenario_dir, 'output_A_%s' % name))
    answer_labels = read_labels(os.path.join(scenario_dir, 'output_B_%s' % name))
    answer_links = read_links(os.path.join(scenario_dir, 'output_C_%s' % name))

    return evaluate(
        reference_phrases, reference_labels, reference_links,
        answer_phrases, answer_labels, answer_links,
    )
| 49 | + |
| 50 | + |
def evaluate_2(name, gold, submit):
    """Score scenario 2 for one document: gold phrases (A), submitted B and C.

    name: document suffix shared by the ``input_``/``output_X_`` files.
    gold: directory holding the gold files.
    submit: submission root; files are read from its ``scenario2-BC`` folder.

    Returns the count dict produced by ``evaluate``.
    """
    # The parsed input is not used below; the call is kept so that any error
    # read_input raises for this document still surfaces here.
    read_input(os.path.join(gold, 'input_%s' % name))

    reference_phrases = read_phrases(os.path.join(gold, 'output_A_%s' % name))
    reference_labels = read_labels(os.path.join(gold, 'output_B_%s' % name))
    reference_links = read_links(os.path.join(gold, 'output_C_%s' % name))

    scenario_dir = os.path.join(submit, 'scenario2-BC')
    answer_labels = read_labels(os.path.join(scenario_dir, 'output_B_%s' % name))
    answer_links = read_links(os.path.join(scenario_dir, 'output_C_%s' % name))

    # Phrases are given in this scenario, so the gold phrases stand in for
    # the submitted ones.
    return evaluate(
        reference_phrases, reference_labels, reference_links,
        reference_phrases, answer_labels, answer_links,
    )
21 | 63 |
|
22 |
| -from os.path import abspath, join, exists |
23 | 64 |
|
def evaluate_3(name, gold, submit):
    """Score scenario 3 for one document: gold A and B, submitted links (C) only.

    name: document suffix shared by the ``input_``/``output_X_`` files.
    gold: directory holding the gold files.
    submit: submission root; links are read from its ``scenario3-C`` folder.

    Returns the count dict produced by ``evaluate``.
    """
    # The parsed input is not used below; the call is kept so that any error
    # read_input raises for this document still surfaces here.
    read_input(os.path.join(gold, 'input_%s' % name))

    reference_phrases = read_phrases(os.path.join(gold, 'output_A_%s' % name))
    reference_labels = read_labels(os.path.join(gold, 'output_B_%s' % name))
    reference_links = read_links(os.path.join(gold, 'output_C_%s' % name))

    scenario_dir = os.path.join(submit, 'scenario3-C')
    answer_links = read_links(os.path.join(scenario_dir, 'output_C_%s' % name))

    # Phrases and labels are given in this scenario, so the gold data stands
    # in for both on the submission side.
    return evaluate(
        reference_phrases, reference_labels, reference_links,
        reference_phrases, reference_labels, answer_links,
    )
| 77 | + |
| 78 | + |
def update(dict_1, dict_2):
    """Add every value of ``dict_1`` onto the same key of ``dict_2``, in place.

    dict_1: mapping of counter name -> amount to add; it is not modified.
    dict_2: accumulator that is mutated. A key it does not hold yet is
        created from the incoming amount, so a plain ``dict`` works as well
        as a ``defaultdict``.

    Returns None.
    """
    for key, amount in dict_1.items():
        try:
            dict_2[key] += amount
        except KeyError:
            # Plain dict seeing this counter for the first time.
            dict_2[key] = amount
27 | 82 |
|
28 | 83 |
|
def _ratio(numerator, denominator):
    """Return ``numerator / denominator`` as a float, or 0.0 for a zero denominator."""
    if not denominator:
        return 0.0
    # float() keeps the division true under Python 2 when both counts are ints.
    return float(numerator) / denominator


def _prf(correct, subtotal, spurious, missing):
    """Return ``(precision, recall, f1)`` for one scenario.

    precision = correct / (subtotal + spurious)
    recall    = correct / (subtotal + missing)
    f1        = harmonic mean of the two; 0.0 when both are zero.
    """
    precision = _ratio(correct, subtotal + spurious)
    recall = _ratio(correct, subtotal + missing)
    f1 = _ratio(2 * precision * recall, precision + recall)
    return precision, recall, f1


if __name__ == '__main__':
    # Usage: <script> [gold_dir] [submit_dir]
    gold = sys.argv[1] if len(sys.argv) > 1 else 'gold'
    submit = sys.argv[2] if len(sys.argv) > 2 else 'submit'

    # Per-scenario running sums of the count dicts returned by evaluate_N.
    totals1 = collections.defaultdict(int)
    totals2 = collections.defaultdict(int)
    totals3 = collections.defaultdict(int)

    for fname in os.listdir(gold):
        if not fname.startswith('input_'):
            continue
        name = fname[len('input_'):]

        update(evaluate_1(name, gold, submit), totals1)
        update(evaluate_2(name, gold, submit), totals2)
        update(evaluate_3(name, gold, submit), totals3)

    pprint.pprint(('Scenario 1', totals1))
    pprint.pprint(('Scenario 2', totals2))
    pprint.pprint(('Scenario 3', totals3))

    # Scenario 1 (A+B+C submitted): a partial phrase match earns half credit.
    correct_1 = (totals1['correct_A'] + totals1['correct_B']
                 + totals1['correct_C'] + 0.5 * totals1['partial_A'])
    subtotal_1 = (totals1['partial_A'] + totals1['correct_A']
                  + totals1['correct_B'] + totals1['incorrect_B']
                  + totals1['correct_C'])
    abc_scores = _prf(correct_1, subtotal_1,
                      totals1['spurious_A'] + totals1['spurious_C'],
                      totals1['missing_A'] + totals1['missing_C'])

    # Scenario 2 (B+C submitted).
    correct_2 = totals2['correct_B'] + totals2['correct_C']
    subtotal_2 = (totals2['correct_B'] + totals2['incorrect_B']
                  + totals2['correct_C'])
    bc_scores = _prf(correct_2, subtotal_2,
                     totals2['spurious_C'], totals2['missing_C'])

    # Scenario 3 (C submitted): scored from its own totals, not scenario 2's.
    correct_3 = totals3['correct_C']
    c_scores = _prf(correct_3, correct_3,
                    totals3['spurious_C'], totals3['missing_C'])

    # Unweighted mean of the three scenario F1 values.
    macro = (abc_scores[2] + bc_scores[2] + c_scores[2]) / 3

    with open(os.path.join(submit, 'scores.txt'), 'w') as fp:
        for label, (prec, rec, f1) in (('abc', abc_scores),
                                       ('bc', bc_scores),
                                       ('c', c_scores)):
            fp.write('%s_prec:%.5f\n' % (label, prec))
            fp.write('%s_rec:%.5f\n' % (label, rec))
            fp.write('%s_f1:%.5f\n' % (label, f1))

        fp.write('macro:%.5f\n' % macro)
0 commit comments