-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoutput.py
More file actions
100 lines (90 loc) · 3.13 KB
/
output.py
File metadata and controls
100 lines (90 loc) · 3.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import nltk
import copy
import json
import re
import math
import numpy
from matplotlib import pylab
from parameter import Parameter
def plot(s, ss, d, b, t):
    """Draw the score curves and stem overlays on one figure and show it.

    Args:
        s: sequence plotted as the "Gap Scores" line.
        ss: sequence plotted as the "Smoothed Gap scores" line.
        d: sequence plotted as the "Depth scores" line.
        b: sequence drawn as a stem plot with the default line format.
        t: sequence drawn as a stem plot with the '-.' line format.
            NOTE(review): b and t are presumably predicted and reference
            boundary indicators -- confirm against the caller.

    Every sequence is plotted against its own positional index
    (0 .. len - 1). The call ends with ``pylab.show()``.
    """
    pylab.xlabel("Sentence Gap index")
    pylab.ylabel("Gap Scores")

    # The three line plots differ only in their data and legend label.
    curves = (
        (s, "Gap Scores"),
        (ss, "Smoothed Gap scores"),
        (d, "Depth scores"),
    )
    for values, caption in curves:
        pylab.plot(range(len(values)), values, label=caption)

    # Stem overlays; the second uses a dash-dot line format to tell them apart.
    pylab.stem(range(len(b)), b)
    pylab.stem(range(len(t)), t, '-.')

    pylab.legend()
    pylab.show()
def export(new_text):
    """Write the segmented text to ``test.out`` in the current directory.

    Each paragraph is written followed by three newlines, the literal
    marker ``<<BREAK>>`` and three more newlines. An existing ``test.out``
    is overwritten; an empty ``new_text`` produces an empty file.

    Args:
        new_text: iterable of paragraph strings, written in iteration order.
    """
    separator = '\n\n\n' + '<<BREAK>>' + '\n\n\n'
    # The context manager closes the file even when a write fails part-way,
    # which the previous manual open()/close() pair did not guarantee.
    with open('test.out', 'w') as out_f:
        for para in new_text:
            out_f.write(para + separator)
def getPredictedTimestamps(timestamps_file, new_text):
    """Attach a start time to each predicted segment.

    The timestamps file is scanned line by line. A line whose first
    character is an upper-case letter, and whose preceding line starts
    with '0', is recorded as a sentence start; the first 8 characters of
    that preceding line are taken as its start time ('##:##:##').

    For every segment in ``new_text`` the text before its first '.' is
    compared against the recorded sentence starts, in file order. The
    first sentence start that occurs inside it supplies the segment's
    start time. Segments without a match are skipped and do not consume
    a heading number.

    Args:
        timestamps_file: path of the transcript file with timestamps.
        new_text: iterable of segment strings.

    Returns:
        dict of the form
        ``{'Heading1': {'text': <segment>, 'start': '##:##:##'}, ...}``,
        numbered from 1 in the order matching segments are encountered.
    """
    # Each entry is (sentence beginning, start time).
    sentence_starts = []
    with open(timestamps_file) as f:
        prev_line = ''
        for line in f:
            # startswith() is safe while prev_line is still '' (first line
            # of the file); indexing prev_line[0] raised IndexError there.
            if line[:1].isupper() and prev_line.startswith('0'):
                # Strip only the line terminator, so a final line without
                # a trailing newline keeps its last character.
                sentence_starts.append((line.rstrip('\n'), prev_line))
            prev_line = line[:8]

    segment_timestamps = {}
    n = 1
    for segment in new_text:
        segment_begin = segment.split('.')[0]
        for sentence, start in sentence_starts:
            # The last character of the transcript line is left out of the
            # comparison.
            # NOTE(review): presumably to tolerate trailing punctuation or
            # whitespace -- confirm. A transcript line of a single
            # character yields an empty key, which matches every segment.
            if sentence[:-1] in segment_begin:
                segment_timestamps['Heading' + str(n)] = {
                    'text': segment,
                    'start': start,
                }
                n += 1
                break
    return segment_timestamps
def getGoldTimestamps(gold_times_file):
    """Read the gold-standard headings and their time spans from a file.

    A record starts at a line beginning with 'Heading: '. The text after
    that prefix is the heading title. The next line supplies the start
    time (everything after its first 7 characters) and the line after
    that supplies the end time (everything after its first 5 characters).
    Lines outside such records are ignored.

    Args:
        gold_times_file: path of the gold-standard timestamps file.

    Returns:
        dict of the form
        ``{'Heading1': {'heading': <title>, 'start': '##:##:##',
        'end': '##:##:##'}, ...}``, numbered from 1 in file order.

    Raises:
        StopIteration: if the file ends before the two lines that must
            follow a 'Heading: ' line.
    """
    gold_timestamps = {}
    with open(gold_times_file) as f:
        lines = iter(f)
        n = 1
        for line in lines:
            if not line.startswith('Heading: '):
                continue
            # rstrip('\n') removes only the line terminator. The former
            # [:-1] slices also cut the last real character when the final
            # line of the file had no trailing newline.
            heading = line[9:].rstrip('\n')
            # The start and end lines are consumed from the same iterator,
            # so the outer loop resumes after the record.
            start = next(lines)[7:].rstrip('\n')
            end = next(lines)[5:].rstrip('\n')
            gold_timestamps['Heading' + str(n)] = {
                'heading': heading,
                'start': start,
                'end': end,
            }
            n += 1
    return gold_timestamps
def generateJSONTimestamps(gold_times_file, timestamps_file, new_text):
    """Dump the gold and the predicted timestamps as JSON files.

    The gold timestamps come from ``getGoldTimestamps(gold_times_file)``
    and are written to ``gold_JSON.json``. The predicted timestamps come
    from ``getPredictedTimestamps(timestamps_file, new_text)`` and are
    written to ``predicted_JSON.json``. Both files are created in the
    current directory and overwritten if they exist.

    Args:
        gold_times_file: path passed to ``getGoldTimestamps``.
        timestamps_file: path passed to ``getPredictedTimestamps``.
        new_text: segments passed to ``getPredictedTimestamps``.
    """
    gold_timestamps = getGoldTimestamps(gold_times_file)
    predicted_timestamps = getPredictedTimestamps(timestamps_file, new_text)

    # Each with-block closes its file on exit, so no explicit close() call
    # is needed.
    with open('gold_JSON.json', 'w') as out_f:
        json.dump(gold_timestamps, out_f)
    with open('predicted_JSON.json', 'w') as out_f:
        json.dump(predicted_timestamps, out_f)