-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathQ4.py
More file actions
172 lines (143 loc) · 6.52 KB
/
Q4.py
File metadata and controls
172 lines (143 loc) · 6.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
"""
111901030
Mayank Singla
Coding Assignment 1 - Q4
"""
# %%
from random import choices
def handleError(method):
    """
    Decorator that runs a method of a class inside a try-except block.

    All positional and keyword arguments are forwarded unchanged to the wrapped
    method and its return value is handed back to the caller. If the method
    raises an Exception, the exception's type and message are printed instead
    of being propagated, and None is returned.

    Args:
        method: The method to wrap. It is called as method(ref, *args, **kwargs),
            where ref is the object the method is invoked on.

    Returns:
        The wrapping function, which keeps the wrapped method's name and docstring.
    """
    # Imported locally so that this function does not rely on a module-level import.
    from functools import wraps

    @wraps(method)  # Keep the wrapped method's __name__ and __doc__ for introspection
    def decorator(ref, *args, **kwargs):
        try:
            # Returning the result lets decorated methods pass values back to callers
            return method(ref, *args, **kwargs)
        except Exception as err:
            # Report the failure gracefully rather than crashing the caller
            print(type(err))
            print(err)
            return None

    return decorator
class TextGenerator:
    """
    Generates random text from the word triplets found in a text file.

    Attributes:
        prefDict: Prefix dictionary mapping a pair (2-tuple) of consecutive words
            to the list of words that follow that pair in the text.
        _freqDist: For each pair, a two element list [words, frequencies] holding
            the distinct next words and how often each one follows the pair.
    """

    def __init__(self):
        # The dictionaries are created per instance. As class attributes they were
        # shared, so assimilating a file in one generator overwrote all the others.
        self.prefDict = {}  # Prefix Dictionary for each pair
        self._freqDist = {}  # Frequency Distribution for each pair

    def _createFreqDist(self):
        """
        Creates the frequency distribution of next words for each pair of words.
        Any previously stored distribution is discarded first.
        """
        self._freqDist.clear()  # Clearing the previous frequency distribution
        for pair, followers in self.prefDict.items():
            # Counting how many times each word follows the pair
            counts = {}
            for word in followers:
                counts[word] = counts.get(word, 0) + 1
            # Storing parallel lists of words and frequencies, the form choices() expects
            self._freqDist[pair] = [list(counts.keys()), list(counts.values())]

    def assimilateText(self, filename):
        """
        Takes filename as its argument and reads all the text in the file.
        Creates a prefix dictionary that maps a pair (2-tuple) of words to a list of words which follow that pair in the text.
        A file with fewer than 3 words leaves both dictionaries empty.

        Args:
            filename: Path of the text file to read

        Raises:
            OSError: If the file can not be opened. The previously assimilated text is kept in that case.
        """
        # Reading the file before touching any state, so a failed read changes nothing
        with open(filename) as inputFile:
            words = inputFile.read().split()  # Extracting the list of words

        self.prefDict.clear()  # Clearing the prefix Dictionary
        # Sliding a window of three words over the text: (first, second) -> follower
        for first, second, follower in zip(words, words[1:], words[2:]):
            self.prefDict.setdefault((first, second), []).append(follower)

        # Always rebuilding the frequency distribution, so that it never describes
        # a previously assimilated file (this also empties it for short files)
        self._createFreqDist()

    @handleError
    def generateText(self, n: int, startWord=""):
        """
        Creates random text based on the triplets contained in the prefix dictionary and prints it.
        Errors raised here are passed to the handleError decorator instead of reaching the caller.

        Args:
            n(int): Number of words of the text to generate, must be at least 1
            startWord(str)?: Starting word of the text to generate

        Raises:
            ValueError: If n is less than 1.
            RuntimeError: If no text has been assimilated yet.
            Exception: If startWord is not the first word of any known pair.
        """
        if n < 1:
            # Previously two words were printed even when none were requested
            raise ValueError("Number of words to generate must be at least 1.")

        pairs = list(self.prefDict)  # List of all the pairs of words
        if not pairs:
            # choices() fails with an unhelpful IndexError on an empty list
            raise RuntimeError("Unable to produce text as no text has been assimilated.")

        if startWord:
            # Only the pairs having start word as first member can begin the text
            candidates = [pair for pair in pairs if pair[0] == startWord]
            if not candidates:
                # If there is no pair found, we can't make the text
                raise Exception("Unable to produce text with the specified start word.")
        else:
            # If start word is not provided, any pair can begin the text
            candidates = pairs
        currPair = choices(candidates)[0]  # The current pair in the text

        if n == 1:
            # If only one word needs to be generated
            print(currPair[0])
            return

        # Collecting the words in a list and joining them once at the end
        words = [currPair[0], currPair[1]]
        while len(words) < n:
            if currPair in self._freqDist:
                # Picking the next word weighted by how often it follows currPair
                nextWords, weights = self._freqDist[currPair]
                nextWord = choices(nextWords, weights)[0]
                words.append(nextWord)
                currPair = (currPair[1], nextWord)  # Updating the current pair of words
            else:
                # Dead end: nothing follows currPair, so restarting from a random pair
                currPair = choices(pairs)[0]
                words.extend(currPair)

        # A restart adds two words at once and may overshoot by one, hence the slice
        print(" ".join(words[:n]))
if __name__ == "__main__":
    # Build the generator from the sample corpus
    tg = TextGenerator()
    tg.assimilateText("sherlock.txt")

    # Arguments of the sample test cases 1, 2 and 3, in the order they are run
    sampleCases = (
        (100,),
        (100, "London"),
        (50, "Wedge"),
    )
    for caseArgs in sampleCases:
        tg.generateText(*caseArgs)
        print()  # Blank line after the output of each test case