-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathword_collocation_using_mean_variance.py
More file actions
65 lines (58 loc) · 1.76 KB
/
word_collocation_using_mean_variance.py
File metadata and controls
65 lines (58 loc) · 1.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""This Python program identifies whether two words in a set of sentences form a
fixed collocation, a flexible collocation, or no collocation, using the mean and variance method."""
import re
import math
from nltk.tokenize import word_tokenize
def difference(main_list, key_1, key_2):
    """Classify two words as a fixed, flexible or non-collocation.

    Applies the mean and variance method: for every sentence that contains
    both words, the absolute distance (in tokens) between them is recorded.
    The mean (µ) and the population standard deviation (S) of those distances
    are then printed together with a verdict.

    Args:
        main_list: Iterable of groups of sentences, e.g.
            ``[["knocked on the door"], ["knocked at the door"]]``.
        key_1: The first word, either wrapped in a sequence (``["knocked"]``)
            or given as a plain string.
        key_2: The second word, in the same form as ``key_1``.

    Returns:
        None. The verdict and the values of µ and S are written to stdout.
        If no sentence contains both words, only the verdict
        "It is not a collocation." is printed.
    """
    # Accept a bare string as well as the one-element list form; indexing a
    # string with [0] would compare tokens against a single letter.
    first_word = key_1 if isinstance(key_1, str) else key_1[0]
    second_word = key_2 if isinstance(key_2, str) else key_2[0]

    distances = []
    for item in main_list:
        for sentence in item:
            # Positions are reset for every sentence so that a sentence
            # missing one of the words cannot reuse stale positions.
            first_pos = None
            second_pos = None
            # Single pass over the tokens; the last occurrence of each word wins.
            for index, token in enumerate(word_tokenize(sentence)):
                if token == first_word:
                    first_pos = index
                if token == second_word:
                    second_pos = index
            if first_pos is None or second_pos is None:
                # The words do not co-occur here, so there is no distance.
                continue
            distances.append(abs(first_pos - second_pos))

    if not distances:
        # Nothing to average: report the verdict instead of dividing by zero.
        print("It is not a collocation.")
        return

    count = len(distances)
    mu = sum(distances) / count
    # Population standard deviation (divides by the number of observations).
    s = math.sqrt(sum((d - mu) ** 2 for d in distances) / count)

    # Distances are non-negative, so a zero mean implies a zero deviation;
    # the final branch is kept as a fallback.
    if s > 0 and mu != 0:
        print("It is a flexible collocation.")
    elif s == 0:
        print("It is a fixed collocation.")
    else:
        print("It is not a collocation.")
    print("The value of µ is:",mu,"\n","The value of S is:",s)
# Driver: read the number of sentences, then one sentence per line.
# Each sentence is wrapped in its own list because difference() iterates
# over groups of sentences.
main_list = []
n = int(input())
for i in range(n):
    main_list.append([input()])
# difference() reads each word as key[0], so both words are passed as
# one-element lists (a word must not be converted with int()).
key1 = [input()]
key2 = [input()]
difference(main_list, key1, key2)
"""
Input values used for the example:
main_list= [["knocked on the door"],['knocked at the door'],["knocked on John's door."],['knocked on the metal front door']]
key1= ['knocked']
key2= ['door']
Output:
It is a flexible collocation.
The value of µ is: 4.0
The value of S is: 1.2909944487358056
"""