-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathKNN.py
More file actions
51 lines (40 loc) · 1.4 KB
/
KNN.py
File metadata and controls
51 lines (40 loc) · 1.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import pickle
# Seed NumPy's global random generator with a fixed value.
# NOTE(review): presumably for reproducible runs; nothing in the visible code
# draws from np.random directly, so confirm this is still needed.
np.random.seed(0)
# CSV file that Knn() passes to _make_in_format() for loading.
FILENAME = 'movie_metadata_filtered_aftercsv.csv'
# Tolerance used by accuracy_score(): a prediction counts as correct when it
# lies within +/- this value of the true score.
THRESHOLD_PREDICTION = 1
def _make_in_format(filename):
    """Load the CSV at *filename* and return a feature matrix and target vector.

    The target is the ``imdb_score`` column. The columns at positional
    indices 0 and 9 are removed from the features, and every remaining column
    is rescaled as ``(value - column mean) / (column max - column min)``.

    Args:
        filename: Path of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` holds the normalised
        features (one row per CSV row) and ``y`` the ``imdb_score`` values.

    Raises:
        KeyError: If the CSV has no ``imdb_score`` column.
        IndexError: If the CSV has fewer than ten columns.
    """
    datadf = pd.read_csv(filename)
    # Separate the target from the features.
    y = np.array(datadf['imdb_score'])
    # NOTE(review): presumably column 0 is a saved index and column 9 is
    # imdb_score itself; if column 9 is something else, the target stays in
    # the feature matrix. Confirm against the CSV layout.
    datadf = datadf.drop(datadf.columns[[0, 9]], axis=1)
    # Normalise. A constant column has max == min, which would make the
    # division 0/0 = NaN; dividing by 1 instead leaves such a column at 0.
    value_range = datadf.max() - datadf.min()
    safe_range = value_range.where(value_range != 0, 1)
    datadf = (datadf - datadf.mean()) / safe_range
    X = np.array(datadf)
    return X, y
def _pickle_it(model, filename):
    """Serialise *model* with pickle into ``models/<filename>``.

    The file is written in binary mode so that it can be read back with
    ``pickle.load``; writing ``str(pickle.dumps(model))`` to a text file
    stores the ``b'...'`` repr of the bytes, which pickle cannot load.

    Args:
        model: Any picklable object (here, the fitted classifier).
        filename: File name, appended to the ``models/`` directory prefix.

    Raises:
        OSError: If the directory or file cannot be created or written.
        pickle.PicklingError: If *model* cannot be pickled.
    """
    import os  # local import: the module header does not import os

    path = 'models/' + filename
    # Create the target directory up front so a missing folder does not
    # throw away an already-trained model.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # The context manager guarantees the handle is flushed and closed.
    with open(path, 'wb') as write_file:
        pickle.dump(model, write_file)
def accuracy_score(y_test, predictions, threshold=None):
    """Return the fraction of predictions within a tolerance of the true value.

    A prediction counts as correct when the true value lies in the closed
    interval ``[prediction - threshold, prediction + threshold]``.

    Args:
        y_test: Indexable sequence of true values.
        predictions: Indexable sequence of predicted values, read at the
            same positions as *y_test*.
        threshold: Allowed absolute deviation. ``None`` (the default) uses
            the module-level ``THRESHOLD_PREDICTION``.

    Returns:
        ``correct / len(y_test)`` as a float between 0.0 and 1.0.

    Raises:
        ZeroDivisionError: If *y_test* is empty.
        IndexError: If *predictions* is shorter than *y_test*.
    """
    # Resolve the default at call time so a threshold of 0 is still honoured.
    if threshold is None:
        threshold = THRESHOLD_PREDICTION
    correct = sum(
        1
        for i, actual in enumerate(y_test)
        if predictions[i] - threshold <= actual <= predictions[i] + threshold
    )
    return correct * 1.0 / len(y_test)
def Knn():
    """Train a ball-tree k-nearest-neighbours classifier and print its score.

    Loads the data set named by ``FILENAME``, holds out 30% of the rows for
    testing, fits the classifier on the rest, saves the fitted model under
    the name ``Knn_thre1`` and prints the tolerance-based accuracy as a
    percentage.
    """
    features, targets = _make_in_format(FILENAME)
    # Fixed random_state so the train/test split is the same on every run.
    split = train_test_split(features, targets, test_size=0.3, random_state=0)
    train_features, test_features, train_targets, test_targets = split

    classifier = KNeighborsClassifier(algorithm='ball_tree')
    classifier.fit(train_features, train_targets)
    predicted = classifier.predict(test_features)

    # Persist the model before scoring, matching the original call order.
    _pickle_it(classifier, "Knn_thre1")
    score_percent = accuracy_score(test_targets, predicted) * 100
    print("knn score ", score_percent)
def main():
    """Script entry point: run the k-nearest-neighbours pipeline via ``Knn``."""
    Knn()
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()