Classification-Machine-Learning-Models/Source Code.py at main · KieranYogaraj15/Classification-Machine-Learning-Models · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#Import the Diabetes Dataset
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn

diabetes = pd.read_csv("Diabetes Dataset.csv", sep=",")


#Data Exploration
#Bare expressions are only echoed in an interactive console, so print them
#explicitly to keep the exploration visible when the file runs as a script.
print(diabetes.head())
print(diabetes.shape)
diabetes.info() #Prints dtypes and non-null counts (author's note: no missing data)

#What is the distribution of the Outcome variable?
#value_counts() orders by frequency, not by label, so sort by label and take the
#tick labels from the index instead of hard-coding ["0","1"].
outcome_counts = diabetes.Outcome.value_counts().sort_index()
plt.figure() #Own figure, so the heatmap is not drawn on top of the bars
plt.bar(outcome_counts.index.astype(str), outcome_counts.values)

#What is the relationship between Outcome and the other variables?
plt.figure()
sn.heatmap(diabetes.corr(), annot=True) #View the correlation between all variables


#Feature Selection
#Predictors are the Glucose, BMI and Age columns; the target is Outcome.
feature_columns = ['Glucose', 'BMI', 'Age']
X = diabetes[feature_columns]
y = diabetes['Outcome']


#Split Dataset into Train and Test datasets
from sklearn.model_selection import train_test_split

#80% of the rows go to training; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=0,
)


#KNN Model
#Building the model
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix

#Baseline classifier: 3 nearest neighbours, fitted on the training split.
#NOTE(review): the features are fed in unscaled; KNN is distance-based, so
#standardising them may be worth trying.
model = KNeighborsClassifier(n_neighbors=3)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)


#Evaluating the model
cm_knn = confusion_matrix(y_test, y_pred)
print(cm_knn)

print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
#Precision, recall and F1 use the 'weighted' average across classes.
for label, score_fn in (
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
    ("F1-score:", metrics.f1_score),
):
    print(label, score_fn(y_test, y_pred, average='weighted'))

report_knn = classification_report(y_test, y_pred)
print(report_knn)


#Parameter Tuning using GridSearchCV
from sklearn.model_selection import GridSearchCV

#Search K = 1..9 with 5-fold cross-validation on the training split only.
params = {'n_neighbors': range(1,10)}

grs = GridSearchCV(KNeighborsClassifier(), param_grid=params, cv=5)
grs.fit(X_train, y_train)

print("Best Hyper Parameters:",grs.best_params_) #The best K is 8 (author's recorded result)

#Keep `model` pointing at the refitted best estimator, as the decision tree
#tuning section does; previously it was left as an unfitted default
#classifier. Predictions are the same as calling grs.predict(X_test).
model = grs.best_estimator_
y_pred = model.predict(X_test)


#Evaluate model with new K
cm_knn = confusion_matrix(y_test, y_pred)
print(cm_knn)

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print("Precision:",metrics.precision_score(y_test, y_pred, average ='weighted'))
print("Recall:",metrics.recall_score(y_test, y_pred, average = 'weighted'))
print("F1-score:",metrics.f1_score(y_test, y_pred, average = 'weighted'))

report_knn = classification_report(y_test, y_pred)
print(report_knn)


#Decision Tree Model
#Building the model
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix

#Baseline tree with default hyper-parameters and a fixed seed.
#NOTE(review): the seed here (21) differs from the one used for the tree in the
#tuning section (0) — confirm this is intentional.
model = DecisionTreeClassifier(random_state=21)
y_pred = model.fit(X_train, y_train).predict(X_test)


#Evaluating the model
cm_tree = confusion_matrix(y_test, y_pred)
print(cm_tree)

accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
precision = metrics.precision_score(y_test, y_pred, average='weighted')
print("Precision:", precision)
recall = metrics.recall_score(y_test, y_pred, average='weighted')
print("Recall:", recall)
f1 = metrics.f1_score(y_test, y_pred, average='weighted')
print("F1-score:", f1)

report_tree = classification_report(y_test, y_pred)
print(report_tree)


#Fine Tuning the Parameter
from sklearn.model_selection import GridSearchCV

#Grid: both split criteria crossed with max_depth 1..9, compared with
#5-fold cross-validation on the training split.
params = {
    'criterion': ['gini', 'entropy'],
    'max_depth': range(1, 10),
}

grs = GridSearchCV(DecisionTreeClassifier(random_state=0), param_grid=params, cv=5)
grs.fit(X_train, y_train)

#Author's recorded result: max_depth = 4 and criterion = "gini"
print("Best Hyper Parameters:", grs.best_params_)

#Predict on the held-out split with the best estimator found by the search.
model = grs.best_estimator_
y_pred = model.predict(X_test)


#Evaluating the model with the new parameters
cm_tree = confusion_matrix(y_test, y_pred)
print(cm_tree)

print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
#Precision, recall and F1 use the 'weighted' average across classes.
for label, score_fn in (
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
    ("F1-score:", metrics.f1_score),
):
    print(label, score_fn(y_test, y_pred, average='weighted'))

report_tree = classification_report(y_test, y_pred)
print(report_tree)