-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathRandomForestClassifier.py
More file actions
111 lines (86 loc) · 5.41 KB
/
RandomForestClassifier.py
File metadata and controls
111 lines (86 loc) · 5.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.grid_search import GridSearchCV
import ClassifierHelperFunctions as hf
def run_random_forest_classifier(sports=('Badminton', 'Basketball', 'Foosball', 'Running', 'Skating', 'Walking'),
                                 featuresFile='../Data/featuresFinal.csv',
                                 labelsFile='../Data/labelsFinal.csv',
                                 freqDims=150,
                                 timeDims=13,
                                 channels=6,
                                 num_pca_components=200,
                                 pca_whiten=True,
                                 algoSwitch=0):
    """Train a grid-searched random forest on PCA-reduced features and print test errors.

    The features returned by ``hf.load_data`` are reshaped to
    ``(-1, freqDims * timeDims, channels)``, each channel is reduced
    independently with PCA, the reduced channels are flattened into one feature
    vector per example, and a ``RandomForestClassifier`` is tuned with a
    5-fold ``GridSearchCV``. Per-sport error counts and the rows of the
    truth-vs-prediction count matrix are printed for the test set.

    Args:
        sports: Sequence of class names; position ``i`` names class index ``i``.
            Its length fixes the size of the printed count matrix.
        featuresFile: Path of the features file handed to ``hf.load_data``.
        labelsFile: Path of the labels file handed to ``hf.load_data``.
        freqDims: With ``timeDims``, gives the per-channel feature length
            (``freqDims * timeDims``) used when reshaping.
        timeDims: See ``freqDims``.
        channels: Size of the last axis of the reshaped features; PCA is fitted
            once per channel.
        num_pca_components: Number of PCA components kept per channel; capped
            at ``freqDims * timeDims``.
        pca_whiten: Forwarded to ``PCA(whiten=...)``.
        algoSwitch: When greater than 1, the model is additionally evaluated on
            '../Data/newPersonFeaturesFinal.csv' /
            '../Data/newPersonLabelsFinal.csv'.

    Returns:
        None. All results are written to standard output.

    Note:
        Labels are decoded with ``np.argmax(labels, 1)``, so they are presumably
        one-hot rows -- confirm against ``hf.load_data``.
    """
    # Every print below passes one pre-formatted string, so the emitted text is
    # the same whether ``print`` is the Python 2 statement or the function.
    print('\nLoading data...')
    flat_dims = freqDims * timeDims
    use_new_person = algoSwitch > 1

    data = hf.load_data(featuresFile, labelsFile)
    train_features = data['train_features'].reshape((-1, flat_dims, channels))
    train_labels = data['train_labels']
    test_features = data['test_features'].reshape((-1, flat_dims, channels))
    test_labels = data['test_labels']
    if use_new_person:
        newPerson_data = hf.load_data('../Data/newPersonFeaturesFinal.csv',
                                      '../Data/newPersonLabelsFinal.csv',
                                      newPerson=True)
        newPerson_test_features = newPerson_data['test_features'].reshape((-1, flat_dims, channels))
        newPerson_test_labels = newPerson_data['test_labels']

    print('Data loaded!')
    print('Training set:  %s' % (train_features.shape,))
    print('Test set:  %s' % (test_features.shape,))
    if use_new_person:
        print('New person test set: %s' % newPerson_test_features.shape[0])

    random_forest = RandomForestClassifier()
    # Parameter ranges searched by GridSearchCV; the 'classifier__' prefix
    # addresses the pipeline step named "classifier" below.
    tuned_parameters = [{'classifier__min_samples_split': [5, 10],
                         'classifier__criterion': ['gini', 'entropy'],
                         'classifier__n_estimators': [15, 25]}]

    print('\nReducing dimensionality using PCA...')
    # Cannot keep more components than there are features in a channel.
    num_pca_components = min(num_pca_components, flat_dims)

    reduced_train_features = np.zeros((train_features.shape[0], num_pca_components, channels))
    reduced_test_features = np.zeros((test_features.shape[0], num_pca_components, channels))
    if use_new_person:
        reduced_newPerson_test_features = np.zeros(
            (newPerson_test_features.shape[0], num_pca_components, channels))

    for c in range(channels):
        # One PCA per channel, fitted on the training data only and then
        # applied unchanged to the held-out sets.
        pca = PCA(n_components=num_pca_components, whiten=pca_whiten)
        reduced_train_features[:, :, c] = pca.fit_transform(train_features[:, :, c])
        reduced_test_features[:, :, c] = pca.transform(test_features[:, :, c])
        if use_new_person:
            reduced_newPerson_test_features[:, :, c] = pca.transform(newPerson_test_features[:, :, c])

    print('Dimensionality reduced!')
    print('Old feature dimensions: %s' % (train_features.shape[1] * train_features.shape[2]))
    print('New feature dimensions: %s' % (reduced_train_features.shape[1] * reduced_train_features.shape[2]))

    # NOTE(review): GridSearchCV is imported from sklearn.grid_search at the top
    # of the file; that module was removed in scikit-learn 0.20 in favour of
    # sklearn.model_selection.
    pipeline = Pipeline([("classifier", random_forest)])
    pipeline_grid = GridSearchCV(pipeline, tuned_parameters, cv=5, n_jobs=-1)

    # The classifier is trained on class indices, not on the label rows.
    train_labels = np.argmax(train_labels, 1)

    # Concatenate the per-channel components into one flat vector per example.
    reduced_train_features = reduced_train_features.reshape((reduced_train_features.shape[0], -1))
    reduced_test_features = reduced_test_features.reshape((reduced_test_features.shape[0], -1))
    if use_new_person:
        reduced_newPerson_test_features = reduced_newPerson_test_features.reshape(
            (reduced_newPerson_test_features.shape[0], -1))

    pipeline_grid.fit(reduced_train_features, train_labels)
    print(pipeline_grid.best_params_)

    predictions = pipeline_grid.predict(reduced_test_features)
    print(predictions.shape)
    if use_new_person:
        newPerson_predictions = pipeline_grid.predict(reduced_newPerson_test_features)
        print(newPerson_predictions.shape)

    _print_error_report('\nTEST ERRORS (# of errors / # of test examples)',
                        sports, test_labels, predictions)
    if use_new_person:
        _print_error_report('\n\nTEST ERRORS FOR NEW PERSON (# of errors / # of test examples)',
                            sports, newPerson_test_labels, newPerson_predictions)


def _print_error_report(header, sports, labels, predictions):
    """Print ``header``, then one error-count line per sport for ``predictions``."""
    print(header)
    # Decode the true classes once rather than once per prediction.
    truth = np.argmax(labels, 1)
    # Rows are the true class, columns the predicted class. np.add.at is used
    # because it accumulates repeated (row, column) pairs, unlike `m[r, c] += 1`
    # with array indices.
    counts = np.zeros([len(sports), len(sports)], dtype='int')
    np.add.at(counts, (truth, predictions), 1)
    for i, sport in enumerate(sports):
        row = counts[i, :]
        num_actually_were = np.sum(row)
        num_wrongly_predicted = num_actually_were - counts[i, i]
        print('%s:\t%s / %s \t%s' % (sport, num_wrongly_predicted, num_actually_were, row))