diff --git a/stopsigncnn.py b/stopsigncnn.py
new file mode 100644
index 0000000..6512b94
--- /dev/null
+++ b/stopsigncnn.py
@@ -0,0 +1,197 @@
+# -*- coding: utf-8 -*-
+"""stopSignCNN.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1BLR4DHO2qn7DOEyZ-ysSMOTFPWsSZWf7
+"""
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+import torchvision
+from torch.utils.data.sampler import SubsetRandomSampler
+import torchvision.transforms as transforms
+import matplotlib.pyplot as plt
+import os
+import time
+
+# Mount Google Drive (only needed when running in Colab)
+'''from google.colab import drive
+drive.mount('/content/gdrive', force_remount=True)'''
+
+# Transform ideas to explore:
+'''compose, torchvision.transforms.RandomChoice(transforms)
+color jitter
+figure out what RandomAffine does
+random perspective
+random rotation
+LinearTransformation Chandra was talking about?
+RandomErasing
+'''
+
+# Seed from system entropy so each run draws fresh jitter strengths
+np.random.seed(None)
+U1 = np.random.random()
+U2 = np.random.random()
+U3 = np.random.random()
+U4 = np.random.random()
+
+transformations = []
+
+# Colour jitters with randomly drawn brightness/contrast/saturation strengths
+t1 = torchvision.transforms.ColorJitter(brightness=U1, contrast=U2, saturation=U3, hue=0.1)
+t2 = torchvision.transforms.ColorJitter(brightness=U2, contrast=U3, saturation=U4, hue=0.2)
+t3 = torchvision.transforms.ColorJitter(brightness=U3, contrast=U4, saturation=U1, hue=0.3)
+t4 = torchvision.transforms.ColorJitter(brightness=U4, contrast=U1, saturation=U2, hue=0.4)
+
+jitterList = [t1, t2, t3, t4]
+
+# Identity option: resize only, no augmentation
+tn = torchvision.transforms.Resize((200, 200), interpolation=2)
+
+# Rotations
+degrees = (-30, 30)
+t5 = torchvision.transforms.RandomRotation(degrees, resample=False, expand=False, center=None, fill=0)
+
+# Random perspectives (p=1, so the distortion is always applied)
+t6 = torchvision.transforms.RandomPerspective(distortion_scale=0.5, p=1, interpolation=3, fill=0)
+
+# Affine: rotation plus shear, both drawn from (-30, 30) degrees
+t7 = torchvision.transforms.RandomAffine(degrees, translate=None, scale=None, shear=degrees, resample=False, fillcolor=0)
+
+transformationsList = [t5, t6, t7, tn]
+
+# Pick one transform at random from each group
+randomJitter = torchvision.transforms.RandomChoice(jitterList)
+randomTransformation = torchvision.transforms.RandomChoice(transformationsList)
+
+transformations.append(randomJitter)
+transformations.append(randomTransformation)
+
+# Path to dataset
+master_path = '--------- INSERT LOCAL PATH OF DATASET HERE --------'
+
+# Helper functions
+def get_relevant_indices(dataset, classes, target_classes):
+    # Note: indexing dataset[i] loads and transforms each image, so this pass
+    # is slow; it keeps only samples whose class name is in target_classes.
+    indices = []
+    for i in range(len(dataset)):
+        # Check if the label is in the target classes
+        label_index = dataset[i][1]        # ex: 3
+        label_class = classes[label_index] # ex: 'cat'
+        if label_class in target_classes:
+            indices.append(i)
+    return indices
+
+
+def get_data_loader(target_classes, batch_size):
+    # Transformations are applied here: resize, then one random colour jitter,
+    # then one random geometric transform (or a plain resize)
+    transform = transforms.Compose([transforms.Resize((200, 200), interpolation=2),
+                                    randomJitter, randomTransformation,
+                                    transforms.ToTensor()])
+
+    # Create the full training dataset; classes are the folder names in master_path
+    trainset = torchvision.datasets.ImageFolder(master_path, transform=transform)
+    # ImageFolder sorts folder names alphabetically, so take the class order
+    # from it rather than hard-coding ["stopSign", "notStopSign"]
+    classes = trainset.classes
+
+    # Get the indices for the target classes in order to split into training and validation sets
+    relevant_indices = get_relevant_indices(trainset, classes, target_classes)
+
+    # Split into train and validation
+    np.random.seed(1000)
+    np.random.shuffle(relevant_indices)
+    split = int(len(relevant_indices) * 0.70)  # split at 70%
+
+    '''Currently do not have sufficient data for a test set'''
+
+    # Split into train and validation indices
+    relevant_train_indices, relevant_val_indices = relevant_indices[:split], relevant_indices[split:]
+    train_sampler = SubsetRandomSampler(relevant_train_indices)
+    train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
+                                               num_workers=1, sampler=train_sampler)
+    val_sampler = SubsetRandomSampler(relevant_val_indices)
+    val_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
+                                             num_workers=1, sampler=val_sampler)
+
+    return train_loader, val_loader, classes
+
+train_loader, val_loader, classes = get_data_loader(target_classes=["stopSign", "notStopSign"], batch_size=8)
+
+def get_accuracy(model, data_loader):
+    correct = 0
+    total = 0
+    for imgs, labels in data_loader:
+        if use_cuda and torch.cuda.is_available():
+            imgs = imgs.cuda()
+            labels = labels.cuda()
+        output = model(imgs)
+        # Select the index with the maximum prediction score
+        pred = output.max(1, keepdim=True)[1]
+        correct += pred.eq(labels.view_as(pred)).sum().item()
+        total += imgs.shape[0]
+    return correct / total
+
+# Training curve
+def plot_training_curve(path):
+    """ Plots the training curve for a model run, given the csv files
+    containing the train/validation error/loss.
+    Args:
+        path: The base path of the csv files produced during training
+    """
+    train_acc = np.loadtxt("{}_train_acc.csv".format(path))
+    val_acc = np.loadtxt("{}_val_acc.csv".format(path))
+    plt.title("Train vs Validation Accuracy")
+    n = len(train_acc)  # number of epochs
+    plt.plot(range(1, n + 1), train_acc, label="Train")
+    plt.plot(range(1, n + 1), val_acc, label="Validation")
+    plt.xlabel("Epoch")
+    plt.ylabel("Accuracy")
+    plt.legend(loc='best')
+    plt.show()
+
+#use_cuda = True
+#train(CNNClassifier(), train_loader, val_loader, batch_size=8, num_epochs=10, learn_rate=0.00025)
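+
+# NOTE: train() is called at the bottom of this file but never defined in it.
+# What follows is only a hypothetical sketch of that training loop, assuming
+# cross-entropy loss and the Adam optimizer; get_model_name() and its CSV
+# naming scheme are assumptions chosen to match plot_training_curve() above.
+def get_model_name(name, batch_size, learning_rate, epoch):
+    # Hypothetical helper: base path for the CSV files written below
+    return "model_{0}_bs{1}_lr{2}_epoch{3}".format(name, batch_size, learning_rate, epoch)
+
+def train(model, train_loader, val_loader, batch_size=8, num_epochs=10, learn_rate=0.001):
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.Adam(model.parameters(), lr=learn_rate)
+    if use_cuda and torch.cuda.is_available():
+        model = model.cuda()
+    train_acc, val_acc = [], []
+    for epoch in range(num_epochs):
+        for imgs, labels in train_loader:
+            if use_cuda and torch.cuda.is_available():
+                imgs = imgs.cuda()
+                labels = labels.cuda()
+            optimizer.zero_grad()
+            out = model(imgs)              # forward pass
+            loss = criterion(out, labels)
+            loss.backward()                # backward pass
+            optimizer.step()               # parameter update
+        train_acc.append(get_accuracy(model, train_loader))
+        val_acc.append(get_accuracy(model, val_loader))
+        print("Epoch {}: train acc {:.4f}, val acc {:.4f}".format(
+            epoch + 1, train_acc[-1], val_acc[-1]))
+    # Save per-epoch accuracies so plot_training_curve() can read them back
+    path = get_model_name(model.name, batch_size, learn_rate, num_epochs - 1)
+    np.savetxt("{}_train_acc.csv".format(path), train_acc)
+    np.savetxt("{}_val_acc.csv".format(path), val_acc)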
+
+k = 0
+for images, labels in train_loader:
+    # Take the first image of each batch (batch_size is 8 here)
+    image = images[0]
+    # Place the colour channel at the end, instead of at the beginning
+    img = np.transpose(image.numpy(), [1, 2, 0])
+    # ToTensor() already scales pixel intensities to [0, 1], so the image can
+    # be shown directly without any un-normalization
+    plt.subplot(10, 10, k + 1)
+    plt.axis('off')
+    plt.imshow(img)
+
+    k += 1
+    if k > 99:
+        break
+
+class stopSignCNN(nn.Module):
+    def __init__(self):
+        super(stopSignCNN, self).__init__()
+        self.name = "stopSignCNN"
+        self.conv1 = nn.Conv2d(3, 5, 5)
+        self.pool = nn.MaxPool2d(2, 2)
+        self.conv2 = nn.Conv2d(5, 10, 5)
+        # 200x200 input -> conv1 -> 196 -> pool -> 98 -> conv2 -> 94 -> pool -> 47
+        self.fc1 = nn.Linear(10 * 47 * 47, 220)
+        self.fc2 = nn.Linear(220, 2)  # number of classes
+
+    def forward(self, img):
+        x = self.pool(F.relu(self.conv1(img)))
+        x = self.pool(F.relu(self.conv2(x)))
+        x = x.view(-1, 10 * 47 * 47)
+        x = F.relu(self.fc1(x))
+        x = self.fc2(x)
+        return x
+
+use_cuda = True
+train(stopSignCNN(), train_loader, val_loader, batch_size=8, num_epochs=10, learn_rate=0.00025)
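+
+# Hypothetical usage once training completes: plot the saved curves from the
+# same base path that the train() sketch above writes its CSV files to.
+# plot_training_curve(get_model_name("stopSignCNN", 8, 0.00025, 9))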