Commit f8f7f514 authored by Lupo Pasini, Massimiliano

Merge branch 'tensors_to_device' into 'master'

Tensors to device

See merge request accelerated_training/accelerated_deeplearning_training!13
parents b20dd4d8 aa120870
dataset: 'MNIST'
number_runs: 2
dataset: 'nonlinear'
verbose: False
display: True
subsample: 0.1
classification: True
model: 'CNN'
subsample: 0.99
classification: False
model: 'MLP'
neurons: 64
layers: 3
activation: 'RELU'
......@@ -12,7 +13,7 @@ optimizer: 'SGD'
epochs: 50
learning_rate: 0.001
threshold: 1e-8
batch: 64
batch: 10
penalization: 0.0
acceleration: 'ANDERSON'
history_depth: 5
......
......@@ -7,14 +7,15 @@
Usage:
main.py (-h | --help)
main.py [-c CONFIG_FILE] [--verbose] [--display] [--dataset] [--subsample] [--classification] [--model] [--neurons] [--layers] [-a ACTIVATION] [-b BIAS]
main.py [-c CONFIG_FILE] [--number_runs] [--verbose] [--display] [--dataset] [--subsample] [--classification] [--model] [--neurons] [--layers] [-a ACTIVATION] [-b BIAS]
[--optimizer] [-e EPOCHS] [-l LEARNING_RATE] [--threshold] [--batch] [-p PENALIZATION] [--acceleration] [-d DEPTH] [-w WAIT_ITERATIONS] [-f FREQUENCY]
[-s STORE_EACH] [-r REGULARIZATION] [--relaxation]
Options:
-h, --help Show this screen
--number_runs Number of runs, each one using a different fixed random seed
--version Show version
--verbose Show version
--verbose Level of verbosity
--display Use matplotlib to plot results
-c, --config=<str> Filename containing configuration parameters
--dataset Dataset used for training. GRADUATE_ADMISSION, MNIST, CIFAR10 [default: MNIST]
......@@ -54,10 +55,12 @@ sys.path.append("./modules")
from modules.NN_models import MLP, CNN2D
from modules.optimizers import FixedPointIteration, DeterministicAcceleration
from utils.dataloaders import generate_dataloaders
from matplotlib.pyplot import cm
import numpy
from utils.gpu_detection import get_gpu
plt.rcParams.update({'font.size': 16})
def merge_args(cmdline_args, config_args):
for key in config_args.keys():
if key not in cmdline_args:
......@@ -99,6 +102,7 @@ def get_options():
if __name__ == '__main__':
config = get_options()
number_runs = int(config['number_runs'])
verbose = bool(config['verbose'])
classification_problem = bool(config['classification'])
......@@ -131,6 +135,11 @@ if __name__ == '__main__':
reg_acc = float(config['regularization'])
relaxation = float(config['relaxation'])
# The only reason for this workaround (not strictly necessary now) is the case
# where one MPI process has multiple GPUs available; the argument passed to
# get_gpu may then be a device ID > 0
available_device = get_gpu(0)
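# Sketch of what get_gpu (imported from utils.gpu_detection above) might look
# like; the real implementation is not shown in this diff, so treat this as an
# assumption that only illustrates why a device ID > 0 could be passed:
# def get_gpu(gpu_id=0):
#     if torch.cuda.is_available() and gpu_id < torch.cuda.device_count():
#         return torch.device('cuda:' + str(gpu_id))
#     return torch.device('cpu')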
# Generate dataloaders for training and validation
(
input_dim,
......@@ -139,125 +148,80 @@ if __name__ == '__main__':
validation_dataloader,
) = generate_dataloaders(dataset_name, subsample_factor, batch_size)
# Define deep learning model
if model_name == 'mlp':
model_classic = MLP(
input_dim,
output_dim,
num_neurons_list,
use_bias,
activation,
classification_problem,
)
elif model_name == 'cnn':
model_classic = CNN2D(
input_dim,
output_dim,
num_neurons_list,
use_bias,
activation,
classification_problem,
)
else:
raise RuntimeError('Model type not recognized')
color = cm.rainbow(numpy.linspace(0, 1, number_runs))
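# One colour per run, used below to tell the individual runs apart in the plots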
model_anderson = deepcopy(model_classic)
for iteration in range(0, number_runs):
# For classification problems, the loss function is the negative log-likelihood (nll)
# For regression problems, the loss function is the mean squared error (mse)
if classification_problem:
loss_function_name = 'nll'
else:
loss_function_name = 'mse'
torch.manual_seed(iteration)
# Define the standard optimizer, used as a point of reference to assess the
# improvement provided by the acceleration
optimizer_classic = FixedPointIteration(
training_dataloader,
validation_dataloader,
learning_rate,
weight_decay,
verbose,
)
# Define deep learning model
if model_name == 'mlp':
model_classic = MLP(input_dim,output_dim,num_neurons_list,use_bias,activation,classification_problem,available_device)
elif model_name == 'cnn':
model_classic = CNN2D(input_dim,output_dim,num_neurons_list,use_bias,activation,classification_problem,available_device)
else:
raise RuntimeError('Model type not recognized')
optimizer_classic.import_model(model_classic)
optimizer_classic.set_loss_function(loss_function_name)
optimizer_classic.set_optimizer(optimizer_name)
model_anderson = deepcopy(model_classic)
(
training_classic_loss_history,
validation_classic_loss_history,
) = optimizer_classic.train(epochs, threshold, batch_size)
# For classification problems, the loss function is the negative log-likelihood (nll)
# For regression problems, the loss function is the mean squared error (mse)
loss_function_name = 'nll' if classification_problem else 'mse'
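# Assumed behaviour of set_loss_function (the real mapping lives in
# modules/optimizers.py and is not shown in this diff): the short name selects
# a torch criterion, e.g. 'nll' -> torch.nn.NLLLoss() (model outputs
# log-probabilities) and 'mse' -> torch.nn.MSELoss()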
optimizer_anderson = DeterministicAcceleration(
training_dataloader,
validation_dataloader,
acceleration,
learning_rate,
relaxation,
weight_decay,
wait_iterations,
history_depth,
frequency,
reg_acc,
store_each_nth,
verbose,
)
optimizer_anderson.import_model(model_anderson)
optimizer_anderson.set_loss_function(loss_function_name)
optimizer_anderson.set_optimizer(optimizer_name)
# Define the standard optimizer, used as a point of reference to assess the
# improvement provided by the acceleration
optimizer_classic = FixedPointIteration(training_dataloader,validation_dataloader,learning_rate,weight_decay,verbose)
(
training_anderson_loss_history,
validation_anderson_loss_history,
) = optimizer_anderson.train(epochs, threshold, batch_size)
if config['display']:
epochs1 = range(1, len(training_classic_loss_history) + 1)
epochs2 = range(1, len(training_anderson_loss_history) + 1)
plt.figure(1)
plt.plot(
epochs1,
optimizer_classic.import_model(model_classic)
optimizer_classic.set_loss_function(loss_function_name)
optimizer_classic.set_optimizer(optimizer_name)
(
training_classic_loss_history,
color='b',
linestyle='-',
label='training loss - Fixed Point',
)
plt.plot(
epochs2,
training_anderson_loss_history,
color='r',
linestyle='-',
label='training loss - Anderson',
)
plt.yscale('log')
plt.title('Training loss function')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.draw()
plt.savefig('training_loss_plot')
plt.figure(2)
plt.plot(
epochs1,
validation_classic_loss_history,
color='b',
linestyle='-',
label='validation loss - Fixed Point',
validation_classic_accuracy_history,
) = optimizer_classic.train(epochs, threshold, batch_size)
optimizer_anderson = DeterministicAcceleration(
training_dataloader,
validation_dataloader,
acceleration,
learning_rate,
relaxation,
weight_decay,
wait_iterations,
history_depth,
frequency,
reg_acc,
store_each_nth,
verbose,
)
plt.plot(
epochs2,
optimizer_anderson.import_model(model_anderson)
optimizer_anderson.set_loss_function(loss_function_name)
optimizer_anderson.set_optimizer(optimizer_name)
(
training_anderson_loss_history,
validation_anderson_loss_history,
color='r',
linestyle='-',
label='validation loss - Anderson',
)
plt.yscale('log')
plt.title('Validation loss function')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.draw()
plt.savefig('validation_loss_plot')
validation_anderson_accuracy_history,
) = optimizer_anderson.train(epochs, threshold, batch_size)
if config['display']:
epochs1 = range(1, len(training_classic_loss_history) + 1)
epochs2 = range(1, len(training_anderson_loss_history) + 1)
if len(validation_classic_accuracy_history) > 0:
plt.plot(epochs1,validation_classic_accuracy_history,color=color[iteration],linestyle='-')
plt.plot(epochs2,validation_anderson_accuracy_history,color=color[iteration],linestyle='--')
else:
plt.plot(epochs1,validation_classic_loss_history,color=color[iteration],linestyle='-')
plt.plot(epochs2,validation_anderson_loss_history,color=color[iteration],linestyle='--')
plt.yscale('log')
plt.title('Validation loss function')
plt.xlabel('Epochs')
plt.ylabel('Loss')
#plt.legend()
plt.draw()
plt.savefig('validation_loss_plot')
plt.tight_layout()
......@@ -12,6 +12,8 @@ def activation_function(name_activation):
return torch.nn.Softmax()
elif name_activation == 'tanh':
return torch.nn.Tanh()
elif name_activation == 'leakyrelu':
return torch.nn.LeakyReLU(1e-2)
else:
raise ValueError("Activation function not recognized")
......@@ -20,7 +22,7 @@ def activation_function(name_activation):
class NeuralNetwork(torch.nn.Module, metaclass=ABCMeta):
def __init__(self, input_dim: int, output_dim: int, num_neurons_list: list, use_bias: bool, activation: str,
classification: bool, device='cpu'):
classification: bool, device=torch.device('cpu')):
"""
:param input_dim: :type int
......@@ -32,7 +34,8 @@ class NeuralNetwork(torch.nn.Module, metaclass=ABCMeta):
"""
super().__init__()
self.device = torch.device(device)
assert isinstance(device, torch.device)
self.device = device
assert isinstance(input_dim, int) or isinstance(input_dim, tuple)
self.input_dim = input_dim
......@@ -118,7 +121,7 @@ class NeuralNetwork(torch.nn.Module, metaclass=ABCMeta):
class MLP(NeuralNetwork, ABC):
def __init__(self, input_dim: int, output_dim: int, num_neurons_list: list, use_bias: bool, activation: str,
classification=False, device='cpu'):
classification=False, device=torch.device('cpu')):
"""
:param input_dim: :type int
......@@ -151,11 +154,14 @@ class MLP(NeuralNetwork, ABC):
# Multilayer perceptron
self.model = torch.nn.Sequential(*self.layers)
# map the constructed model to the device it is assigned to
self.model.to(self.device)
class CNN2D(NeuralNetwork, ABC):
def __init__(self, input_dim: int, output_dim: int, num_neurons_list: list, use_bias: bool, activation: str,
classification=False, device='cpu', **kwargs):
classification=False, device=torch.device('cpu'), **kwargs):
"""
:param input_dim: :type int
......@@ -163,7 +169,8 @@ class CNN2D(NeuralNetwork, ABC):
:param num_neurons_list: :type list
:param use_bias: :type bool
"""
super(CNN2D, self).__init__(input_dim, output_dim, num_neurons_list, use_bias, activation, classification, device,
super(CNN2D, self).__init__(input_dim, output_dim, num_neurons_list, use_bias, activation, classification,
device,
**kwargs)
self.kernel_size_list = kwargs.get("kernel_size_list", None)
......@@ -228,3 +235,6 @@ class CNN2D(NeuralNetwork, ABC):
# Convolutional neural network
self.model = torch.nn.Sequential(*self.layers)
# map the constructed model to the device it is assigned to
self.model.to(self.device)
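# Usage sketch (hypothetical values): construction now takes an explicit
# torch.device and the model parameters end up on that device, e.g.
# device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# mlp = MLP(1, 1, [64, 64, 64], True, 'relu', False, device)
# assert next(mlp.get_model().parameters()).device == device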
......@@ -79,6 +79,7 @@ class FixedPointIteration(object):
self.training_loss_history = []
self.validation_loss_history = []
self.validation_accuracy = []
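# Presumably populated only for classification ('nll') runs; for regression it
# stays empty, which is why main.py checks len(...) > 0 before plotting accuracy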
while epoch_counter < num_epochs and value_loss > threshold:
......@@ -86,7 +87,6 @@ class FixedPointIteration(object):
# Training
for batch_idx, (data, target) in enumerate(self.training_dataloader):
self.accelerate()
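# Move the current mini-batch to the same device as the model before the
# forward pass (the 'tensors to device' change this merge request introduces)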
data, target = (data.to(self.model.get_device()),target.to(self.model.get_device()))
self.optimizer.zero_grad()
output = self.model.forward(data)
......@@ -111,6 +111,7 @@ class FixedPointIteration(object):
train_loss = loss.item()
self.training_loss_history.append(train_loss)
self.accelerate()
# Validation
with torch.no_grad():
......@@ -121,43 +122,39 @@ class FixedPointIteration(object):
for batch_idx, (data, target) in enumerate(self.validation_dataloader):
count_val = count_val + 1
data, target = (
data.to(self.model.get_device()),
target.to(self.model.get_device()),
)
data, target = (data.to(self.model.get_device()),target.to(self.model.get_device()))
output = self.model.forward(data)
loss = self.criterion(output, target)
val_loss = val_loss + loss
"""
pred = output.argmax(
dim=1, keepdim=True
) # get the index of the max log-probability
correct += pred.eq(target.view_as(pred)).sum().item()
"""
if self.loss_name == 'nll':
pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
correct += pred.eq(target.view_as(pred)).sum().item()
val_loss = val_loss / count_val
self.validation_loss_history.append(val_loss)
"""
self.print_verbose(
'\n Epoch: '
+ str(epoch_counter)
+ ' - Training Loss: '
+ str(train_loss)
+ ' - Validation - Loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
val_loss,
correct,
len(self.validation_dataloader.dataset),
100.0 * correct / len(self.validation_dataloader.dataset),
if self.loss_name == 'nll':
self.print_verbose(
'\n Epoch: '
+ str(epoch_counter)
+ ' - Training Loss: '
+ str(train_loss)
+ ' - Validation - Loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
val_loss,
correct,
len(self.validation_dataloader.dataset),
100.0 * correct / len(self.validation_dataloader.dataset),
)
)
)
self.print_verbose("###############################")
"""
self.print_verbose("###############################")
value_loss = val_loss
epoch_counter = epoch_counter + 1
return self.training_loss_history, self.validation_loss_history
return self.training_loss_history, self.validation_loss_history, self.validation_accuracy
def set_loss_function(self, criterion_string):
......
......@@ -30,7 +30,7 @@ def linear_regression(slope, intercept, num_points, optimizer_str):
optimizer_classic.import_model(model)
optimizer_classic.set_loss_function('mse')
optimizer_classic.set_optimizer(optimizer_str)
training_classic_loss_history, validation_classic_loss_history = optimizer_classic.train(epochs, threshold, batch_size)
training_classic_loss_history, validation_classic_loss_history, _ = optimizer_classic.train(epochs, threshold, batch_size)
weights = list(model.get_model().parameters())
......@@ -61,7 +61,7 @@ def linear_regression_anderson(slope, intercept, num_points, optimizer_str):
optimizer_anderson.import_model(model)
optimizer_anderson.set_loss_function('mse')
optimizer_anderson.set_optimizer(optimizer_str)
training_anderson_loss_history, validation_anderson_loss_history = optimizer_anderson.train(epochs, threshold, batch_size)
training_anderson_loss_history, validation_anderson_loss_history, _ = optimizer_anderson.train(epochs, threshold, batch_size)
weights = list(model.get_model().parameters())
......
......@@ -36,7 +36,7 @@ def neural_network_linear_regression(slope, intercept, num_points, optimizer_str
optimizer_classic.import_model(model)
optimizer_classic.set_loss_function('mse')
optimizer_classic.set_optimizer(optimizer_str)
training_classic_loss_history, validation_classic_loss_history = optimizer_classic.train(epochs, threshold, batch_size)
training_classic_loss_history, validation_classic_loss_history, _ = optimizer_classic.train(epochs, threshold, batch_size)
weights = list(model.get_model().parameters())
......@@ -66,14 +66,11 @@ def neural_network_linear_regression_anderson(slope, intercept, num_points, opti
validation_dataloader = torch.utils.data.DataLoader(dataset, batch_size)
model = MLP(inputDim,outputDim,num_neurons_list,use_bias,activation,classification_problem)
optimizer_anderson = DeterministicAcceleration(training_dataloader,validation_dataloader,'anderson',learning_rate,relaxation,weight_decay,wait_iterations,history_depth,
frequency,reg_acc,store_each_nth)
optimizer_anderson = DeterministicAcceleration(training_dataloader,validation_dataloader,'anderson',learning_rate,relaxation,weight_decay,wait_iterations,history_depth,frequency,reg_acc,store_each_nth)
optimizer_anderson.import_model(model)
optimizer_anderson.set_loss_function('mse')
optimizer_anderson.set_optimizer(optimizer_str)
training_anderson_loss_history, validation_anderson_loss_history = optimizer_anderson.train(
epochs, threshold, batch_size
)
training_anderson_loss_history, validation_anderson_loss_history, _ = optimizer_anderson.train(epochs, threshold, batch_size)
weights = list(model.get_model().parameters())
......
......@@ -28,10 +28,10 @@ def anderson(X, relaxation=1.0):
if relaxation!=1:
assert relaxation>0, "relaxation must be positive"
# compute the solution of the constrained optimization problem such that gamma = X[:,1:]@alpha
alpha = torch.zeros(gamma.numel()+1)
alpha = torch.zeros(gamma.numel()+1).to(DX.device)
alpha[0] = gamma[0]
alpha[1:-1] = gamma[1:] - gamma[:-1]
alpha[-1] = 1 - gamma[-1]
extr = relaxation*extr + (1-relaxation)*X[:,:-1]@alpha
return extr
\ No newline at end of file
return extr
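# Usage sketch (an assumption about the expected layout, suggested by the
# matrix products above): the columns of X hold the flattened iterates, oldest
# first, and anderson() returns the extrapolated iterate:
# X = torch.stack([x0, x1, x2], dim=1)   # shape (n_params, history_depth)
# x_acc = anderson(X, relaxation=0.5)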
......@@ -2,6 +2,7 @@ import os
import numpy
import torch
import random
import math
from torch.utils.data import Dataset
from GraduateAdmission import GraduateAdmission
from torchvision import transforms, datasets
......@@ -9,8 +10,8 @@ from torch.utils.data.sampler import SubsetRandomSampler
def get_indices_regression(dataset, subset_portion):
num_elements= int(subset_portion * (len(dataset.targets) ))
indices = random.sample(list(range(0,len(dataset.targets))), num_elements)
num_elements = int(subset_portion * (len(dataset.targets)))
indices = random.sample(list(range(0,len(dataset.targets))),num_elements)
return indices
......@@ -24,7 +25,7 @@ def get_indices_classification(dataset, subset_portion):
for i in range(len(dataset.targets)):
if dataset.targets[i] == j:
indices_class.append(i)
subset = random.sample(indices_class, num_elements_per_class)
subset = random.sample(indices_class,num_elements_per_class)
indices.extend(subset)
return indices
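# Usage sketch (assumption: roughly how generate_dataloaders consumes these
# index lists, via the SubsetRandomSampler imported above):
# indices = get_indices_classification(train_dataset, subset_portion=0.1)
# sampler = SubsetRandomSampler(indices)
# loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, sampler=sampler)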
......@@ -49,13 +50,6 @@ def linear_regression(slope, intercept, n: int = 10):
class LinearData(Dataset):
def __init__(self, slope, intercept, num_points: int = 10):
super(LinearData, self).__init__()
"""
Args:
csv_file (string): Path to the csv file with annotations.
root_dir (string): Directory with all the images.
transform (callable, optional): Optional transform to be applied
on a sample.
"""
self.slope = slope
self.intercept = intercept
......@@ -85,21 +79,77 @@ class LinearData(Dataset):
def linear_data(slope, intercept, num_points: int = 10):
input_dim = 1
output_dim = 1
return (input_dim,output_dim,LinearData(slope, intercept, num_points=num_points))
return input_dim, output_dim, LinearData(slope, intercept, num_points=num_points)
###############################################################################
def nonlinear_regression(n: int = 10):
# create dummy data for training
x_values = numpy.linspace(-1.0, +1.0, num=n)
x_train = numpy.array(x_values, dtype=numpy.float32)
x_train = x_train.reshape(-1, 1)
y_values = [(math.sin(math.pi*i))*(1+i) for i in x_values]
y_train = numpy.array(y_values, dtype=numpy.float32)
y_train = y_train.reshape(-1, 1)
return x_train, y_train
class NonLinearData(Dataset):
def __init__(self, num_points: int = 10):
super(NonLinearData, self).__init__()
self.num_points = num_points
x_sample, y_sample = nonlinear_regression(self.num_points)
self.x_sample = x_sample
self.y_values = y_sample
self.y_values = numpy.reshape(self.y_values, (len(self.y_values), 1))
def __len__(self):
return self.y_values.shape[0]
def __getitem__(self, index):
x_sample = self.x_sample[index, :]
y_sample = self.y_values[index]
# Doubles must be converted to Floats before passing them to a neural network model
x_sample = torch.from_numpy(x_sample).float()
y_sample = torch.from_numpy(y_sample).float()
return x_sample, y_sample
def nonlinear_data(num_points: int = 10):
input_dim = 1
output_dim = 1
return input_dim, output_dim, NonLinearData(num_points=num_points)
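# Usage sketch (hypothetical values): the new 'nonlinear' dataset wraps into a
# standard DataLoader like the other datasets:
# input_dim, output_dim, dataset = nonlinear_data(num_points=100)
# loader = torch.utils.data.DataLoader(dataset, batch_size=10, shuffle=True)
# x_batch, y_batch = next(iter(loader))   # both of shape (10, 1)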
###############################################################################
def graduate_admission_data():
input_dim = 7
output_dim = 1
return (input_dim,output_dim,GraduateAdmission('graduate_admission.csv', './datasets/', transform=True))
return input_dim, output_dim, GraduateAdmission('graduate_admission.csv', './datasets/',transform=True)
###############################################################################
def mnist_data(subsample_factor, rand_rotation=False, max_degree=90):
if rand_rotation == True:
if rand_rotation:
compose = transforms.Compose([transforms.Resize(28),transforms.RandomRotation(max_degree),transforms.ToTensor(),transforms.Normalize((0.5,), (0.5,))])
else:
compose = transforms.Compose([transforms.Resize(28),transforms.ToTensor(),transforms.Normalize((0.5,), (0.5,))])
out_dir = '{}/datasets'.format(os.getcwd())
input_dim = (1, 28, 28)
output_dim = int(10)
......@@ -109,11 +159,15 @@ def mnist_data(subsample_factor, rand_rotation=False, max_degree=90):
return input_dim, output_dim, train_dataset, test_dataset
###############################################################################
def cifar10_data(subsample_factor, rand_rotation=False, max_degree=90):
if rand_rotation == True:
if rand_rotation:
compose = transforms.Compose([transforms.Resize(32),transforms.RandomRotation(max_degree),transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
else:
compose = transforms.Compose([transforms.Resize(32),transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
out_dir = '{}/datasets'.format(os.getcwd())
input_dim = (3, 32, 32)
output_dim = int(10)
......@@ -123,11 +177,15 @@ def cifar10_data(subsample_factor, rand_rotation=False, max_degree=90):