Commit 32c466a9 authored by Massimiliano Lupo Pasini

subsampler options added

parent f1ccec05
Merge request !11: Subsampler and qr
dataset: 'MNIST'
verbose: False
display: True
subsample: 0.1
classification: True
model: 'CNN'
neurons: 10
layers: 2
neurons: 64
layers: 3
activation: 'RELU'
bias: True
optimizer: 'ADAM'
epochs: 200
learning_rate: 0.01
threshold: 0.0001
batch: 10
optimizer: 'SGD'
epochs: 50
learning_rate: 0.001
threshold: 1e-8
batch: 64
penalization: 0.0
acceleration: 'ANDERSON'
history_depth: 5
wait_iterations: 1
frequency: 1
store_each_nth: 1
regularization: 0.0
display: True
relaxation: 0.1
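The block above is the sample configuration file touched by this commit. A minimal sketch of how such a file could be read and merged with defaults, assuming YAML syntax and PyYAML (the repository's actual get_options() in main.py may handle this differently, and 'config.yaml' is only a placeholder filename):

import yaml  # assumes PyYAML is installed

def load_config(path, defaults=None):
    # Overlay the key/value pairs from the file on top of the defaults.
    config = dict(defaults or {})
    with open(path) as f:
        config.update(yaml.safe_load(f) or {})
    return config

config = load_config('config.yaml', defaults={'subsample': 1.0, 'relaxation': 0.1})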
@@ -7,33 +7,37 @@
Usage:
main.py (-h | --help)
main.py [-c CONFIG_FILE] [--display] [--dataset] [--classification] [--model] [--neurons] [--layers] [-a ACTIVATION] [-b BIAS]
[--optimizer] [-e EPOCHS] [-l LEARNING_RATE] [--threshold] [--batch] [-p PENALIZATION] [-d DEPTH] [-w WAIT_ITERATIONS] [-f FREQUENCY]
[-s STORE_EACH] [-r REGULARIZATION]
main.py [-c CONFIG_FILE] [--verbose] [--display] [--dataset] [--subsample] [--classification] [--model] [--neurons] [--layers] [-a ACTIVATION] [-b BIAS]
[--optimizer] [-e EPOCHS] [-l LEARNING_RATE] [--threshold] [--batch] [-p PENALIZATION] [--acceleration] [-d DEPTH] [-w WAIT_ITERATIONS] [-f FREQUENCY]
[-s STORE_EACH] [-r REGULARIZATION] [--relaxation]
Options:
-h, --help Show this screen
--version Show version
--verbose Print verbose output during training
--display Use matplotlib to plot results
-c, --config=<str> Filename containing configuration parameters
--dataset Dataset used for training. GRADUATE_ADMISSION, MNIST, CIFAR10 [default: MNIST]
--subsample Fraction of the original dataset to use, between 0 and 1 [default: 1.0]
--classification Type of problem: classification or regression
--model Implementation of NN model. Multi-layer perceptrons NN (MLP), convolutional NN (CNN)
--neurons Number of neurons per layer
--layers Number of hidden layers
-a, --activation=<str> Type of activation function [default: RELU]
-b, --bias Use bias in the regression model [default: True]
--optimizer Optimizer name
-e, --epochs=<n> Number of epochs [default: 100]
--optimizer Optimizer name [default: SGD]
-e, --epochs=<n> Number of epochs [default: 1]
-l, --learning_rate=<f> Learning rate [default: 0.01]
--threshold Stopping criterion for the training
--batch Size of the batch for the optimizer
--threshold Stopping criterion for the training [default: 1e-4]
--batch Size of the batch for the optimizer [default: 1]
--acceleration Type of acceleration performed. ANDERSON, RNA [default: ANDERSON]
-p, --penalization=<f> Weight decay for the L2 penalization during the training of the neural network [default: 0.0]
-d, --history_depth=<m> Depth of window history for anderson [default: 5]
-w, --wait_iterations=<n> Wait an initial number of classic optimizer iterations before starting with anderson [default: 1]
-f, --frequency=<n> Number of epochs performed between two consecutive anderson accelerations [default: 1]
-s, --store_each_nth=<n> Number of epochs performed between two consecutive storages of the iterations in the columns of matrix R to perform least squares [default: 1]
-r, --regularization=<f> Regularization parameter for L2 penalization for the least squares problem solved to perform anderson acceleration [default: 0.0]
-r, --regularization=<f> Regularization parameter for L2 penalization for the least squares problem solved to perform the acceleration [default: 0.0]
--relaxation Relaxation parameter that mixes the past iterate with the accelerated update to generate the new iterate [default: 0.1]
"""
from docopt import docopt
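For orientation, docopt builds the option dictionary returned by get_options() directly from the docstring above, so each long option becomes a key. A minimal self-contained sketch of the pattern, using a reduced usage string for illustration (the real get_options(), not fully shown in this diff, also merges the optional config file):

from docopt import docopt

usage = """Usage:
  main.py [-e EPOCHS] [-l LEARNING_RATE]

Options:
  -e, --epochs=<n>          Number of epochs [default: 1]
  -l, --learning_rate=<f>   Learning rate [default: 0.01]
"""

args = docopt(usage, argv=[])         # no CLI arguments, so the defaults apply
epochs = int(args['--epochs'])        # '1' -> 1
learning_rate = float(args['--learning_rate'])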
@@ -49,7 +53,7 @@ sys.path.append("./utils")
sys.path.append("./modules")
from modules.NN_models import MLP, CNN2D
from modules.optimizers import FixedPointIteration, DeterministicAcceleration
from utils.dataloaders import graduate_admission_data, mnist_data, cifar10_data
from utils.dataloaders import generate_dataloaders
plt.rcParams.update({'font.size': 16})
@@ -95,8 +99,13 @@ def get_options():
if __name__ == '__main__':
config = get_options()
verbose = bool(config['verbose'])
classification_problem = bool(config['classification'])
# Specify name of the dataset and percentage of the entire data volume to sample
dataset_name = lower(config['dataset'])
subsample_factor = float(config['subsample'])
# Setting for the neural network
model_name = str(lower(config['model']))
num_neurons = int(config['neurons'])
@@ -113,72 +122,142 @@ if __name__ == '__main__':
batch_size = int(config['batch'])
weight_decay = float(config['penalization'])
# Parameters for RNA optimizer
# Parameters for acceleration
acceleration = str(lower(config['acceleration']))
wait_iterations = int(config['wait_iterations'])
history_depth = int(config['history_depth'])
frequency = int(config['frequency'])
store_each_nth = int(config['store_each_nth'])
reg_acc = float(config['regularization'])
relaxation = float(config['relaxation'])
# Import data
if str(lower(config['dataset'])) == 'graduate_admission':
input_dim, output_dim, dataset = graduate_admission_data()
elif str(lower(config['dataset'])) == 'mnist':
input_dim, output_dim, dataset = mnist_data()
n_classes = 10
elif str(lower(config['dataset'])) == 'cifar10':
input_dim, output_dim, dataset = cifar10_data()
n_classes = 10
else:
raise RuntimeError('Dataset not recognized')
dataloader = torch.utils.data.DataLoader(dataset, batch_size)
# Generate dataloaders for training and validation
(
input_dim,
output_dim,
training_dataloader,
validation_dataloader,
) = generate_dataloaders(dataset_name, subsample_factor, batch_size)
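The new --subsample option flows into generate_dataloaders as subsample_factor. The utility itself is not part of this hunk; a hypothetical sketch of the subsampling step for MNIST, using torch.utils.data.Subset (names and return values here are illustrative, not the repository's implementation):

import torch
from torchvision import datasets, transforms

def subsampled_mnist_loaders(subsample_factor, batch_size):
    # Hypothetical helper: keep a random fraction of the training set before batching.
    transform = transforms.ToTensor()
    train_set = datasets.MNIST('./data', train=True, download=True, transform=transform)
    val_set = datasets.MNIST('./data', train=False, download=True, transform=transform)
    n_keep = max(1, int(subsample_factor * len(train_set)))
    keep_idx = torch.randperm(len(train_set))[:n_keep].tolist()
    train_subset = torch.utils.data.Subset(train_set, keep_idx)
    train_loader = torch.utils.data.DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size)
    return train_loader, val_loader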
# Define deep learning model
if model_name == 'mlp':
model_classic = MLP(input_dim, output_dim, num_neurons_list, use_bias, activation, classification_problem)
model_classic = MLP(
input_dim,
output_dim,
num_neurons_list,
use_bias,
activation,
classification_problem,
)
elif model_name == 'cnn':
model_classic = CNN2D(input_dim, output_dim, num_neurons_list, use_bias, activation, classification_problem)
model_classic = CNN2D(
input_dim,
output_dim,
num_neurons_list,
use_bias,
activation,
classification_problem,
)
else:
raise RuntimeError('Model type not recognized')
model_anderson = deepcopy(model_classic)
# For classification problems, the loss function is the cross entropy (ce)
# For classification problems, the loss function is the negative log-likelihood (nll)
# For regression problems, the loss function is the mean squared error (mse)
if classification_problem:
loss_function_name = 'ce'
loss_function_name = 'nll'
else:
loss_function_name = 'mse'
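The string chosen here presumably selects the torch criterion inside the optimizer classes; a minimal sketch of such a mapping, assuming standard torch.nn losses (the actual lookup lives in modules/optimizers.py and is not shown in this diff):

import torch.nn as nn

# Assumed mapping from the loss names used above to torch criteria.
# 'nll' expects log-probabilities (e.g. a LogSoftmax output layer); 'ce' applies the log-softmax internally.
loss_functions = {
    'nll': nn.NLLLoss(),
    'ce': nn.CrossEntropyLoss(),
    'mse': nn.MSELoss(),
}
criterion = loss_functions['nll']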
# Define the standard optimizer which is used as point of reference to assess the improvement provided by the
# acceleration
optimizer_classic = FixedPointIteration(dataloader, learning_rate, weight_decay)
optimizer_classic = FixedPointIteration(
training_dataloader,
validation_dataloader,
learning_rate,
weight_decay,
verbose,
)
optimizer_classic.import_model(model_classic)
optimizer_classic.set_loss_function(loss_function_name)
optimizer_classic.set_optimizer(optimizer_name )
training_classic_loss_history = optimizer_classic.train(epochs, threshold, batch_size)
optimizer_anderson = DeterministicAcceleration(dataloader, 'anderson', learning_rate, 0.1, weight_decay, wait_iterations, history_depth, frequency,
reg_acc, store_each_nth)
optimizer_classic.set_optimizer(optimizer_name)
(
training_classic_loss_history,
validation_classic_loss_history,
) = optimizer_classic.train(epochs, threshold, batch_size)
optimizer_anderson = DeterministicAcceleration(
training_dataloader,
validation_dataloader,
acceleration,
learning_rate,
relaxation,
weight_decay,
wait_iterations,
history_depth,
frequency,
reg_acc,
store_each_nth,
verbose,
)
optimizer_anderson.import_model(model_anderson)
optimizer_anderson.set_loss_function(loss_function_name)
optimizer_anderson.set_optimizer(optimizer_name)
training_anderson_loss_history = optimizer_anderson.train(epochs, threshold, batch_size)
(
training_anderson_loss_history,
validation_anderson_loss_history,
) = optimizer_anderson.train(epochs, threshold, batch_size)
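Both optimizers are driven by the scheduling parameters parsed earlier (wait_iterations, frequency, store_each_nth, history_depth, relaxation). A self-contained toy sketch of how those parameters could gate the acceleration inside the epoch loop; this is an assumed outline on a scalar fixed-point problem, not the code of DeterministicAcceleration:

import numpy as np

wait_iterations, history_depth, frequency, store_each_nth, relaxation = 1, 5, 1, 1, 0.1
epochs = 20

def g(x):                  # stands in for one epoch of the classic optimizer
    return 0.5 * x + 1.0

x, history = 4.0, []
for epoch in range(epochs):
    x = g(x)                                             # classic update
    if epoch >= wait_iterations and epoch % store_each_nth == 0:
        history.append(x)                                # store iterate (a column of R)
        history = history[-history_depth:]               # sliding window of depth history_depth
    if epoch >= wait_iterations and epoch % frequency == 0 and len(history) >= 3:
        x_acc = np.mean(history)                         # placeholder for the Anderson/RNA extrapolation
        x = (1.0 - relaxation) * x + relaxation * x_acc  # relaxed mixing of old and accelerated iterate
print(x)  # the fixed point of g is 2.0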
if config['display']:
epochs1 = range(1, len(training_classic_loss_history) + 1)
epochs2 = range(1, len(training_anderson_loss_history) + 1)
plt.plot(epochs1, training_classic_loss_history, label='training loss - Fixed Point')
plt.plot(epochs2, training_anderson_loss_history, label='training loss - Anderson')
plt.figure(1)
plt.plot(
epochs1,
training_classic_loss_history,
color='b',
linestyle='-',
label='training loss - Fixed Point',
)
plt.plot(
epochs2,
training_anderson_loss_history,
color='r',
linestyle='-',
label='training loss - Anderson',
)
plt.yscale('log')
plt.title('Training loss function')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.draw()
plt.savefig('training_loss_plot')
plt.figure(2)
plt.plot(
epochs1,
validation_classic_loss_history,
color='b',
linestyle='-',
label='validation loss - Fixed Point',
)
plt.plot(
epochs2,
validation_anderson_loss_history,
color='r',
linestyle='-',
label='validation loss - Anderson',
)
plt.yscale('log')
plt.title('Training accuracy')
plt.title('Validation loss function')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.draw()
plt.savefig('loss_plot')
plt.savefig('validation_loss_plot')
@@ -6,8 +6,8 @@ Created on Fri Dec 4 11:52:48 2020
@author: 7ml
"""
import math
import numpy as np
from numpy import linalg as LA
def anderson(X, reg=0):
@@ -15,7 +15,7 @@ def anderson(X, reg=0):
# Take a matrix X where column X[:,i] is the difference between the {i+1}-th and the i-th iterate of the
# fixed-point operation
# x_i = g(x_{i-1})
#
#
# r_i = x_{i+1} - x_i
# X[:,i] = r_i
#
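The comments above describe the residual matrix that the least-squares step operates on. For reference, a self-contained toy of the textbook regularized nonlinear acceleration formula on a linear fixed-point iteration x <- A x + b; this illustrates the formula only, while the routine below differs in how it builds and solves the system:

import numpy as np

rng = np.random.default_rng(0)
d, k = 20, 6
A = 0.9 * np.eye(d) + 0.01 * rng.standard_normal((d, d))   # contractive map
b = rng.standard_normal(d)

X = np.zeros((d, k + 1))              # columns are the iterates x_0, ..., x_k
for i in range(k):
    X[:, i + 1] = A @ X[:, i] + b     # fixed-point step x_{i+1} = g(x_i)

R = np.diff(X)                        # residuals r_i = x_{i+1} - x_i, one per column
reg = 1e-8
RR = R.T @ R                          # "square" the residual matrix
z = np.linalg.solve(RR + reg * np.eye(k), np.ones(k))   # solve (R'R + lambda I) z = 1
c = z / z.sum()                       # weights c with sum(c) = 1
x_extr = X[:, :k] @ c                 # extrapolation: weighted mean sum_i c_i x_i

x_star = np.linalg.solve(np.eye(d) - A, b)
print(np.linalg.norm(X[:, k] - x_star), np.linalg.norm(x_extr - x_star))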
@@ -32,44 +32,44 @@
DX = np.diff(X)
DR = np.diff(DX)
projected_residual = np.matmul(DR.T, DX[:,k-1])
DX = DX[:,:-1]
# "Square" the matrix, and normalize it
RR = np.matmul(np.transpose(DR), DR)
projected_residual = DX[:, k - 1]
DX = DX[:, :-1]
# Solve (R'R + lambda I)z = 1
(extr, c) = anderson_precomputed(DX, RR, projected_residual, reg)
(extr, c) = anderson_precomputed(DX, DR, projected_residual, reg)
# Compute the extrapolation / weighted mean "sum_i c_i x_i", and return
return extr, c
def anderson_precomputed(DX, RR, residual, reg=0):
def anderson_precomputed(DX, DR, residual, reg=0):
# Regularized Nonlinear Acceleration, with RR precomputed
# Same as rna, but RR is computed only once
# Recovers parameters
(d, k) = DX.shape
# Solve (R'R + lambda I)z = 1
reg_I = reg * np.eye(k)
# In case of singular matrix, we solve using least squares instead
try:
z = np.linalg.solve(RR + reg_I, residual)
except LA.linalg.LinAlgError:
z = np.linalg.lstsq(RR+reg_I, residual, -1)
RR = np.matmul(np.transpose(DR), DR)
if math.sqrt(np.linalg.cond(RR, 'fro')) < 1e5:
# In case of singular matrix, we solve using least squares instead
q, r = np.linalg.qr(DR)
new_residual = np.matmul(np.transpose(q), residual)
z = np.linalg.lstsq(r, new_residual, reg)
z = z[0]
# Recover weights c, where sum(c) = 1
if np.abs(np.sum(z)) < 1e-10:
z = np.ones((k,1))
alpha = np.asmatrix(z / np.sum(z))
# Recover weights c, where sum(c) = 1
if np.abs(np.sum(z)) < 1e-10:
z = np.ones((k, 1))
alpha = np.asmatrix(z / np.sum(z))
else:
alpha = np.zeros((DX.shape[1],1))
# Compute the extrapolation / weighted mean "sum_i c_i x_i", and return
extr = np.matmul(DX, alpha)
return np.array(extr), alpha
\ No newline at end of file