Commit 32c466a9 authored by Massimiliano Lupo Pasini

subsampler options added

parent f1ccec05
Merge request !11: Subsampler and qr
dataset: 'MNIST'
verbose: False
display: True
subsample: 0.1
classification: True
model: 'CNN'
neurons: 10
layers: 2
neurons: 64
layers: 3
activation: 'RELU'
bias: True
optimizer: 'ADAM'
epochs: 200
learning_rate: 0.01
threshold: 0.0001
batch: 10
optimizer: 'SGD'
epochs: 50
learning_rate: 0.001
threshold: 1e-8
batch: 64
penalization: 0.0
acceleration: 'ANDERSON'
history_depth: 5
wait_iterations: 1
frequency: 1
store_each_nth: 1
regularization: 0.0
display: True
relaxation: 0.1
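The block above is the sample configuration file touched by this commit. A minimal sketch of how such a file could be read and merged with defaults, assuming YAML syntax and PyYAML (the repository's actual get_options() in main.py may handle this differently, and 'config.yaml' is only a placeholder filename):

import yaml  # assumes PyYAML is installed

def load_config(path, defaults=None):
    # Overlay the key/value pairs from the file on top of the defaults.
    config = dict(defaults or {})
    with open(path) as f:
        config.update(yaml.safe_load(f) or {})
    return config

config = load_config('config.yaml', defaults={'subsample': 1.0, 'relaxation': 0.1})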
@@ -7,33 +7,37 @@
Usage:
main.py (-h | --help)
main.py [-c CONFIG_FILE] [--display] [--dataset] [--classification] [--model] [--neurons] [--layers] [-a ACTIVATION] [-b BIAS]
[--optimizer] [-e EPOCHS] [-l LEARNING_RATE] [--threshold] [--batch] [-p PENALIZATION] [-d DEPTH] [-w WAIT_ITERATIONS] [-f FREQUENCY]
[-s STORE_EACH] [-r REGULARIZATION]
main.py [-c CONFIG_FILE] [--verbose] [--display] [--dataset] [--subsample] [--classification] [--model] [--neurons] [--layers] [-a ACTIVATION] [-b BIAS]
[--optimizer] [-e EPOCHS] [-l LEARNING_RATE] [--threshold] [--batch] [-p PENALIZATION] [--acceleration] [-d DEPTH] [-w WAIT_ITERATIONS] [-f FREQUENCY]
[-s STORE_EACH] [-r REGULARIZATION] [--relaxation]
Options:
-h, --help Show this screen
--version Show version
--verbose Print verbose output during training
--display Use matplotlib to plot results
-c, --config=<str> Filename containing configuration parameters
--dataset Dataset used for training. GRADUATE_ADMISSION, MNIST, CIFAR10 [default: MNIST]
--subsample Fraction of the original dataset to use, between 0 and 1 [default: 1.0]
--classification Type of problem: classification or regression
--model Implementation of NN model. Multi-layer perceptrons NN (MLP), convolutional NN (CNN)
--neurons Number of neurons per layer
--layers Number of hidden layers
-a, --activation=<str> Type of activation function [default: RELU]
-b, --bias Use bias in the regression model [default: True]
--optimizer Optimizer name
-e, --epochs=<n> Number of epochs [default: 100]
--optimizer Optimizer name [default: SGD]
-e, --epochs=<n> Number of epochs [default: 1]
-l, --learning_rate=<f> Learning rate [default: 0.01]
--threshold Stopping criterion for the training
--batch Size of the batch for the optimizer
--threshold Stopping criterion for the training [default: 1e-4]
--batch Size of the batch for the optimizer [default: 1]
--acceleration Type of acceleration performed. ANDERSON, RNA [default: ANDERSON]
-p, --penalization=<f> Weight decay for the L2 penalization during the training of the neural network [default: 0.0]
-d, --history_depth=<m> Depth of window history for anderson [default: 5]
-w, --wait_iterations=<n> Wait an initial number of classic optimizer iterations before starting with anderson [default: 1]
-f, --frequency=<n> Number of epochs performed between two consecutive anderson accelerations [default: 1]
-s, --store_each_nth=<n> Number of epochs performed between two consecutive storages of the iterations in the columns of matrix R to perform least squares [default: 1]
-r, --regularization=<f> Regularization parameter for L2 penalization for the least squares problem solved to perform anderson acceleration [default: 0.0]
-r, --regularization=<f> Regularization parameter for L2 penalization for the least squares problem solved to perform the acceleration [default: 0.0]
--relaxation Relaxation parameter that mixes the past iterate with the accelerated update to generate the new iterate [default: 0.1]
"""
from docopt import docopt
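For orientation, docopt builds the option dictionary returned by get_options() directly from the docstring above, so each long option becomes a key. A minimal self-contained sketch of the pattern, using a reduced usage string for illustration (the real get_options(), not fully shown in this diff, also merges the optional config file):

from docopt import docopt

usage = """Usage:
  main.py [-e EPOCHS] [-l LEARNING_RATE]

Options:
  -e, --epochs=<n>          Number of epochs [default: 1]
  -l, --learning_rate=<f>   Learning rate [default: 0.01]
"""

args = docopt(usage, argv=[])         # no CLI arguments, so the defaults apply
epochs = int(args['--epochs'])        # '1' -> 1
learning_rate = float(args['--learning_rate'])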
@@ -49,7 +53,7 @@ sys.path.append("./utils")
sys.path.append("./modules")
from modules.NN_models import MLP, CNN2D
from modules.optimizers import FixedPointIteration, DeterministicAcceleration
from utils.dataloaders import graduate_admission_data, mnist_data, cifar10_data
from utils.dataloaders import generate_dataloaders
plt.rcParams.update({'font.size': 16})
@@ -95,8 +99,13 @@ def get_options():
if __name__ == '__main__':
config = get_options()
verbose = bool(config['verbose'])
classification_problem = bool(config['classification'])
# Specify name of the dataset and percentage of the entire data volume to sample
dataset_name = lower(config['dataset'])
subsample_factor = float(config['subsample'])
# Setting for the neural network
model_name = str(lower(config['model']))
num_neurons = int(config['neurons'])
@@ -113,72 +122,142 @@ if __name__ == '__main__':
batch_size = int(config['batch'])
weight_decay = float(config['penalization'])
# Parameters for RNA optimizer
# Parameters for acceleration
acceleration = str(lower(config['acceleration']))
wait_iterations = int(config['wait_iterations'])
history_depth = int(config['history_depth'])
frequency = int(config['frequency'])
store_each_nth = int(config['store_each_nth'])
reg_acc = float(config['regularization'])
relaxation = float(config['relaxation'])
# Import data
if str(lower(config['dataset'])) == 'graduate_admission':
input_dim, output_dim, dataset = graduate_admission_data()
elif str(lower(config['dataset'])) == 'mnist':
input_dim, output_dim, dataset = mnist_data()
n_classes = 10
elif str(lower(config['dataset'])) == 'cifar10':
input_dim, output_dim, dataset = cifar10_data()
n_classes = 10
else:
raise RuntimeError('Dataset not recognized')
dataloader = torch.utils.data.DataLoader(dataset, batch_size)
# Generate dataloaders for training and validation
(
input_dim,
output_dim,
training_dataloader,
validation_dataloader,
) = generate_dataloaders(dataset_name, subsample_factor, batch_size)
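The new --subsample option flows into generate_dataloaders as subsample_factor. The utility itself is not part of this hunk; a hypothetical sketch of the subsampling step for MNIST, using torch.utils.data.Subset (names and return values here are illustrative, not the repository's implementation):

import torch
from torchvision import datasets, transforms

def subsampled_mnist_loaders(subsample_factor, batch_size):
    # Hypothetical helper: keep a random fraction of the training set before batching.
    transform = transforms.ToTensor()
    train_set = datasets.MNIST('./data', train=True, download=True, transform=transform)
    val_set = datasets.MNIST('./data', train=False, download=True, transform=transform)
    n_keep = max(1, int(subsample_factor * len(train_set)))
    keep_idx = torch.randperm(len(train_set))[:n_keep].tolist()
    train_subset = torch.utils.data.Subset(train_set, keep_idx)
    train_loader = torch.utils.data.DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size)
    return train_loader, val_loader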
# Define deep learning model
if model_name == 'mlp':
model_classic = MLP(input_dim, output_dim, num_neurons_list, use_bias, activation, classification_problem)
model_classic = MLP(
input_dim,
output_dim,
num_neurons_list,
use_bias,
activation,
classification_problem,
)
elif model_name == 'cnn':
model_classic = CNN2D(input_dim, output_dim, num_neurons_list, use_bias, activation, classification_problem)
model_classic = CNN2D(
input_dim,
output_dim,
num_neurons_list,
use_bias,
activation,
classification_problem,
)
else:
raise RuntimeError('Model type not recognized')
model_anderson = deepcopy(model_classic)
# For classification problems, the loss function is the cross entropy (ce)
# For classification problems, the loss function is the negative log-likelihood (nll)
# For regression problems, the loss function is the mean squared error (mse)
if classification_problem:
loss_function_name = 'ce'
loss_function_name = 'nll'
else:
loss_function_name = 'mse'
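The string chosen here presumably selects the torch criterion inside the optimizer classes; a minimal sketch of such a mapping, assuming standard torch.nn losses (the actual lookup lives in modules/optimizers.py and is not shown in this diff):

import torch.nn as nn

# Assumed mapping from the loss names used above to torch criteria.
# 'nll' expects log-probabilities (e.g. a LogSoftmax output layer); 'ce' applies the log-softmax internally.
loss_functions = {
    'nll': nn.NLLLoss(),
    'ce': nn.CrossEntropyLoss(),
    'mse': nn.MSELoss(),
}
criterion = loss_functions['nll']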
# Define the standard optimizer which is used as point of reference to assess the improvement provided by the
# acceleration
optimizer_classic = FixedPointIteration(dataloader, learning_rate, weight_decay)
optimizer_classic = FixedPointIteration(
training_dataloader,
validation_dataloader,
learning_rate,
weight_decay,
verbose,
)
optimizer_classic.import_model(model_classic)
optimizer_classic.set_loss_function(loss_function_name)
optimizer_classic.set_optimizer(optimizer_name )
training_classic_loss_history = optimizer_classic.train(epochs, threshold, batch_size)
optimizer_anderson = DeterministicAcceleration(dataloader, 'anderson', learning_rate, 0.1, weight_decay, wait_iterations, history_depth, frequency,
reg_acc, store_each_nth)
optimizer_classic.set_optimizer(optimizer_name)
(
training_classic_loss_history,
validation_classic_loss_history,
) = optimizer_classic.train(epochs, threshold, batch_size)
optimizer_anderson = DeterministicAcceleration(
training_dataloader,
validation_dataloader,
acceleration,
learning_rate,
relaxation,
weight_decay,
wait_iterations,
history_depth,
frequency,
reg_acc,
store_each_nth,
verbose,
)
optimizer_anderson.import_model(model_anderson)
optimizer_anderson.set_loss_function(loss_function_name)
optimizer_anderson.set_optimizer(optimizer_name)
training_anderson_loss_history = optimizer_anderson.train(epochs, threshold, batch_size)
(
training_anderson_loss_history,
validation_anderson_loss_history,
) = optimizer_anderson.train(epochs, threshold, batch_size)
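Both optimizers are driven by the scheduling parameters parsed earlier (wait_iterations, frequency, store_each_nth, history_depth, relaxation). A self-contained toy sketch of how those parameters could gate the acceleration inside the epoch loop; this is an assumed outline on a scalar fixed-point problem, not the code of DeterministicAcceleration:

import numpy as np

wait_iterations, history_depth, frequency, store_each_nth, relaxation = 1, 5, 1, 1, 0.1
epochs = 20

def g(x):                  # stands in for one epoch of the classic optimizer
    return 0.5 * x + 1.0

x, history = 4.0, []
for epoch in range(epochs):
    x = g(x)                                             # classic update
    if epoch >= wait_iterations and epoch % store_each_nth == 0:
        history.append(x)                                # store iterate (a column of R)
        history = history[-history_depth:]               # sliding window of depth history_depth
    if epoch >= wait_iterations and epoch % frequency == 0 and len(history) >= 3:
        x_acc = np.mean(history)                         # placeholder for the Anderson/RNA extrapolation
        x = (1.0 - relaxation) * x + relaxation * x_acc  # relaxed mixing of old and accelerated iterate
print(x)  # the fixed point of g is 2.0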
if config['display']:
epochs1 = range(1, len(training_classic_loss_history) + 1)
epochs2 = range(1, len(training_anderson_loss_history) + 1)
plt.plot(epochs1, training_classic_loss_history, label='training loss - Fixed Point')
plt.plot(epochs2, training_anderson_loss_history, label='training loss - Anderson')
plt.figure(1)
plt.plot(
epochs1,
training_classic_loss_history,
color='b',
linestyle='-',
label='training loss - Fixed Point',
)
plt.plot(
epochs2,
training_anderson_loss_history,
color='r',
linestyle='-',
label='training loss - Anderson',
)
plt.yscale('log')
plt.title('Training loss function')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.draw()
plt.savefig('training_loss_plot')
plt.figure(2)
plt.plot(
epochs1,
validation_classic_loss_history,
color='b',
linestyle='-',
label='validation loss - Fixed Point',
)
plt.plot(
epochs2,
validation_anderson_loss_history,
color='r',
linestyle='-',
label='validation loss - Anderson',
)
plt.yscale('log')
plt.title('Training accuracy')
plt.title('Validation loss function')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.draw()
plt.savefig('loss_plot')
plt.savefig('validation_loss_plot')
@@ -6,8 +6,8 @@ Created on Fri Dec 4 11:52:48 2020
@author: 7ml
"""
import math
import numpy as np
from numpy import linalg as LA
def anderson(X, reg=0):
@@ -15,7 +15,7 @@ def anderson(X, reg=0):
# Take a matrix X where column X[:,i] is the difference between the {i+1}-th and the i-th iterate of the
# fixed-point operation
# x_i = g(x_{i-1})
#
#
# r_i = x_{i+1} - x_i
# X[:,i] = r_i
#
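The comments above describe the residual matrix that the least-squares step operates on. For reference, a self-contained toy of the textbook regularized nonlinear acceleration formula on a linear fixed-point iteration x <- A x + b; this illustrates the formula only, while the routine below differs in how it builds and solves the system:

import numpy as np

rng = np.random.default_rng(0)
d, k = 20, 6
A = 0.9 * np.eye(d) + 0.01 * rng.standard_normal((d, d))   # contractive map
b = rng.standard_normal(d)

X = np.zeros((d, k + 1))              # columns are the iterates x_0, ..., x_k
for i in range(k):
    X[:, i + 1] = A @ X[:, i] + b     # fixed-point step x_{i+1} = g(x_i)

R = np.diff(X)                        # residuals r_i = x_{i+1} - x_i, one per column
reg = 1e-8
RR = R.T @ R                          # "square" the residual matrix
z = np.linalg.solve(RR + reg * np.eye(k), np.ones(k))   # solve (R'R + lambda I) z = 1
c = z / z.sum()                       # weights c with sum(c) = 1
x_extr = X[:, :k] @ c                 # extrapolation: weighted mean sum_i c_i x_i

x_star = np.linalg.solve(np.eye(d) - A, b)
print(np.linalg.norm(X[:, k] - x_star), np.linalg.norm(x_extr - x_star))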
@@ -32,44 +32,44 @@
DX = np.diff(X)
DR = np.diff(DX)
projected_residual = np.matmul(DR.T, DX[:,k-1])
DX = DX[:,:-1]
# "Square" the matrix, and normalize it
RR = np.matmul(np.transpose(DR), DR)
projected_residual = DX[:, k - 1]
DX = DX[:, :-1]
# Solve (R'R + lambda I)z = 1
(extr, c) = anderson_precomputed(DX, RR, projected_residual, reg)
(extr, c) = anderson_precomputed(DX, DR, projected_residual, reg)
# Compute the extrapolation / weighted mean "sum_i c_i x_i", and return
return extr, c
def anderson_precomputed(DX, RR, residual, reg=0):
def anderson_precomputed(DX, DR, residual, reg=0):
# Regularized Nonlinear Acceleration, with RR precomputed
# Same as rna, but RR is computed only once
# Recovers parameters
(d, k) = DX.shape
# Solve (R'R + lambda I)z = 1
reg_I = reg * np.eye(k)
# In case of singular matrix, we solve using least squares instead
try:
z = np.linalg.solve(RR + reg_I, residual)
except LA.linalg.LinAlgError:
z = np.linalg.lstsq(RR+reg_I, residual, -1)
RR = np.matmul(np.transpose(DR), DR)
if math.sqrt(np.linalg.cond(RR, 'fro')) < 1e5:
# In case of singular matrix, we solve using least squares instead
q, r = np.linalg.qr(DR)
new_residual = np.matmul(np.transpose(q), residual)
z = np.linalg.lstsq(r, new_residual, reg)
z = z[0]
# Recover weights c, where sum(c) = 1
if np.abs(np.sum(z)) < 1e-10:
z = np.ones((k,1))
alpha = np.asmatrix(z / np.sum(z))
# Recover weights c, where sum(c) = 1
if np.abs(np.sum(z)) < 1e-10:
z = np.ones((k, 1))
alpha = np.asmatrix(z / np.sum(z))
else:
alpha = np.zeros((DX.shape[1],1))
# Compute the extrapolation / weighted mean "sum_i c_i x_i", and return
extr = np.matmul(DX, alpha)
return np.array(extr), alpha
\ No newline at end of file