Commit 6ff723bb authored by Nouamane Laanait

Update README, clean up unused modules, and add some util functions

parent 03974ced
.gitignore
+1 −0
```diff
@@ -108,3 +108,4 @@ venv.bak/
 .gitignore
 .idea
 .vscode
+.code-workspace*
```
README.md
+26 −14
```diff
-# STEMDL
-A python package for deep learning-based analysis of Scanning Transmission Electron Microscopy.
-Longer description coming one of those days...
-To get started see __scripts__ folder for the following:
+# `STEMDL`
+A Python package for distributed deep learning with a special focus on inverse problems in materials imaging.
+`stemdl` was used in the following (applied and fundamental) deep learning research projects:
+1. *3-D reconstruction of Structural Distortions from Electron Microscopy* ([Link to Paper](https://arxiv.org/abs/1902.06876))
+2. *27,600 V100 GPUs and 7 MW(h) of Power to solve an age-old scientific inverse problem* ([Link to Paper](https://arxiv.org/abs/1909.11150) and [Medium story](https://medium.com/syncedreview/nvidia-ornl-researchers-train-ai-model-on-worlds-top-supercomputer-using-27-600-nvidia-gpus-1165e0d5da7b))
+3. *YNet: a Physics-Constrained and Semi-Supervised Learning Approach to Inverse Problems*
+---
+#### Getting Started
+See the __scripts__ folder for the following:
+1. __stemdl_run.py__:
+   Python script. Runs from the CLI to set up neural nets and start training/evaluation operations.
+2. __generate_json.py__:
+   Python script. Generates the .json files needed as input for stemdl_run.py.

-Here's a brief description of current modules:
+---
+#### Brief description of Modules:
 1. __inputs.py__:
    Classes to read training/evaluation data, create training batches, and apply image transformations.
-   Can handle I/O ops on TFRecords and numpy arrays.
+   Can handle I/O ops on TFRecords, numpy arrays, and LMDB files.
 2. __network.py__:
    Classes to set up various kinds of neural nets (ConvNets, ResNets, etc.)
 3. __runtime.py__:
-   Functions that perform network training/evaluation and generation of TensorBoard summaries; also sets FLAGS that describe data, saving, and training params.
+   Functions and classes to perform (low-level) network training/evaluation.
 4. __io_utils.py__:
-   Functions to generate .json files for neural net architecture input files and their hyper-parameters.
+   Functions to generate .json files for model architecture input files, hyperparameters, and training-run configurations.
 5. __losses.py__:
    Functions to generate and manipulate loss functions.
 6. __optimizers.py__:
    Optimizer setup, gradient pre-processing, and reduction.
 7. __automatic_loss_scaler.py__:
    Python module for dynamic loss scaling during fp16 training (taken as-is from [OpenSeq2Seq](https://nvidia.github.io/OpenSeq2Seq/html/index.html)).
 ---
 #### Software Requirements:
-1. __numpy__ >= 1.13.
-2. __tensorflow__ >= 1.2.
-3. __python__ 3
+1. __numpy__ >= 1.13
+2. __tensorflow__ >= 1.2
+3. __python__ 3.6
+4. __horovod__ >= 0.16

 #### Hardware Requirements:
-1. CUDA compatible GPU >= 1.
+1. CUDA compatible GPU >= 1
```
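The updated README names `generate_json.py` but stops short of showing the json-generation step it wraps. A minimal sketch using `write_json_network_config` from `stemdl/io_utils.py` (only its signature is visible in the hunk below); the layer names and parameter dicts are invented placeholders, not the package's actual schema, and we assume the `file` argument accepts a path string:

```python
# Hypothetical layer keys/params; consult scripts/generate_json.py for the real schema.
from stemdl import io_utils

layer_keys = ['conv1', 'pool1', 'fc1']
layer_params = [
    {'type': 'convolutional', 'n_filters': 64},
    {'type': 'pooling', 'stride': 2},
    {'type': 'fully_connected', 'n_units': 10},
]
io_utils.write_json_network_config('network.json', layer_keys, layer_params)
# The resulting network.json is what scripts/stemdl_run.py takes as CLI input.
```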
stemdl/io_utils.py
+2 −2
```diff
@@ -10,9 +10,9 @@ import horovod.tensorflow as hvd
 # JSON utility functions


-def print_rank(self, *args, **kwargs):
+def print_rank(*args, **kwargs):
     if hvd.rank() == 0 :
-        print_rank(*args, **kwargs)
+        print(*args, **kwargs)

 def write_json_network_config(file, layer_keys, layer_params):
     """
```
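The two-line fix above removes a stray `self` parameter (`print_rank` is a module-level function, not a method) and a recursive call to itself that would have overflowed the stack the first time rank 0 tried to print. The corrected behavior, as a self-contained sketch:

```python
# Rank-0-only printing under Horovod: every worker calls print_rank, but only
# the root rank writes to stdout, so multi-GPU logs are not duplicated N times.
import horovod.tensorflow as hvd

hvd.init()

def print_rank(*args, **kwargs):
    if hvd.rank() == 0:
        print(*args, **kwargs)

print_rank('step 100, loss = 0.42')  # appears once, regardless of world size
```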
stemdl/losses.py
+16 −17
```diff
@@ -83,25 +83,18 @@ def calc_loss(n_net, scope, hyper_params, params, labels, step=None, images=None
         probe_re = n_net.model_output['decoder_RE']
         pot = n_net.model_output['inverter']
         pot_labels, probe_labels_re, probe_labels_im = [tf.expand_dims(itm, axis=1) for itm in tf.unstack(labels, axis=1)]
-        # weight=0.10
         inverter_loss = calculate_loss_regressor(pot, pot_labels, params, hyper_params, weight=weight)
-        # weight=1
-        # probe_shape = probe_labels_re.shape.as_list()
-        # mask = np.ones(probe_shape, dtype=np.float32)
-        # snapshot = slice(probe_shape[-1]// 4, 3 * probe_shape[-1]//4)
-        # mask[:,:, snapshot, snapshot] = 100.0
-        # #mask = np.expand_dims(np.expand_dims(mask, axis=0), axis=0)
-        # weight = tf.constant(mask)
         decoder_loss_im = calculate_loss_regressor(probe_im, probe_labels_im, params, hyper_params, weight=weight)
         decoder_loss_re = calculate_loss_regressor(probe_re, probe_labels_re, params, hyper_params, weight=weight)
-        # psi_comp = tf.fft2d(tf.cast(probe_re, tf.complex64) * tf.exp( 1.j * tf.cast(probe_im, tf.complex64)))
-        # pot_frac = tf.exp(1.j * tf.cast(pot, tf.complex64))
-        # reg_term = tf.fft2d(psi_comp * pot_frac / np.prod(psi_comp.shape.as_list()))
-        # reg_term = tf.cast(tf.abs(reg_term), tf.float32)
-        # reg_loss = calculate_loss_regressor(reg_term, tf.reduce_mean(images, axis=[1], keepdims=True),
-        #             params, hyper_params, weight=weight)
-        # tf.summary.image('Regularization', tf.transpose(reg_term, perm=[0,2,3,1]), max_outputs=4)
-        # tf.summary.image('Pot_realspace', tf.transpose(tf.abs(psi_comp), perm=[0,2,3,1]), max_outputs=4)
+        psi_comp = fftshift(tf.fft2d(tf.cast(probe_re, tf.complex64) * tf.exp(1.j * tf.cast(probe_im, tf.complex64))))
+        pot_frac = tf.exp(1.j * tf.cast(pot, tf.complex64))
+        reg_term = fftshift(tf.fft2d(psi_comp * pot_frac / np.prod(psi_comp.shape.as_list())))
+        reg_term = tf.cast(tf.abs(reg_term), tf.float32)
+        reg_loss = calculate_loss_regressor(reg_term, tf.reduce_mean(images, axis=[1], keepdims=True),
+                    params, hyper_params, weight=weight)
+        tf.summary.image('Regularization', tf.transpose(reg_term, perm=[0,2,3,1]), max_outputs=1)
+        tf.summary.image('Pot_realspace', tf.transpose(tf.abs(psi_comp), perm=[0,2,3,1]), max_outputs=1)
+        tf.summary.scalar('Regularization loss (raw)', reg_loss)
         tf.summary.scalar('Inverter loss (raw)', inverter_loss)
         tf.summary.scalar('Decoder loss (IM)', decoder_loss_im)
         tf.summary.scalar('Decoder loss (RE)', decoder_loss_re)
@@ -120,7 +113,7 @@ def calc_loss(n_net, scope, hyper_params, params, labels, step=None, images=None
     #Assemble all of the losses.
     losses = tf.get_collection(tf.GraphKeys.LOSSES)
     if hyper_params['network_type'] == 'YNet':
-        losses = [inverter_loss , decoder_loss_re, decoder_loss_im]
+        losses = [inverter_loss , decoder_loss_re, decoder_loss_im, reg_loss]
         # losses, prefac = ynet_adjusted_losses(losses, step)
         # tf.summary.scalar("prefac_inverter", prefac)
         # losses = [inverter_loss]
@@ -261,3 +254,9 @@ def ynet_adjusted_losses(losses, global_step):
         tf.summary.scalar("prefac_inverter", prefac)
         losses = [inv_loss , prefac * dec_re_loss, prefac * dec_im_loss]
         return losses, prefac
+
+def fftshift(tensor, tens_format='NCHW'):
+    dims = [2,3] if tens_format == 'NCHW' else [1,2]
+    shift = [int((tensor.shape[dim]) // 2) for dim in dims]
+    shift_tensor = manip_ops.roll(tensor, shift, dims)
+    return shift_tensor
\ No newline at end of file
```
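The first hunk activates the previously commented-out physics regularization: the predicted exit wave is Fourier-transformed, centered with the new `fftshift` helper, and its modulus is penalized against the mean of the input images; the second hunk adds that `reg_loss` to the YNet loss list. `fftshift` mirrors `np.fft.fftshift` on the spatial axes (`manip_ops` is presumably TensorFlow's roll op, exposed as `tf.manip.roll` in TF >= 1.6; the import itself is outside the hunks shown). A quick sanity check, as a sketch:

```python
# TF 1.x sketch: verify the fftshift helper against np.fft.fftshift.
import numpy as np
import tensorflow as tf

def fftshift(tensor, tens_format='NCHW'):
    # Roll each spatial axis by half its length, moving the zero-frequency
    # component of tf.fft2d output to the center of the image.
    dims = [2, 3] if tens_format == 'NCHW' else [1, 2]
    shift = [int(tensor.shape[dim]) // 2 for dim in dims]
    return tf.manip.roll(tensor, shift, dims)

x = np.random.rand(2, 1, 8, 8).astype(np.float32)
with tf.Session() as sess:
    out = sess.run(fftshift(tf.constant(x)))
np.testing.assert_allclose(out, np.fft.fftshift(x, axes=(2, 3)))
```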

stemdl/mp_wrapper.py (deleted, 100644 → 0)
+0 −133
```python
# Copyright (c) 2018 NVIDIA Corporation
#from __future__ import absolute_import
#from __future__ import division
#from __future__ import print_function
#from __future__ import unicode_literals

import tensorflow as tf

from .automatic_loss_scaler import AutomaticLossScaler


# pylint: disable=abstract-method
class MixedPrecisionOptimizerWrapper(tf.train.Optimizer):
  def __init__(self, optimizer, loss_scale=None):
    super(MixedPrecisionOptimizerWrapper, self).__init__(
        optimizer._use_locking,
        optimizer._name + '-MP',
    )
    self._optimizer = optimizer
    self._fp32_to_fp16 = {}
    self._loss_scaler = None
    if loss_scale is None:
      self._loss_scale = 1.0
    elif isinstance(loss_scale, float):
      self._loss_scale = loss_scale
    elif isinstance(loss_scale, AutomaticLossScaler):
      self._loss_scaler = loss_scale
      self._loss_scale = self._loss_scaler.loss_scale

  def compute_gradients(self, loss, var_list=None,
                        gate_gradients=tf.train.Optimizer.GATE_OP,
                        aggregation_method=None,
                        colocate_gradients_with_ops=False,
                        grad_loss=None):
    loss *= self._loss_scale
    grads_and_vars_fp16 = self._optimizer.compute_gradients(
        loss, var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        grad_loss=grad_loss,
    )

    # collecting regularization functions
    reg_var_funcs = tf.get_collection('REGULARIZATION_FUNCTIONS')
    print(reg_var_funcs)
    reg_funcs = dict(map(lambda x: (x[0].name, x[1]), reg_var_funcs))
    # reg_funcs = [None]
    # creating FP-32 variables and filling the fp32 dict
    grads_and_vars_fp32 = []
    with tf.variable_scope('FP32-master-copy'):
      for grad, var in grads_and_vars_fp16:
        if var.dtype.base_dtype == tf.float16:
          fp32_var = tf.Variable(
              initial_value=tf.cast(var.initialized_value(), tf.float32),
              name=var.name.split(':')[0],
              expected_shape=var.shape,
              dtype=tf.float32,
              trainable=False,
              # necessary for cudnn_rnn layers which have unknown shape
              validate_shape=bool(var.get_shape()),
              collections=[tf.GraphKeys.GLOBAL_VARIABLES,
                           "FP32_MASTER_COPIES"],
          )
          self._fp32_to_fp16[fp32_var.name] = var
          fp32_grad = tf.cast(grad, tf.float32)
          # adding regularization part with respect to fp32 copy
          if var.name in reg_funcs:
            fp32_grad += self._loss_scale * tf.gradients(
                # pylint: disable=no-member
                tf.contrib.layers.apply_regularization(
                    reg_funcs[var.name],
                    [fp32_var],
                ),
                fp32_var,
            )[0]
          grads_and_vars_fp32.append((fp32_grad, fp32_var))
        else:
          grads_and_vars_fp32.append((grad, var))

    grads_and_vars_fp32 = _scale_grads(grads_and_vars_fp32,
                                       1.0 / self._loss_scale)
    return grads_and_vars_fp32

  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    def apply_ops_wrapper():
      update_op = self._optimizer.apply_gradients(grads_and_vars,
                                                  global_step, name)
      apply_ops = []
      with tf.control_dependencies([update_op]):
        for grad, var in grads_and_vars:
          if var.name in self._fp32_to_fp16:
            dst_var = self._fp32_to_fp16[var.name]
            apply_ops.append(
                tf.assign(dst_var, tf.saturate_cast(var, tf.float16))
            )
      if apply_ops:
        return tf.group(apply_ops)
      return update_op

    if self._loss_scaler:
      grad_has_nans, grad_amax = AutomaticLossScaler.check_grads(grads_and_vars)
      should_skip_update = tf.logical_or(tf.is_inf(grad_amax), grad_has_nans)
      loss_scale_update_op = self._loss_scaler.update_op(grad_has_nans,
                                                         grad_amax)
      with tf.control_dependencies([loss_scale_update_op]):
        return tf.cond(should_skip_update, tf.no_op, apply_ops_wrapper)
    else:
      return apply_ops_wrapper()


def mp_regularizer_wrapper(regularizer):
  def func_wrapper(weights):
    if weights.dtype.base_dtype == tf.float16:
      tf.add_to_collection('REGULARIZATION_FUNCTIONS', (weights, regularizer))
      # disabling the inner regularizer
      return None
    return regularizer(weights)

  return func_wrapper


def _scale_grads(grads_and_vars, scale):
  scaled_grads_and_vars = []
  for grad, var in grads_and_vars:
    if grad is not None:
      if isinstance(grad, tf.IndexedSlices):
        grad_values = grad.values * scale
        grad = tf.IndexedSlices(grad_values, grad.indices, grad.dense_shape)
      else:
        grad *= scale
    scaled_grads_and_vars.append((grad, var))
  return scaled_grads_and_vars
```
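For reference, the deleted module implemented the standard fp16 training recipe: scale the loss before differentiation, keep fp32 master copies of fp16 variables, unscale the gradients, and (when an `AutomaticLossScaler` is attached) skip any step whose gradients overflow. A sketch of how the wrapper was driven before its removal; the `'Backoff'` constructor argument follows OpenSeq2Seq's algorithm naming and is an assumption, as is the toy model:

```python
# Sketch only (TF 1.x): driving the removed MixedPrecisionOptimizerWrapper.
import tensorflow as tf
from stemdl.automatic_loss_scaler import AutomaticLossScaler
from stemdl.mp_wrapper import MixedPrecisionOptimizerWrapper  # pre-removal import

# Toy fp16 variable with an fp32 loss, standing in for a real network.
w = tf.get_variable('w', shape=[4], dtype=tf.float16,
                    initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.cast(w, tf.float32) ** 2)

base_opt = tf.train.MomentumOptimizer(learning_rate=1e-3, momentum=0.9)
opt = MixedPrecisionOptimizerWrapper(base_opt,
                                     loss_scale=AutomaticLossScaler('Backoff'))
# Returns fp32 gradients paired with fp32 master copies of the fp16 variables.
grads_and_vars = opt.compute_gradients(loss)
# Updates the masters, saturate-casts them back to fp16, and skips steps
# whose gradients contain inf/NaN when a loss scaler is attached.
train_op = opt.apply_gradients(grads_and_vars,
                               global_step=tf.train.get_or_create_global_step())
```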