Commit 21a4ab2e authored by Nouamane Laanait

Changes to YNet branches: enable the per-slice fully connected paths in the CVAE and subnet heads, use deconvolution instead of upscaling in the inverter, wrap Horovod's grouped all-reduce in an ImportError fallback, and re-enable gradient clipping by global norm.


Former-commit-id: b3072bd7
parent 691cc156
+81 −96
@@ -2609,15 +2609,8 @@ class YNet(FCDenseNet, FCNet):
        fully_connected = params['fc_params']
        num_conv = params['n_conv_layers']
        num_fc = params['n_fc_layers']
        # conv_1by1 = OrderedDict({'type': 'conv_2D', 'stride': [2, 2], 'kernel': [4, 4], 
        #                         'features': 16,
        #                         'activation': 'relu', 'padding': 'SAME', 'batch_norm': True, 'dropout': 0.0})
        # pool = OrderedDict({'type': 'pooling', 'stride': [2, 2], 'kernel': [2, 2], 'pool_type': 'max','padding':'SAME'})
        # fully_connected = OrderedDict({'type': 'fully_connected','weights': 1024,'bias': 1024, 'activation': 'relu',
        #                            'regularize': True})
        self.print_verbose("\t>>> Adding CVAE: " )
        self.print_verbose('\t\t    input: %s' %format(inputs.get_shape().as_list()))
        # pre_ops = deepcopy(self.ops)
        def CVAE(tens):
            #TODO, turn this into a full denoising VAE
            for i in range(num_conv):
@@ -2625,28 +2618,20 @@ class YNet(FCDenseNet, FCNet):
                    tens, _ = self._conv(input=tens, params=conv_1by1)
                    # tens = self._pool(input=tens, params=pool)
                    tens = self._activate(input=tens, params=conv_1by1)
                    tens = self._batch_norm(input=tens)
            # if tens.shape[-2:] != [32, 32]:
            #     tens = tf.transpose(tens, perm=[0, 2, 3, 1])
            #     tens = tf.image.resize(tens, [32, 32], method=tf.image.ResizeMethod.BILINEAR)
            #     if self.params['IMAGE_FP16']:
            #         tens = tf.saturate_cast(tens, tf.float16)
            #     tens = tf.transpose(tens, perm=[0, 3, 1, 2])
            # self.print_rank('shape inside CVAE', tens.get_shape())
            # # tens = tf.reshape(tens, [new_shape[0], -1])
                    # tens = self._batch_norm(input=tens)
            for i in range(num_fc):
                with tf.variable_scope('CVAE_fc_%d' %i, reuse=self.reuse) as _ :
                    tens = self._linear(input=tens, params=fully_connected)
                    tens = self._activate(input=tens, params=fully_connected)
            return tens

        # post_ops = deepcopy(self.ops)
        # self.print_rank("post pre, cvae ops: ", pre_ops - post_ops)
        out = tf.map_fn(CVAE, tensor_slices, back_prop=True)
        self.print_rank('output of CVAE', out.get_shape())
        out = tf.transpose(out, perm= [1, 2, 0])
        dim = int(math.sqrt(self.images.shape.as_list()[1]))
        out = tf.reshape(out, [self.params['batch_size'], -1, dim, dim])
        # out = tf.transpose(out, perm=[1,0,2,3])
        self.print_rank('output of Encoder', out.get_shape())
        self.model_output['encoder'] = out 
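The path enabled above applies the encoder to every slice with tf.map_fn and then folds the per-slice outputs back into a square spatial grid. A minimal standalone sketch of that map/transpose/reshape pattern, with illustrative shapes (16 slices, batch of 4) and a stand-in encode_slice in place of the CVAE body:

```python
import math
import tensorflow as tf  # TF1-style graph API, matching the diff

def encode_slice(tens):
    # Stand-in for the CVAE body: any per-slice op with a fixed-size output.
    return tf.layers.flatten(tens)

# The slices are stacked on the leading axis: [n_slices, batch, h, w, c].
tensor_slices = tf.placeholder(tf.float32, [16, 4, 32, 32, 1])
out = tf.map_fn(encode_slice, tensor_slices, back_prop=True)  # [16, 4, 1024]
out = tf.transpose(out, perm=[1, 2, 0])                       # [4, 1024, 16]
dim = int(math.sqrt(16))                                      # 16 slices tile a dim x dim grid
out = tf.reshape(out, [4, -1, dim, dim])                      # [4, 1024, 4, 4]
```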
@@ -2765,9 +2750,9 @@ class YNet(FCDenseNet, FCNet):
                if layer_params['type'] == 'deconv_2D':
                    self.print_verbose(">>> Adding de-Conv Layer: %s" % layer_name)
                    self.print_verbose('    input: %s' %format(out.get_shape().as_list()))
                    # if subnet == 'inverter':
                    #     out = self._upscale(inputs=out, params=layer_params)
                    # else:
                    out, _ = self._deconv(input=out, params=layer_params)
                    self.print_verbose('    output: %s' %format(out.get_shape().as_list()))
                    if self.summary: 
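With the upscale branch commented out above, every subnet now upsamples through _deconv. For reference, a rough TF1 sketch of the two options: a learned transposed convolution versus the fixed bilinear resize an _upscale helper would typically wrap (shapes and layer sizes are illustrative):

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, [4, 16, 16, 64])  # NHWC, for the stock TF ops below

# Path kept by this commit: learned upsampling via transposed convolution.
deconv = tf.layers.conv2d_transpose(x, filters=32, kernel_size=4,
                                    strides=2, padding='same')  # -> [4, 32, 32, 32]

# Path commented out: fixed, parameter-free bilinear resize.
resized = tf.image.resize_images(x, [32, 32],
                                 method=tf.image.ResizeMethod.BILINEAR)
```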
@@ -2869,35 +2854,35 @@ class YNet(FCDenseNet, FCNet):
        'activation': 'relu', 
        'padding': 'VALID', 
        'batch_norm': True, 'dropout':0.0})
        if True:
            def fc_map(tens):
                for i in range(num_fc):
                    with tf.variable_scope('%s_fc_%d' %(subnet, i), reuse=self.reuse) as scope :
                        tens = self._linear(input=tens, params=fully_connected)
                        tens = self._activate(input=tens, params=fully_connected)
                        # scopes_list.append(scope)
                return tens
            out = tf.map_fn(fc_map, out, back_prop=True)
            out = tf.transpose(out, perm= [1, 2, 0])
            dim = int(math.sqrt(self.images.shape.as_list()[1]))
            out = tf.reshape(out, [self.params['batch_size'], -1, dim, dim])
        else:
            out = tf.reshape(out, [out_shape[0]*out_shape[1], out_shape[2], out_shape[3], out_shape[4]])
            with tf.variable_scope('%s_conv_1by1_1' % subnet, reuse=self.reuse) as scope:
                out, _ = self._conv(input=out, params=conv_1by1_1)
                out = tf.reshape(out, [out_shape[0], out_shape[1], out_shape[3], out_shape[4]])
                out = tf.transpose(out, perm=[1,0,2,3])

                # scopes_list.append(scope)
        with tf.variable_scope('%s_conv_1by1_1024' % subnet, reuse=self.reuse) as scope:
            out, _ = self._conv(input=out, params=conv_1by1_1024) 
            out = self._activate(input=out, params=conv_1by1_1024)
            do_bn = conv_1by1_1024.get('batch_norm', False)
            if do_bn:
                out = self._batch_norm(input=out)
            else:
                out = self._add_bias(input=out, params=conv_1by1_1024)
            # scopes_list.append(scope)

        self._build_branch(subnet=subnet, inputs=out)
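The if/else above implements the same reduction two ways: the live branch maps fully connected layers over slices, while the dead else folds the slice axis into the batch axis so one shared 1x1 convolution processes every slice at once. A standalone sketch of that fold/conv/unfold trick, with assumed shapes:

```python
import tensorflow as tf

# 5-D stack: [n_slices, batch, channels, h, w]
out = tf.placeholder(tf.float32, [16, 4, 8, 32, 32])
s = out.shape.as_list()

# Fold slices into the batch axis so one shared conv sees every slice...
folded = tf.reshape(out, [s[0] * s[1], s[2], s[3], s[4]])   # [64, 8, 32, 32]
folded = tf.layers.conv2d(folded, filters=1, kernel_size=1,
                          data_format='channels_first')      # [64, 1, 32, 32]
# ...then unfold and put the batch axis first, as the diff's reshape/transpose does.
unfolded = tf.reshape(folded, [s[0], s[1], s[3], s[4]])      # [16, 4, 32, 32]
unfolded = tf.transpose(unfolded, perm=[1, 0, 2, 3])         # [4, 16, 32, 32]
```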

@@ -2921,45 +2906,45 @@ class YNet(FCDenseNet, FCNet):
        fully_connected = params['fc_params']
        num_fc = params['n_fc_layers']
        scopes_list = []
        if True:
            def fc_map(tens):
                for i in range(num_fc):
                    with tf.variable_scope('Inverter_fc_%d' %i, reuse=self.reuse) as scope :
                        tens = self._linear(input=tens, params=fully_connected)
                        tens = self._activate(input=tens, params=fully_connected)
                        # scopes_list.append(scope)
                return tens
            out = tf.map_fn(fc_map, out, back_prop=True, swap_memory=True, parallel_iterations=256)
            out = tf.transpose(out, perm= [1, 2, 0])
            dim = int(math.sqrt(self.images.shape.as_list()[1]))
            out = tf.reshape(out, [self.params['batch_size'], -1, dim, dim])
        else:
            conv_1by1_1 = OrderedDict({'type': 'conv_2D', 'stride': [1, 1], 'kernel': [1, 1], 
                                'features': 1,
                                'activation': 'relu', 
                                'padding': 'VALID', 
                                'batch_norm': True, 'dropout':0.0})
            out_shape = out.shape.as_list()
            out = tf.reshape(out, [out_shape[0]*out_shape[1], out_shape[2], out_shape[3], out_shape[4]])
            with tf.variable_scope('%s_conv_1by1_1' % 'inverter', reuse=self.reuse) as scope:
                out, _ = self._conv(input=out, params=conv_1by1_1)
                out = tf.reshape(out, [out_shape[0], out_shape[1], out_shape[3], out_shape[4]])
                out = tf.transpose(out, perm=[1,0,2,3])
                scopes_list.append(scope)
        conv_1by1_1024 = OrderedDict({'type': 'conv_2D', 'stride': [1, 1], 'kernel': [1, 1], 
            'features': 1024,
            'activation': 'relu', 
            'padding': 'VALID', 
            'batch_norm': True, 'dropout':0.0})
        with tf.variable_scope('inverter_conv_1by1_1024', reuse=self.reuse) as scope:
            out, _ = self._conv(input=out, params=conv_1by1_1024) 
            do_bn = conv_1by1_1024.get('batch_norm', False)
            if do_bn:
                out = self._batch_norm(input=out)
            else:
                out = self._add_bias(input=out, params=conv_1by1_1024)
            out = self._activate(input=out, params=conv_1by1_1024)
        self._build_branch(subnet='inverter', inputs=out)
        self.all_scopes['inverter'] += scopes_list
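Both heads drive the repo's private _conv, _batch_norm, _add_bias, and _activate helpers from OrderedDict specs like conv_1by1_1024. A rough approximation of what one such block computes, using stock TF1 layers in place of the repo's helpers (the exact helper semantics are assumed):

```python
from collections import OrderedDict
import tensorflow as tf

conv_1by1_1024 = OrderedDict({'type': 'conv_2D', 'stride': [1, 1], 'kernel': [1, 1],
                              'features': 1024, 'activation': 'relu',
                              'padding': 'VALID', 'batch_norm': True, 'dropout': 0.0})

def head_block(x, p, training=True):
    # conv -> batch norm (or bias) -> activation, the inverter ordering above
    x = tf.layers.conv2d(x, p['features'], p['kernel'], strides=p['stride'],
                         padding=p['padding'], use_bias=not p['batch_norm'])
    if p.get('batch_norm', False):
        x = tf.layers.batch_normalization(x, training=training)
    return tf.nn.relu(x) if p['activation'] == 'relu' else x
```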

+69 −52
@@ -127,54 +127,50 @@ def get_regularization_loss(scope=None, name="total_regularization_loss"):
    return tf.constant(0.0)



def reduce_gradients(grads_and_vars, on_horovod, model=None, run_params=None):
  if on_horovod:
    from horovod.tensorflow import allreduce, size, rank
    from horovod.tensorflow import allreduce, size
    try:
      from horovod.tensorflow.mpi_ops import register_group
      if run_params['hvd_group'] is None :
          layer_indices = get_grads_vars_layer_indices(grads_and_vars, model)
          averaged_grads_and_vars = []
          num_groups = len(layer_indices)
          for idx, layer in enumerate(layer_indices.keys()):
              ind_list = layer_indices[layer]
              if len(ind_list) >= 1:
                  layer_grads = [grads_and_vars[ind][0] for ind in ind_list]
                  layer_vars = [grads_and_vars[ind][1] for ind in ind_list]
                  g_id = register_group(len(layer_grads), "%s:%s:%d" % (layer_grads[0].name, layer_grads[-1].name, idx))  
              if size() > 1:
                  avg_grads = [allreduce(grad, compression=run_params['hvd_fp16'], group_id = g_id)
                              if grad is not None else tf.constant(0) for grad in layer_grads ]
                  averaged_grads_and_vars.append([(avg_grad, var) for avg_grad, var in zip(avg_grads, layer_vars)])
          print('per layer grouping')        
          return list(chain.from_iterable(averaged_grads_and_vars))
      else:
          num_groups = run_params['hvd_group']
          num_grads_per_group = (len(grads_and_vars) + num_groups - 1) // num_groups
          group_ids = [register_group(num_grads_per_group, "%s:%s:%d" % (grads_and_vars[0][0].name, grads_and_vars[-1][0].name, i))
                  for i in range(len(grads_and_vars) // num_grads_per_group)]
      
          if len(grads_and_vars) % num_grads_per_group != 0:
              group_ids.append(register_group(len(grads_and_vars) % num_grads_per_group, 
                                          "%s:%s:%d" % (grads_and_vars[0][0].name, grads_and_vars[-1][0].name,
                                          len(grads_and_vars) // num_grads_per_group + 1)))
      if size() > 1:
        averaged_grads_and_vars = []
        with tf.name_scope("all_reduce"):
          for idx, (grad, var) in enumerate(grads_and_vars):
            if grad is not None:
              avg_grad = allreduce(grad, compression=run_params['hvd_fp16'], group_id=group_ids[idx//num_grads_per_group])
              averaged_grads_and_vars.append((avg_grad, var))
            else:
              averaged_grads_and_vars.append((tf.constant(0), var))
        return averaged_grads_and_vars
      else:
        return grads_and_vars
    except ImportError:
      if size() > 1:
        averaged_grads_and_vars = []
        with tf.name_scope("all_reduce"):
@@ -189,6 +185,27 @@ def reduce_gradients(grads_and_vars, on_horovod, model=None, run_params=None):
        return averaged_grads_and_vars
      else:
        return grads_and_vars 
  else:
    raise NotImplementedError("Horovod is needed to reduce gradients")
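The grouped path above sizes groups by ceiling division and sends gradient i to group i // num_grads_per_group, registering one extra, smaller group when the division isn't exact. A self-contained sketch of just that partitioning arithmetic (no Horovod required):

```python
def partition_into_groups(n_grads, num_groups):
    """Mirror the sizing in reduce_gradients: ceil(n_grads / num_groups) per group."""
    per_group = (n_grads + num_groups - 1) // num_groups
    n_full = n_grads // per_group          # groups registered in the list comprehension
    groups = [list(range(g * per_group, (g + 1) * per_group)) for g in range(n_full)]
    if n_grads % per_group:                # trailing, smaller group, as in the diff
        groups.append(list(range(n_full * per_group, n_grads)))
    return per_group, groups

per_group, groups = partition_into_groups(n_grads=10, num_groups=4)
# per_group == 3; groups == [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
# gradient i is all-reduced with group_ids[i // per_group]
```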


# def reduce_gradients(grads_and_vars, on_horovod, model=None, run_params=None):
#    if on_horovod:
#     from horovod.tensorflow import allreduce, size, rank
#     if size() > 1:
#       averaged_grads_and_vars = []
#       with tf.name_scope("all_reduce"):
#         for idx, (grad, var) in enumerate(grads_and_vars):
#           if grad is not None:
#             # print("rank: %d, grad: %s, var:%s" %(rank(), grad.name, var.name))
#             avg_grad = allreduce(grad)
#             averaged_grads_and_vars.append((avg_grad, var))
#           else:
#             print("grad: None, var:%s" %(var.name))
#             averaged_grads_and_vars.append((tf.constant(0), var))
#       return averaged_grads_and_vars
#     else:
#       return grads_and_vars


def optimize_loss(loss,
@@ -411,8 +428,8 @@ def post_process_gradients(grads_and_vars, summaries, lr,
    )

  # Optionally clip gradients by global norm.
  if clip_gradients is not None:
    grads_and_vars = _clip_gradients_by_norm(grads_and_vars, clip_gradients)

  # Add histograms for variables, gradients and gradient norms.
  for gradient, variable in grads_and_vars:
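The body of _clip_gradients_by_norm is not shown in this diff; a conventional implementation, assumed here, rescales all gradients jointly with tf.clip_by_global_norm so their global norm never exceeds clip_gradients:

```python
import tensorflow as tf

def _clip_gradients_by_norm(grads_and_vars, clip_gradients):
    """Jointly rescale gradients so their global norm is at most clip_gradients."""
    gradients, variables = zip(*grads_and_vars)
    clipped, _global_norm = tf.clip_by_global_norm(gradients, clip_gradients)
    return list(zip(clipped, variables))
```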