stemdl/inputs.py +2 −1

@@ -175,7 +175,7 @@ class DatasetTFRecords(object):
         alpha = tf.random_uniform([1], minval=self.params['noise_min'], maxval=self.params['noise_max'],
                                   dtype=image.dtype)
         noise = tf.random_uniform(image.shape, dtype=image.dtype)
-        trans_image = (1 - alpha[0]) * image / tf.reduce_max(image, keepdims=True) + alpha[0] * noise
+        trans_image = (1 - alpha[0]) * image + alpha[0] * noise
         return trans_image

@@ -520,6 +520,7 @@ class DatasetLMDB(DatasetTFRecords):
                 images.append(tf.reshape(image, self.data_specs['image_shape']))
                 labels.append(tf.reshape(label, self.data_specs['label_shape']))
         elif self.mode == 'eval':
+            ds = ds.take(self.num_samples)
             ds = ds.batch(self.params['batch_size'], drop_remainder=True)
             ds = ds.map(self.wrapped_decode)
             iterator = ds.make_one_shot_iterator()

stemdl/losses.py +3 −2

@@ -81,7 +81,7 @@ def calc_loss(n_net, scope, hyper_params, params, labels, step=None, images=None
         probe_re = n_net.model_output['decoder_RE']
         pot = n_net.model_output['inverter']
         pot_labels, probe_labels_re, probe_labels_im = [tf.expand_dims(itm, axis=1) for itm in tf.unstack(labels, axis=1)]
-        weight = np.prod(pot_labels.shape.as_list()[-2:])
+        # weight = np.prod(pot_labels.shape.as_list()[-2:])
+        weight = None
         inverter_loss = calculate_loss_regressor(pot, pot_labels, params, hyper_params, weight=weight)
         decoder_loss_im = calculate_loss_regressor(probe_im, probe_labels_im, params, hyper_params, weight=weight)

@@ -108,7 +108,8 @@ def calc_loss(n_net, scope, hyper_params, params, labels, step=None, images=None
     # Assemble all of the losses.
     losses = tf.get_collection(tf.GraphKeys.LOSSES)
     if hyper_params['network_type'] == 'YNet':
-        losses = [inverter_loss, decoder_loss_re, decoder_loss_im, reg_loss]
+        # losses = [inverter_loss, decoder_loss_re, decoder_loss_im, 0.01 * reg_loss]
+        losses = [inverter_loss, decoder_loss_re, decoder_loss_im]
     # losses, prefac = ynet_adjusted_losses(losses, step)
     # tf.summary.scalar("prefac_inverter", prefac)
     # losses = [inverter_loss]
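The substantive change in inputs.py above is the augmentation blend: the per-image max normalization is dropped, so the convex mix with uniform noise now runs at the image's native scale. A minimal NumPy sketch of that arithmetic (blend_uniform_noise is a hypothetical helper, not a stemdl function; the real pipeline draws alpha and noise with tf.random_uniform inside the input graph):

import numpy as np

def blend_uniform_noise(image, noise_min=0.0, noise_max=0.3, rng=None):
    # One blend factor per call, mirroring the one alpha drawn per image.
    rng = rng or np.random.default_rng()
    alpha = rng.uniform(noise_min, noise_max)
    noise = rng.uniform(size=image.shape).astype(image.dtype)
    # Updated form: no division by the image max before mixing.
    return (1 - alpha) * image + alpha * noise

img = np.random.rand(64, 64).astype(np.float32)
augmented = blend_uniform_noise(img, noise_min=0.1, noise_max=0.5)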
stemdl/network.py +20 −6

@@ -604,10 +604,18 @@ class ConvNet:
         is_training = 'train' == self.operation
         # TODO: scaling and centering during normalization need to be hyperparams. Now hardwired.
         param_initializers = {
-            'beta': tf.constant_initializer(0.0),
-            'gamma': tf.constant_initializer(0.1),
+            'beta': tf.constant_initializer(0.0, dtype=tf.float16),
+            'gamma': tf.constant_initializer(0.1, dtype=tf.float16),
         }
-        output = tf.contrib.layers.batch_norm(input, decay=decay, scale=True, epsilon=epsilon, zero_debias_moving_mean=False, is_training=is_training, fused=True, data_format='NCHW', renorm=False, param_initializers=param_initializers)
+        if self.params['IMAGE_FP16']:
+            input = tf.cast(input, tf.float32)
+        # with tf.variable_scope('layer_normalization', reuse=None) as scope:
+        #     output = tf.keras.layers.LayerNormalization(trainable=False)(inputs=input)
+        mean, variance = tf.nn.moments(input, axes=[2, 3], keepdims=True)
+        output = (input - mean) / (tf.sqrt(variance) + 1e-7)
+        if self.params['IMAGE_FP16']:
+            output = tf.cast(output, tf.float16)
+        #output = tf.contrib.layers.batch_norm(input, decay=decay, scale=True, epsilon=epsilon, zero_debias_moving_mean=False, is_training=is_training, fused=True, data_format='NCHW', renorm=False, param_initializers=param_initializers)
+        #output = tf.contrib.layers.batch_norm(input, decay=decay, scale=True, epsilon=epsilon, zero_debias_moving_mean=False, is_training=is_training, fused=True, data_format='NCHW', renorm=False)
         # output = input

         # Keep tabs on the number of weights

@@ -698,6 +706,8 @@ class ConvNet:
         if params is not None:
             if params['activation'] == 'tanh':
                 return tf.nn.tanh(input, name=name)
+            elif params['activation'] == 'leaky_relu':
+                return tf.nn.leaky_relu(input, name=name)
             else:
                 return tf.nn.relu(input, name=name)
         else:

@@ -2812,11 +2822,15 @@ class YNet(FCDenseNet, FCNet):
                                  'features': inputs.shape.as_list()[1], 'activation': 'relu', 'padding': 'VALID',
-                                 'batch_norm': True, 'dropout': 0.0})
-        out = self._batch_norm(input=inputs)
+                                 'batch_norm': False, 'dropout': 0.0})
+        if conv_params['batch_norm']:
+            out = self._batch_norm(input=inputs)
+        else:
+            out = inputs
         out = self._activate(input=out, params=conv_params)
         with tf.variable_scope('residual_conv_1', reuse=self.reuse) as scope:
             out, _ = self._conv(input=out, params=conv_params)
-            out = self._batch_norm(input=out)
+            if conv_params['batch_norm']:
+                out = self._batch_norm(input=out)
             out = self._activate(input=out, params=conv_params)
         with tf.variable_scope('residual_conv_2', reuse=self.reuse) as scope:
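The normalization rewrite in _batch_norm above swaps fused batch norm for a per-sample normalization over the spatial axes (axes=[2,3] of an NCHW tensor, i.e. per image and per channel), computed in float32 when IMAGE_FP16 is set. A rough NumPy equivalent of that math (spatial_norm_nchw is an illustrative name, assuming NCHW layout as in the diff):

import numpy as np

def spatial_norm_nchw(x, eps=1e-7):
    # Mean/variance over H and W for each (sample, channel) pair,
    # matching tf.nn.moments(input, axes=[2, 3], keepdims=True).
    x32 = x.astype(np.float32)                  # compute in fp32, as the fp16 casts do
    mean = x32.mean(axis=(2, 3), keepdims=True)
    var = x32.var(axis=(2, 3), keepdims=True)
    y = (x32 - mean) / (np.sqrt(var) + eps)     # eps added to the std, as in the diff
    return y.astype(x.dtype)

x = np.random.rand(2, 8, 32, 32).astype(np.float16)   # NCHW batch
y = spatial_norm_nchw(x)

Unlike the batch-norm path it replaces, this carries no moving averages and no learned beta/gamma, so it behaves identically in training and evaluation.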
stemdl/network_utils.py +9 −9

@@ -388,7 +388,7 @@ def generate_freq2space_json(out_dir= 'json_files', conv_type="conv_2D", input_c
 #########
 # Y-net #
 #########
-def generate_YNet_json(save= True, out_dir='json_files', n_pool=3, n_layers_per_block=2, kernel=[3,3], conv_type='conv_2D',
+def generate_YNet_json(save= True, out_dir='json_files', n_pool=3, n_layers_per_block=2, kernel=[3,3], conv_type='conv_2D', activation='relu',
                        dropout_prob=0, output_channels=1, output_size=256, fc_dim=256, init_features=1024, batch_norm=True, fc_layers=1):
     pool = OrderedDict({'type': 'pooling', 'stride': [2, 2], 'kernel': [2, 2], 'pool_type': 'max', 'padding': 'SAME'})

@@ -399,12 +399,12 @@ def generate_YNet_json(save= True, out_dir='json_files', n_pool=3, n_layers_per_
     layers_params_list = []
     layers_keys_list = []
     conv_cvae = OrderedDict({'type': 'conv_2D', 'stride': [2, 2], 'kernel': [4, 4], 'features': 16,
-                             'activation': 'relu', 'padding': 'SAME', 'batch_norm': batch_norm, 'dropout': 0.0})
-    fc_cvae = OrderedDict({'type': 'fully_connected', 'weights': fc_dim, 'bias': fc_dim, 'activation': 'relu',
+                             'activation': activation, 'padding': 'SAME', 'batch_norm': batch_norm, 'dropout': 0.0})
+    fc_cvae = OrderedDict({'type': 'fully_connected', 'weights': fc_dim, 'bias': fc_dim,
                            'activation': activation, 'regularize': True})
     cvae_model = OrderedDict({'n_conv_layers': 4, 'n_fc_layers': fc_layers, 'fc_params': fc_cvae, 'conv_params': conv_cvae})
     init_features = 1024
-    freq2space_block = OrderedDict({'type': 'freq2space', 'activation': 'relu', 'dropout': dropout_prob,
+    freq2space_block = OrderedDict({'type': 'freq2space', 'activation': activation, 'dropout': dropout_prob,
                                     'init_features': init_features, 'batch_norm': batch_norm})
     freq2space_block['type'] = 'freq2space_CVAE'
     freq2space_block['cvae_params'] = cvae_model

@@ -420,7 +420,7 @@ def generate_YNet_json(save= True, out_dir='json_files', n_pool=3, n_layers_per_
     layers_params_list = []
     layers_keys_list = []
     conv_layer_base = OrderedDict({'type': conv_type, 'stride': [1, 1], 'kernel': kernel, 'features': None,
-                                   'activation': 'relu', 'padding': 'SAME', 'batch_norm': batch_norm, 'dropout': dropout_prob})
+                                   'activation': activation, 'padding': 'SAME', 'batch_norm': batch_norm, 'dropout': dropout_prob})
     deconv_layer_base = OrderedDict({'type': "deconv_2D", 'stride': [2, 2], 'kernel': [4, 4], 'features': None,
                                      'padding': 'SAME', 'upsample': pool['kernel'][0]})
     features = 1024

@@ -472,7 +472,7 @@ def generate_YNet_json(save= True, out_dir='json_files', n_pool=3, n_layers_per_
     model_params.append(OrderedDict(zip(layers_keys_list, layers_params_list)))
     # inverter branch, essentially a freq2space layer then Transition Up
-    freq2space_block = OrderedDict({'type': 'freq2space', 'activation': 'relu', 'dropout': dropout_prob,
+    freq2space_block = OrderedDict({'type': 'freq2space', 'activation': activation, 'dropout': dropout_prob,
                                     'init_features': 64, 'n_fc_layers': 2})
     layers_params_list = []
     layers_keys_list = []

@@ -480,7 +480,7 @@ def generate_YNet_json(save= True, out_dir='json_files', n_pool=3, n_layers_per_
     layers_params_list.append(freq2space_block)
     conv_layer_base = OrderedDict({'type': conv_type, 'stride': [1, 1], 'kernel': kernel, 'features': None,
-                                   'activation': 'relu', 'padding': 'SAME', 'batch_norm': batch_norm, 'dropout': dropout_prob})
+                                   'activation': activation, 'padding': 'SAME', 'batch_norm': batch_norm, 'dropout': dropout_prob})
     deconv_layer_base = OrderedDict({'type': "deconv_2D", 'stride': [2, 2], 'kernel': [4, 4], 'features': None,
                                      'padding': 'SAME', 'upsample': pool['kernel'][0]})
     features = 1024

@@ -567,10 +567,10 @@ def generate_fcdensenet_json(random=False, conv_type="conv_2D", growth_rate=64,
     n_layers_per_block = [n_layers_per_block] * (2 * n_pool + 1)
     std_conv = OrderedDict({'type': conv_type, 'stride': [1, 1], 'kernel': DB_conv_kernel, 'features': growth_rate,
-                            'activation': 'relu', 'padding': 'SAME', 'batch_norm': False})
+                            'activation': activation, 'padding': 'SAME', 'batch_norm': False})
     deconv = OrderedDict({'type': ''})
     layer = OrderedDict({'type': conv_type, 'stride': [1, 1], 'kernel': DB_conv_kernel, 'features': growth_rate,
-                         'activation': 'relu', 'padding': 'SAME', 'batch_norm': True, 'dropout': dropout_prob})
+                         'activation': activation, 'padding': 'SAME', 'batch_norm': True, 'dropout': dropout_prob})
     pool = OrderedDict({'type': 'pooling', 'stride': [2, 2], 'kernel': [2, 2], 'pool_type': 'max', 'padding': 'SAME'})
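These generator edits all do the same thing: thread an activation argument through the JSON layer specs instead of hardwiring 'relu', so a config can select the leaky_relu branch newly added to ConvNet._activate. A sketch of the pattern (conv_spec is a hypothetical standalone helper, not a stemdl function):

from collections import OrderedDict

def conv_spec(activation='relu', kernel=(3, 3), features=64,
              batch_norm=True, dropout=0.0):
    # The generators emit dicts of this shape; 'activation' now comes
    # from the caller rather than a hardwired 'relu'.
    return OrderedDict({'type': 'conv_2D', 'stride': [1, 1],
                        'kernel': list(kernel), 'features': features,
                        'activation': activation, 'padding': 'SAME',
                        'batch_norm': batch_norm, 'dropout': dropout})

layer = conv_spec(activation='leaky_relu')  # selects tf.nn.leaky_relu downstream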
stemdl/runtime.py +4 −10

@@ -281,7 +281,7 @@ def train(network_config, hyper_params, params, gpu_id=None):
     # Gather all training related ops into a single one.
     update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
     increment_op = tf.assign_add(global_step, 1)
-    ema = tf.train.ExponentialMovingAverage(decay=0.999, num_updates=global_step)
+    ema = tf.train.ExponentialMovingAverage(decay=0.9, num_updates=global_step)
     all_ops = tf.group(*([train_opt] + update_ops + IO_ops + [increment_op]))

     with tf.control_dependencies([all_ops]):

@@ -407,10 +407,10 @@ def train(network_config, hyper_params, params, gpu_id=None):
             train_elf.before_run()
             # Here we do validation:
             if doValidate:
-                val = validate(network_config, hyper_params, params, sess, dset)
+                val = validate(network_config, hyper_params, params, sess, dset, num_batches=50)
                 val_results.append((train_elf.last_step, val))
             if doFinish:
-                val = validate(network_config, hyper_params, params, sess, dset)
+                val = validate(network_config, hyper_params, params, sess, dset, num_batches=50)
                 val_results.append((train_elf.last_step, val))
                 tf.reset_default_graph()
                 tf.keras.backend.clear_session()

@@ -532,9 +532,8 @@ def validate(network_config, hyper_params, params, sess, dset, num_batches=10):
         error_averaging = hvd.allreduce(errors)
         if num_batches is not None:
             num_samples = num_batches
-        else:
+        elif num_batches > dset.num_samples:
             num_samples = dset.num_samples
-        #error = np.array([sess.run([IO_ops,error_averaging])[-1] for i in range(4)])
         errors = np.array([sess.run([IO_ops, error_averaging])[-1] for i in range(num_samples // params['batch_size'])])
         result = errors.mean()
         print_rank('Validation Reconstruction Error %s: %3.3e' % (loss_label, errors.mean()))

@@ -563,11 +562,6 @@ def validate(network_config, hyper_params, params, sess, dset, num_batches=10):
         else:
             num_samples = dset.num_samples
         errors = np.array([sess.run([IO_ops, error_averaging])[-1] for i in range(num_samples // params['batch_size'])])
-        # errors = np.array([sess.run([IO_ops,errors])[-1] for i in range(dset.num_samples)])
-        # errors = tf.reduce_mean(errors)
-        # avg_errors = hvd.allreduce(tf.expand_dims(errors, axis=0))
-        # error = sess.run(avg_errors)
-        # print_rank('Validation Reconstruction Error %s: %3.3e' % (loss_label, errors.mean()))
         result = errors.mean()
         print_rank('Validation Reconstruction Error %s: %3.3e' % (loss_label, errors.mean()))
         tf.summary.scalar("Validation_loss_label_%s" % loss_label, tf.constant(errors.mean()))
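Two knobs change in runtime.py: validation is capped at num_batches=50 per call, and the weight EMA decay drops from 0.999 to 0.9. Because the ExponentialMovingAverage is constructed with num_updates, TensorFlow documents the effective decay as min(decay, (1 + num_updates) / (10 + num_updates)), so both settings coincide early in training. The small script below (effective_ema_decay is an illustrative helper) shows the two only diverge after roughly step 80, beyond which decay=0.9 makes the shadow weights track recent values much more closely:

def effective_ema_decay(decay, num_updates):
    # tf.train.ExponentialMovingAverage with num_updates set uses
    # min(decay, (1 + num_updates) / (10 + num_updates)).
    return min(decay, (1.0 + num_updates) / (10.0 + num_updates))

for step in (0, 50, 80, 1000, 10000):
    print(step, effective_ema_decay(0.9, step), effective_ema_decay(0.999, step))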