Loading scripts/summit_scripts/smc_19_sim.py +22 −23 Original line number Diff line number Diff line Loading @@ -26,7 +26,7 @@ def simulate(filehandle, cif_path, idx= None, gpu_id=0, clean_up=False): slab_t = sim_params['slab_t'] sim_params['space_group']= spgroup_num sim_params['material'] = matname energies = [100, 125, 150, 175, 200] energies = np.arange(100,200,10) for (sample_idx, energy) in enumerate(energies): try: cbed_stack = [] Loading Loading @@ -75,23 +75,19 @@ def simulate(filehandle, cif_path, idx= None, gpu_id=0, clean_up=False): wrong_shape = cbed.shape != (512, 512) if has_nan: print('rank=%d, skipped simulation=%s, index=%d, error=NaN' % (comm_rank, cif_path, sample_idx)) break pass elif wrong_shape: print('rank=%d, skipped simulation=%s, index=%d, error=wrong cbed shape' % (comm_rank, cif_path, sample_idx)) break pass else: cbed_stack.append(cbed) # write to h5 / tfrecords / lmdb if len(cbed_stack) != 3: break pass else: cbed_stack = np.stack(cbed_stack) if cbed_stack.shape != (3, 512, 512): print('rank=%d, skipped simulation=%s, index=%d, error=Wrong cbed_stack Shape' % (comm_rank, cif_path, sample_idx)) break else: g = filehandle.create_group('sample_%d_%d' % (idx, sample_idx)) g.attrs['space_group'] = np.string_(sim_params['space_group']) g.attrs['material'] = np.string_(sim_params['material']) Loading Loading @@ -125,7 +121,7 @@ def generate_data(samples, outdir_path, mode='train', runtime=2000): num_sims = samples.size h5path = os.path.join(outdir_path, 'batch_%s_%d.h5'% (mode, comm_rank)) mode = 'w' with h5py.File(h5path, mode=mode) as f: f = h5py.File(h5path, mode=mode) for (idx, cif_path) in enumerate(samples[comm_rank:num_sims:comm_size]): manual = idx < ( num_sims - comm_size) spgroup_num, matname = parse_cif_path(cif_path) Loading @@ -134,8 +130,10 @@ def generate_data(samples, outdir_path, mode='train', runtime=2000): if (time() - t_elaps) < runtime: simulate(f, cif_path, idx=idx, gpu_id=int(np.mod(comm_rank, 6)), clean_up=manual) else: # f.flush() break f.flush() f.close() return return def get_samples(cif_paths, ratio=0.9): samples = cif_paths Loading @@ -152,6 +150,7 @@ def get_samples(cif_paths, ratio=0.9): samples_train = samples[:train_size] samples_dev = samples[train_size:train_size + int(remain * train_size)] samples_test = samples[train_size + int(remain * train_size):] if comm_rank == 0: print('samples sizes (train, dev, test): %d, %d, %d' %(samples_train.size, samples_dev.size, samples_test.size)) return samples_train, samples_dev, samples_test return samples Loading @@ -161,11 +160,11 @@ def main(cifdir_path, outdir_path, runtime=1800): t_elaps = time() cif_paths = get_cif_paths(cifdir_path) samples_train, samples_dev, samples_test = get_samples(cif_paths, ratio=0.9) generate_data(samples_train, outdir_path, mode='train', runtime=runtime*0.9) generate_data(samples_train, outdir_path, mode='train', runtime=runtime*0.8) print('rank=%d, finished simulating training data' % comm_rank) generate_data(samples_test, outdir_path, mode='dev', runtime=runtime*0.95) generate_data(samples_dev, outdir_path, mode='dev', runtime=runtime*0.9) print('rank=%d, finished simulating dev data' % comm_rank) generate_data(samples_dev, outdir_path, mode='test', runtime=runtime) generate_data(samples_test, outdir_path, mode='test', runtime=runtime) print('rank=%d, finished simulating test data' % comm_rank) return Loading scripts/summit_scripts/smc_19_utils.py +10 −6 Original line number Diff line number Diff line Loading @@ -54,8 +54,8 @@ def get_cell_orientation(vec): def func(x): return np.abs(np.dot(vec,x)) res = minimize(func, np.array([-1,1,-1]), method='CG') if np.abs(res.x.sum()) < 1e-4: return np.array([0,0,1]) if np.sum(np.abs(res.x)) < 1e-4: return np.array([1,0,0]) return res.x Loading Loading @@ -170,7 +170,7 @@ def update_sim_params(sim_params, msa_cls=None, sp_cls=None): sim_params['angles'] = str(e) return sim_params def get_sim_params(sp_cell, slab_t= 200, sampling=np.array([512,512]), d_cutoff=4, grid_steps=np.array([32, 32]), def get_sim_params(sp_cell, slab_t= 100, sampling=np.array([512,512]), d_cutoff=4, grid_steps=np.array([32, 32]), cell_dim = 100, energy=100e3, orientation_num=3, beam_overlap=1): """ return a dict object to set params of simulation and write to h5. Loading @@ -178,12 +178,16 @@ def get_sim_params(sp_cell, slab_t= 200, sampling=np.array([512,512]), d_cutoff= sim_params= dict() # scattering params hkls, dhkls = get_kinematic_reflection(sp_cell.structure, top=orientation_num) hkls, dhkls = get_kinematic_reflection(sp_cell.structure, top=3) if hkls[0].size > 3: # hexagonal systems hkls = np.array([[itm[0], itm[1], itm[-1]] for itm in hkls]) cutoff = np.logical_and(dhkls < 5., dhkls > 1.) # not considering less than 5 ang. d-spacing # if dhkls[cutoff].size > 1: hkls, dhkls = hkls[cutoff], dhkls[cutoff] if dhkls[cutoff].size > 2: hkls = hkls[cutoff] dhkls = dhkls[cutoff] if dhkls.size > orientation_num: dhkls = dhkls[:orientation_num] hkls = hkls[:orientation_num] y_dirs = np.array([get_cell_orientation(z_dir) for z_dir in hkls]) semi_angles, _, _ = overlap_params(0.5, dhkls, voltage2Lambda(energy)) sim_params['y_dirs'] = y_dirs Loading Loading
scripts/summit_scripts/smc_19_sim.py +22 −23 Original line number Diff line number Diff line Loading @@ -26,7 +26,7 @@ def simulate(filehandle, cif_path, idx= None, gpu_id=0, clean_up=False): slab_t = sim_params['slab_t'] sim_params['space_group']= spgroup_num sim_params['material'] = matname energies = [100, 125, 150, 175, 200] energies = np.arange(100,200,10) for (sample_idx, energy) in enumerate(energies): try: cbed_stack = [] Loading Loading @@ -75,23 +75,19 @@ def simulate(filehandle, cif_path, idx= None, gpu_id=0, clean_up=False): wrong_shape = cbed.shape != (512, 512) if has_nan: print('rank=%d, skipped simulation=%s, index=%d, error=NaN' % (comm_rank, cif_path, sample_idx)) break pass elif wrong_shape: print('rank=%d, skipped simulation=%s, index=%d, error=wrong cbed shape' % (comm_rank, cif_path, sample_idx)) break pass else: cbed_stack.append(cbed) # write to h5 / tfrecords / lmdb if len(cbed_stack) != 3: break pass else: cbed_stack = np.stack(cbed_stack) if cbed_stack.shape != (3, 512, 512): print('rank=%d, skipped simulation=%s, index=%d, error=Wrong cbed_stack Shape' % (comm_rank, cif_path, sample_idx)) break else: g = filehandle.create_group('sample_%d_%d' % (idx, sample_idx)) g.attrs['space_group'] = np.string_(sim_params['space_group']) g.attrs['material'] = np.string_(sim_params['material']) Loading Loading @@ -125,7 +121,7 @@ def generate_data(samples, outdir_path, mode='train', runtime=2000): num_sims = samples.size h5path = os.path.join(outdir_path, 'batch_%s_%d.h5'% (mode, comm_rank)) mode = 'w' with h5py.File(h5path, mode=mode) as f: f = h5py.File(h5path, mode=mode) for (idx, cif_path) in enumerate(samples[comm_rank:num_sims:comm_size]): manual = idx < ( num_sims - comm_size) spgroup_num, matname = parse_cif_path(cif_path) Loading @@ -134,8 +130,10 @@ def generate_data(samples, outdir_path, mode='train', runtime=2000): if (time() - t_elaps) < runtime: simulate(f, cif_path, idx=idx, gpu_id=int(np.mod(comm_rank, 6)), clean_up=manual) else: # f.flush() break f.flush() f.close() return return def get_samples(cif_paths, ratio=0.9): samples = cif_paths Loading @@ -152,6 +150,7 @@ def get_samples(cif_paths, ratio=0.9): samples_train = samples[:train_size] samples_dev = samples[train_size:train_size + int(remain * train_size)] samples_test = samples[train_size + int(remain * train_size):] if comm_rank == 0: print('samples sizes (train, dev, test): %d, %d, %d' %(samples_train.size, samples_dev.size, samples_test.size)) return samples_train, samples_dev, samples_test return samples Loading @@ -161,11 +160,11 @@ def main(cifdir_path, outdir_path, runtime=1800): t_elaps = time() cif_paths = get_cif_paths(cifdir_path) samples_train, samples_dev, samples_test = get_samples(cif_paths, ratio=0.9) generate_data(samples_train, outdir_path, mode='train', runtime=runtime*0.9) generate_data(samples_train, outdir_path, mode='train', runtime=runtime*0.8) print('rank=%d, finished simulating training data' % comm_rank) generate_data(samples_test, outdir_path, mode='dev', runtime=runtime*0.95) generate_data(samples_dev, outdir_path, mode='dev', runtime=runtime*0.9) print('rank=%d, finished simulating dev data' % comm_rank) generate_data(samples_dev, outdir_path, mode='test', runtime=runtime) generate_data(samples_test, outdir_path, mode='test', runtime=runtime) print('rank=%d, finished simulating test data' % comm_rank) return Loading
scripts/summit_scripts/smc_19_utils.py +10 −6 Original line number Diff line number Diff line Loading @@ -54,8 +54,8 @@ def get_cell_orientation(vec): def func(x): return np.abs(np.dot(vec,x)) res = minimize(func, np.array([-1,1,-1]), method='CG') if np.abs(res.x.sum()) < 1e-4: return np.array([0,0,1]) if np.sum(np.abs(res.x)) < 1e-4: return np.array([1,0,0]) return res.x Loading Loading @@ -170,7 +170,7 @@ def update_sim_params(sim_params, msa_cls=None, sp_cls=None): sim_params['angles'] = str(e) return sim_params def get_sim_params(sp_cell, slab_t= 200, sampling=np.array([512,512]), d_cutoff=4, grid_steps=np.array([32, 32]), def get_sim_params(sp_cell, slab_t= 100, sampling=np.array([512,512]), d_cutoff=4, grid_steps=np.array([32, 32]), cell_dim = 100, energy=100e3, orientation_num=3, beam_overlap=1): """ return a dict object to set params of simulation and write to h5. Loading @@ -178,12 +178,16 @@ def get_sim_params(sp_cell, slab_t= 200, sampling=np.array([512,512]), d_cutoff= sim_params= dict() # scattering params hkls, dhkls = get_kinematic_reflection(sp_cell.structure, top=orientation_num) hkls, dhkls = get_kinematic_reflection(sp_cell.structure, top=3) if hkls[0].size > 3: # hexagonal systems hkls = np.array([[itm[0], itm[1], itm[-1]] for itm in hkls]) cutoff = np.logical_and(dhkls < 5., dhkls > 1.) # not considering less than 5 ang. d-spacing # if dhkls[cutoff].size > 1: hkls, dhkls = hkls[cutoff], dhkls[cutoff] if dhkls[cutoff].size > 2: hkls = hkls[cutoff] dhkls = dhkls[cutoff] if dhkls.size > orientation_num: dhkls = dhkls[:orientation_num] hkls = hkls[:orientation_num] y_dirs = np.array([get_cell_orientation(z_dir) for z_dir in hkls]) semi_angles, _, _ = overlap_params(0.5, dhkls, voltage2Lambda(energy)) sim_params['y_dirs'] = y_dirs Loading