Commit ae889eb0 authored by Laanait, Nouamane
Browse files

mods to smc scripts

parent de564fd3
Loading
Loading
Loading
Loading
+22 −23
Original line number Diff line number Diff line
@@ -26,7 +26,7 @@ def simulate(filehandle, cif_path, idx= None, gpu_id=0, clean_up=False):
    slab_t = sim_params['slab_t']
    sim_params['space_group']= spgroup_num
    sim_params['material'] = matname
    energies = [100, 125, 150, 175, 200]
    energies = np.arange(100,200,10)
    for (sample_idx, energy) in enumerate(energies):
        try:
            cbed_stack = []
@@ -75,23 +75,19 @@ def simulate(filehandle, cif_path, idx= None, gpu_id=0, clean_up=False):
                wrong_shape = cbed.shape != (512, 512) 
                if has_nan: 
                    print('rank=%d, skipped simulation=%s, index=%d, error=NaN' % (comm_rank, cif_path, sample_idx))
                    break
                    pass
                elif wrong_shape:
                    print('rank=%d, skipped simulation=%s, index=%d, error=wrong cbed shape' % (comm_rank, cif_path, sample_idx))
                    break
                    pass
                else:
                    cbed_stack.append(cbed)


            # write to h5 / tfrecords / lmdb
            if len(cbed_stack) != 3:
                break
                pass
            else:
                cbed_stack = np.stack(cbed_stack)
            if cbed_stack.shape != (3, 512, 512):
                print('rank=%d, skipped simulation=%s, index=%d, error=Wrong cbed_stack Shape' % (comm_rank, cif_path, sample_idx))
                break
            else:
                g = filehandle.create_group('sample_%d_%d' % (idx, sample_idx))
                g.attrs['space_group'] = np.string_(sim_params['space_group'])
                g.attrs['material'] = np.string_(sim_params['material'])
@@ -125,7 +121,7 @@ def generate_data(samples, outdir_path, mode='train', runtime=2000):
    num_sims = samples.size
    h5path = os.path.join(outdir_path, 'batch_%s_%d.h5'% (mode, comm_rank))
    mode = 'w'
    with h5py.File(h5path, mode=mode) as f:
    f = h5py.File(h5path, mode=mode)
    for (idx, cif_path) in enumerate(samples[comm_rank:num_sims:comm_size]):
        manual = idx < ( num_sims - comm_size) 
        spgroup_num, matname = parse_cif_path(cif_path)
@@ -134,8 +130,10 @@ def generate_data(samples, outdir_path, mode='train', runtime=2000):
        if (time() - t_elaps) < runtime:
            simulate(f, cif_path, idx=idx, gpu_id=int(np.mod(comm_rank, 6)), clean_up=manual)
        else:
#                 f.flush()
                break
            f.flush()
            f.close()
            return
    return
            
def get_samples(cif_paths, ratio=0.9):
    samples = cif_paths
@@ -152,6 +150,7 @@ def get_samples(cif_paths, ratio=0.9):
        samples_train = samples[:train_size]
        samples_dev = samples[train_size:train_size + int(remain * train_size)]
        samples_test = samples[train_size + int(remain * train_size):]
        if comm_rank == 0:
            print('samples sizes (train, dev, test): %d, %d, %d' %(samples_train.size, samples_dev.size, samples_test.size))
        return samples_train, samples_dev, samples_test
    return samples
@@ -161,11 +160,11 @@ def main(cifdir_path, outdir_path, runtime=1800):
    t_elaps = time()
    cif_paths = get_cif_paths(cifdir_path)
    samples_train, samples_dev, samples_test = get_samples(cif_paths, ratio=0.9)
    generate_data(samples_train, outdir_path, mode='train', runtime=runtime*0.9)
    generate_data(samples_train, outdir_path, mode='train', runtime=runtime*0.8)
    print('rank=%d, finished simulating training data' % comm_rank)
    generate_data(samples_test, outdir_path, mode='dev', runtime=runtime*0.95)
    generate_data(samples_dev, outdir_path, mode='dev', runtime=runtime*0.9)
    print('rank=%d, finished simulating dev data' % comm_rank)
    generate_data(samples_dev, outdir_path, mode='test', runtime=runtime)
    generate_data(samples_test, outdir_path, mode='test', runtime=runtime)
    print('rank=%d, finished simulating test data' % comm_rank)
    return
            
+10 −6
Original line number Diff line number Diff line
@@ -54,8 +54,8 @@ def get_cell_orientation(vec):
    def func(x):
        return np.abs(np.dot(vec,x))
    res = minimize(func, np.array([-1,1,-1]), method='CG')
    if np.abs(res.x.sum()) < 1e-4:
        return np.array([0,0,1])
    if np.sum(np.abs(res.x)) < 1e-4:
        return np.array([1,0,0]) 
    return res.x
    

@@ -170,7 +170,7 @@ def update_sim_params(sim_params, msa_cls=None, sp_cls=None):
            sim_params['angles'] = str(e)
    return sim_params

def get_sim_params(sp_cell, slab_t= 200, sampling=np.array([512,512]), d_cutoff=4, grid_steps=np.array([32, 32]),
def get_sim_params(sp_cell, slab_t= 100, sampling=np.array([512,512]), d_cutoff=4, grid_steps=np.array([32, 32]),
                   cell_dim = 100, energy=100e3, orientation_num=3, beam_overlap=1):
    """
    return a dict object to set params of simulation and write to h5.
@@ -178,12 +178,16 @@ def get_sim_params(sp_cell, slab_t= 200, sampling=np.array([512,512]), d_cutoff=
    sim_params= dict()
    
    # scattering params
    hkls, dhkls = get_kinematic_reflection(sp_cell.structure, top=orientation_num)
    hkls, dhkls = get_kinematic_reflection(sp_cell.structure, top=3)
    if hkls[0].size > 3: # hexagonal systems    
        hkls = np.array([[itm[0], itm[1], itm[-1]] for itm in hkls])
    cutoff = np.logical_and(dhkls < 5., dhkls > 1.) # mask selecting d-spacings strictly between 1 and 5 angstroms
#     if dhkls[cutoff].size > 1:
    hkls, dhkls = hkls[cutoff], dhkls[cutoff]
    if dhkls[cutoff].size > 2:
        hkls = hkls[cutoff]
        dhkls = dhkls[cutoff]
    if dhkls.size > orientation_num:
        dhkls = dhkls[:orientation_num]
        hkls = hkls[:orientation_num]
    y_dirs = np.array([get_cell_orientation(z_dir) for z_dir in hkls])
    semi_angles, _, _ = overlap_params(0.5, dhkls, voltage2Lambda(energy))
    sim_params['y_dirs'] = y_dirs