minor mods to smc scripts and context creation has been pulled out of the MSA... (b8df5eb3) · Commits · disMultiABM / namsa

namsa/msa.py

+7 −6

Original line number	Diff line number	Diff line
		@@ -455,7 +455,8 @@ class MSAGPU(MSAHybrid):
		from pycuda.tools import clear_context_caches
		clear_context_caches()

		def build_potential_slices(self, slice_thickness):
		def build_potential_slices(self, ctx, slice_thickness):
		self.ctx = ctx
		# find number of slices and atomic sites per slice
		self.slice_t = slice_thickness
		self.num_slices = np.int32(np.floor(self.dims[-1] / slice_thickness))
		@@ -507,9 +508,9 @@ class MSAGPU(MSAHybrid):
		# build potential
		build_potential(potential_slices_d, atom_pot_stack_d, sites_d,
		np.float32(self.sigma), block=block, grid=grid)
		ctx.synchronize()
		self.ctx.synchronize()
		cuda.memcpy_dtoh_async(self.potential_slices, potential_slices_d, cuda.Stream())

		self.ctx.synchronize()
		# free gpu memory
		sites_d.free()
		atom_pot_stack_d.free()
		@@ -593,7 +594,7 @@ class MSAGPU(MSAHybrid):
		# build probe in x-space
		fft_plan = skfft.Plan(self.psi_k.shape, np.complex64, np.complex64, batch=1)
		cufft.cufftExecC2C(fft_plan.handle, int(psi_k_d), int(psi_x_d), cufft.CUFFT_INVERSE)
		ctx.synchronize()
		self.ctx.synchronize()
		fftshift_func(psi_x_d, shape_y, block=block, grid=grid)
		#ctx.synchronize()
		cuda.memcpy_dtoh_async(self.psi, psi_x_d, cuda.Stream())
		@@ -760,7 +761,7 @@ class MSAGPU(MSAHybrid):
		self.print_debug('batch: %s' % format(batch))
		self.__propagate_beams(num_probes, batch, probe_d, propag_d, psi_k_d, norm_const, grid_steps_d, grid_range_d,
		self.probes[phase][batch], plan, ones_d, stream, transmit=transmit)
		ctx.synchronize()
		self.ctx.synchronize()
		# 4. clean-up
		for plan, probe_d, norm_const in zip(plans, probes_d, norm_consts):
		cufft.cufftDestroy(plan.handle)
		@@ -768,7 +769,7 @@ class MSAGPU(MSAHybrid):
		norm_const.free()
		# ctx.synchronize()
		del probe_d, norm_const, plan
		ctx.synchronize()
		self.ctx.synchronize()
		self.print_verbose('finished simulation phase #%d' % i)
		self.probes /= self.normalization
		# self.probes[phase][batch] = self.probes[phase][batch]/self.normalization

namsa/msa_bkp.py

0 → 100644

+972 −0

File added.

Preview size limit exceeded, changes collapsed.

scripts/summit_scripts/smc_19_sim.py

+60 −19

Original line number	Diff line number	Diff line
		@@ -8,17 +8,44 @@ from mpi4py import MPI
		from itertools import chain, product
		import tensorflow as tf
		import lmdb
		import pycuda.driver as cuda
		import pycuda

		comm = MPI.COMM_WORLD
		comm_size = comm.Get_size()
		comm_rank = comm.Get_rank()

		def setup_device(gpu_rank=0):
		    """Create a CUDA context on one device and register its teardown at exit.

		    Initializes the CUDA driver, makes a context on device ``gpu_rank``,
		    stores it in the module-level ``ctx`` global and registers an
		    ``atexit`` hook that pops and detaches the context and then clears
		    PyCUDA's context caches.

		    Args:
		        gpu_rank: Ordinal of the CUDA device to create the context on.

		    Returns:
		        The context object returned by ``dev.make_context()``.
		    """
		    global ctx
		    import atexit

		    cuda.init()
		    dev = cuda.Device(gpu_rank)
		    ctx = dev.make_context()

		    def _clean_up():
		        """Best-effort release of the context created by ``setup_device``."""
		        global ctx
		        if ctx is not None:
		            try:
		                ctx.pop()
		                ctx.detach()
		            except Exception as err:
		                # Teardown stays best-effort (never raise at interpreter
		                # exit), but the failure is reported instead of being
		                # swallowed, and KeyboardInterrupt/SystemExit are no
		                # longer caught as they were by the bare ``except``.
		                import sys
		                print('setup_device: context clean-up failed: %s'
		                      % format(err), file=sys.stderr)
		        # NOTE(review): indentation was lost in this view; cache clearing is
		        # assumed to run unconditionally, as in pycuda.autoinit -- confirm.
		        from pycuda.tools import clear_context_caches
		        clear_context_caches()

		    atexit.register(_clean_up)
		    return ctx

		def simulate(filehandle, cif_path, idx= None, gpu_ctx=None, clean_up=False):

		def simulate(filehandle, cif_path, idx= None, gpu_id=0, clean_up=False):
		# load cif and get sim params
		spgroup_num, matname = parse_cif_path(cif_path)
		sp = SupercellBuilder(cif_path, verbose=False, debug=False)
		latts = np.array(sp.structure.lattice.abc)
		if np.any(latts >= 10.):
		print('rank=%d, skipped simulation=%s, latt. const. too large=%s' % (comm_rank, cif_path, format(latts)))
		return

		sim_params = get_sim_params(sp, grid_steps=np.array([8,8]), orientation_num=3)
		z_dirs = sim_params['z_dirs']
		y_dirs = sim_params['y_dirs']
		@@ -27,6 +54,8 @@ def simulate(filehandle, cif_path, idx= None, gpu_id=0, clean_up=False):
		sim_params['space_group']= spgroup_num
		sim_params['material'] = matname
		energies = np.arange(100,200,10)

		# ctx = msa.setup_device(gpu_rank=gpu_id)
		for (sample_idx, energy) in enumerate(energies):
		try:
		cbed_stack = []
		@@ -47,9 +76,9 @@ def simulate(filehandle, cif_path, idx= None, gpu_id=0, clean_up=False):
		# simulate
		msa = MSAGPU(energy, semi_angle, sp.supercell_sites, sampling=sampling,
		verbose=False, debug=False)
		ctx = msa.setup_device(gpu_rank=gpu_id)
		# ctx = msa.setup_device(gpu_rank=gpu_id)
		msa.calc_atomic_potentials()
		msa.build_potential_slices(slice_thickness)
		msa.build_potential_slices(gpu_ctx, slice_thickness)
		msa.build_probe(probe_dict=probe_params)
		msa.generate_probe_positions(grid_steps=grid_steps)
		msa.plan_simulation()
		@@ -57,24 +86,33 @@ def simulate(filehandle, cif_path, idx= None, gpu_id=0, clean_up=False):

		# process cbed
		cbed = msa.probes.mean(0)

		pot_isnan = np.isnan(msa.potential_slices)
		# update sim_params dict
		sim_params = update_sim_params(sim_params, msa_cls=msa, sp_cls=sp)

		#
		# clean-up context and/or allocated memory
		if clean_up and ctx is not None:
		msa.clean_up(ctx=ctx, vars=msa.vars)
		del msa
		else:
		msa.clean_up(ctx=None, vars=msa.vars)
		del msa
		# if clean_up and ctx is not None:
		# msa.clean_up(ctx=ctx, vars=msa.vars)
		# # del msa
		# else:
		# msa.clean_up(ctx=None, vars=msa.vars)
		# # del msa

		# check data integrity
		# isnan = np.isnan(cbed)
		# print('cbed elements with nan', np.where(isnan == True)[0].size)
		has_nan = np.all(np.isnan(cbed))
		# pot_isnan = np.isnan(msa.potential_slices)
		# print('pot elements with nan', np.where(pot_isnan == True)[0].size)

		# if has_nan:
		# # print(z_dirs,y_dirs,sim_params['semi_angles'],sim_params['abc'], energy)
		# print('')
		wrong_shape = cbed.shape != (512, 512)
		if has_nan:
		print('rank=%d, skipped simulation=%s, index=%d, error=NaN' % (comm_rank, cif_path, sample_idx))
		print('rank=%d, skipped simulation=%s, index=%d, error=NaN, abc=%s' % (comm_rank, cif_path, sample_idx, format(sim_params['abc'])))
		pass
		elif wrong_shape:
		print('rank=%d, skipped simulation=%s, index=%d, error=wrong cbed shape' % (comm_rank, cif_path, sample_idx))
		@@ -106,8 +144,8 @@ def simulate(filehandle, cif_path, idx= None, gpu_id=0, clean_up=False):
		print("rank=%d, skipped simulation=%s, error=%s" % (comm_rank, cif_path, format(e)))
		finally:
		try:
		if clean_up and ctx is not None:
		msa.clean_up(ctx=ctx, vars=msa.vars)
		if clean_up and gpu_ctx is not None:
		msa.clean_up(ctx=gpu_ctx, vars=msa.vars)
		del msa
		else:
		msa.clean_up(ctx=None, vars=msa.vars)
		@@ -116,7 +154,7 @@ def simulate(filehandle, cif_path, idx= None, gpu_id=0, clean_up=False):
		pass


		def generate_data(samples, outdir_path, mode='train', runtime=2000):
		def generate_data(samples, outdir_path, mode='train', runtime=2000, gpu_ctx=None):
		t = time()
		num_sims = samples.size
		h5path = os.path.join(outdir_path, 'batch_%s_%d.h5'% (mode, comm_rank))
		@@ -128,11 +166,13 @@ def generate_data(samples, outdir_path, mode='train', runtime=2000):
		if comm_rank == 0 and bool(idx % 100):
		print('time=%3.2f, num_sims= %d' %(time() - t, idx * comm_size))
		if (time() - t_elaps) < runtime:
		simulate(f, cif_path, idx=idx, gpu_id=int(np.mod(comm_rank, 6)), clean_up=manual)
		simulate(f, cif_path, idx=idx, gpu_ctx=gpu_ctx, clean_up=False)
		else:
		f.flush()
		f.close()
		return
		f.flush()
		f.close()
		return

		def get_samples(cif_paths, ratio=0.9):
		@@ -159,12 +199,13 @@ def main(cifdir_path, outdir_path, runtime=1800):
		global t_elaps
		t_elaps = time()
		cif_paths = get_cif_paths(cifdir_path)
		ctx = setup_device(gpu_rank=int(np.mod(comm_rank, 6)))
		samples_train, samples_dev, samples_test = get_samples(cif_paths, ratio=0.9)
		generate_data(samples_train, outdir_path, mode='train', runtime=runtime*0.8)
		generate_data(samples_train, outdir_path, mode='train', runtime=runtime*0.8, gpu_ctx=ctx)
		print('rank=%d, finished simulating training data' % comm_rank)
		generate_data(samples_dev, outdir_path, mode='dev', runtime=runtime*0.9)
		generate_data(samples_dev, outdir_path, mode='dev', runtime=runtime*0.9, gpu_ctx=ctx)
		print('rank=%d, finished simulating dev data' % comm_rank)
		generate_data(samples_test, outdir_path, mode='test', runtime=runtime)
		generate_data(samples_test, outdir_path, mode='test', runtime=runtime, gpu_ctx=ctx)
		print('rank=%d, finished simulating test data' % comm_rank)
		return

Admin message