Loading namsa/msa.py +9 −44 Original line number Diff line number Diff line Loading @@ -9,16 +9,15 @@ import ctypes import sys from warnings import warn, catch_warnings, simplefilter import os try: import pyfftw except: pyfftw=None import pycuda import pycuda.driver as cuda from pycuda import gpuarray import skcuda.fft as skfft import skcuda.cufft as cufft from time import time from time import time, sleep import h5py from mpi4py import MPI Loading @@ -44,27 +43,19 @@ def setup_device(gpu_id=0): cuda.init() dev = cuda.Device(gpu_id) ctx = dev.make_context() # ctx.attach() gpu_id = gpu_id import atexit def _clean_up(): global ctx if ctx is not None: try:#global ctx #ctx.push() try: ctx.pop() ctx.detach() #ctx = None except Exception as e: warn(format(e)) from pycuda.tools import clear_context_caches clear_context_caches() atexit.register(_clean_up) return ctx # return context in case of manual clean-up return ctx class MSA(object): def __init__(self, energy, semi_angle, supercell, sampling=np.array([512, 512]), max_angle=None, verbose=False, Loading Loading @@ -356,37 +347,11 @@ class MSA(object): class MSAHybrid(MSA): def setup_device(self, gpu_rank=0): global ctx cuda.init() dev = cuda.Device(gpu_rank) ctx = dev.make_context() # ctx.attach() self.gpu_rank = gpu_rank import atexit def _clean_up(): global ctx if ctx is not None: try:#global ctx #ctx.push() ctx.pop() ctx.detach() #ctx = None except: pass from pycuda.tools import clear_context_caches clear_context_caches() atexit.register(_clean_up) return ctx # return context in case of manual clean-up def plan_simulation(self, num_probes=None): if num_probes is None: num_probes = self.num_probes self.print_verbose('Simulation requested %d probes simultaneously.' % self.num_probes) free_mem, tot_mem = pycuda.driver.mem_get_info() free_mem, tot_mem = cuda.mem_get_info() free_mem = free_mem/1024e6 # in GB mem_alloc = num_probes * np.prod(self.sampling) * 8 / 1024e6 + self.potential_slices.nbytes / 1024e6 self.print_verbose('mem_alloc: %2.3f' % mem_alloc) Loading Loading @@ -425,9 +390,9 @@ class MSAHybrid(MSA): self.print_verbose('Spent %2.4f s building %d probes on cpu' % (sim_t, self.max_probes)) # Copy over to device trans_gpu = pycuda.gpuarray.to_gpu_async(slices) mask_propag_gpu = pycuda.gpuarray.to_gpu_async(mask * propag) probes_gpu = pycuda.gpuarray.to_gpu_async(self.probes) trans_gpu = gpuarray.to_gpu_async(slices) mask_propag_gpu = gpuarray.to_gpu_async(mask * propag) probes_gpu = gpuarray.to_gpu_async(self.probes) # Setup fft plans # TODO: tile multiple fft plans Loading Loading @@ -892,7 +857,7 @@ class MSAGPU(MSAHybrid): class MSAMPI(MSAGPU): def __init__(self, *args, **kwargs): global comm #global comm comm = MPI.COMM_WORLD self.size = comm.Get_size() self.rank = comm.Get_rank() Loading namsa/scattering.py +1 −1 Original line number Diff line number Diff line Loading @@ -6,7 +6,7 @@ def get_kinematic_reflection(unit_cell, top=3): Find top hkl and d_hkl with highest intensity. """ xrd = XRDCalculator().get_pattern(unit_cell) hkls = np.array([list(itm.keys())[0] for itm in xrd.hkls]) hkls = np.array([itm[0]['hkl'] for itm in xrd.hkls]) intens = xrd.y if top > intens.size: top = intens.size Loading Loading
namsa/msa.py +9 −44 Original line number Diff line number Diff line Loading @@ -9,16 +9,15 @@ import ctypes import sys from warnings import warn, catch_warnings, simplefilter import os try: import pyfftw except: pyfftw=None import pycuda import pycuda.driver as cuda from pycuda import gpuarray import skcuda.fft as skfft import skcuda.cufft as cufft from time import time from time import time, sleep import h5py from mpi4py import MPI Loading @@ -44,27 +43,19 @@ def setup_device(gpu_id=0): cuda.init() dev = cuda.Device(gpu_id) ctx = dev.make_context() # ctx.attach() gpu_id = gpu_id import atexit def _clean_up(): global ctx if ctx is not None: try:#global ctx #ctx.push() try: ctx.pop() ctx.detach() #ctx = None except Exception as e: warn(format(e)) from pycuda.tools import clear_context_caches clear_context_caches() atexit.register(_clean_up) return ctx # return context in case of manual clean-up return ctx class MSA(object): def __init__(self, energy, semi_angle, supercell, sampling=np.array([512, 512]), max_angle=None, verbose=False, Loading Loading @@ -356,37 +347,11 @@ class MSA(object): class MSAHybrid(MSA): def setup_device(self, gpu_rank=0): global ctx cuda.init() dev = cuda.Device(gpu_rank) ctx = dev.make_context() # ctx.attach() self.gpu_rank = gpu_rank import atexit def _clean_up(): global ctx if ctx is not None: try:#global ctx #ctx.push() ctx.pop() ctx.detach() #ctx = None except: pass from pycuda.tools import clear_context_caches clear_context_caches() atexit.register(_clean_up) return ctx # return context in case of manual clean-up def plan_simulation(self, num_probes=None): if num_probes is None: num_probes = self.num_probes self.print_verbose('Simulation requested %d probes simultaneously.' % self.num_probes) free_mem, tot_mem = pycuda.driver.mem_get_info() free_mem, tot_mem = cuda.mem_get_info() free_mem = free_mem/1024e6 # in GB mem_alloc = num_probes * np.prod(self.sampling) * 8 / 1024e6 + self.potential_slices.nbytes / 1024e6 self.print_verbose('mem_alloc: %2.3f' % mem_alloc) Loading Loading @@ -425,9 +390,9 @@ class MSAHybrid(MSA): self.print_verbose('Spent %2.4f s building %d probes on cpu' % (sim_t, self.max_probes)) # Copy over to device trans_gpu = pycuda.gpuarray.to_gpu_async(slices) mask_propag_gpu = pycuda.gpuarray.to_gpu_async(mask * propag) probes_gpu = pycuda.gpuarray.to_gpu_async(self.probes) trans_gpu = gpuarray.to_gpu_async(slices) mask_propag_gpu = gpuarray.to_gpu_async(mask * propag) probes_gpu = gpuarray.to_gpu_async(self.probes) # Setup fft plans # TODO: tile multiple fft plans Loading Loading @@ -892,7 +857,7 @@ class MSAGPU(MSAHybrid): class MSAMPI(MSAGPU): def __init__(self, *args, **kwargs): global comm #global comm comm = MPI.COMM_WORLD self.size = comm.Get_size() self.rank = comm.Get_rank() Loading
namsa/scattering.py +1 −1 Original line number Diff line number Diff line Loading @@ -6,7 +6,7 @@ def get_kinematic_reflection(unit_cell, top=3): Find top hkl and d_hkl with highest intensity. """ xrd = XRDCalculator().get_pattern(unit_cell) hkls = np.array([list(itm.keys())[0] for itm in xrd.hkls]) hkls = np.array([itm[0]['hkl'] for itm in xrd.hkls]) intens = xrd.y if top > intens.size: top = intens.size Loading