Commit 2cf564d5 authored by Somnath, Suhas

Merge branch 'cnms_dev' of https://github.com/pycroscopy/pycroscopy into cnms_dev

parents c21d523e 16f3c570
......@@ -22,7 +22,7 @@ Once a user converts their microscope's data format into an HDF5 format, by simp
2. Installation
---------------
Pycroscopy requires the installation of a development environment such as Spyder from Continuum or PyCharm.
Pycroscopy requires many commonly used Python packages such as numpy and scipy. To simplify the installation process, we recommend installing Anaconda, which contains most of the prerequisite packages as well as a development environment - Spyder.
1. Uninstall existing Python 2.7 distribution(s), if installed. Restart the computer afterwards.
......@@ -41,3 +41,5 @@ Pycroscopy requires the installation of a development environment such as Spyder
pip install pycroscopy
4. Enjoy pycroscopy!
If you would like to quickly view HDF5 files generated by and used in pycroscopy, we recommend HDF View - available at <https://support.hdfgroup.org/products/java/hdfview/>
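After installation, a minimal sanity check from the new environment (this assumes the package exposes a version string):

    import pycroscopy as px

    print(px.__version__)  # should print the installed version without raising ImportError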
......@@ -29,7 +29,8 @@ MOCK_MODULES = ['numpy', 'scipy', 'matplotlib', 'matplotlib.pyplot',
'sklearn.utils.extmath', 'skimage', 'skimage.feature',
'skimage.measure', 'skimage.transform', 'matplotlib.patches',
'matplotlib.colors', 'numpy_groupies', 'scipy.linalg',
'skimage.data', 'skimage.io', 'skimage.util', 'igor']
'skimage.data', 'skimage.io', 'skimage.util', 'igor',
'sklearn.neighbors']
for mod_name in MOCK_MODULES:
sys.modules[mod_name] = mock.Mock()
# sys.modules.update((mod_name, mock.Mock()))
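For context, these mocks let Sphinx autodoc import pycroscopy on a documentation-build machine that lacks the heavy scientific stack: any attribute access on a mocked module returns another Mock instead of raising ImportError. A minimal sketch of the same pattern (module names here are illustrative, and `unittest.mock` is assumed in place of whichever mock library conf.py imports):

    import sys
    from unittest import mock

    # Stub out packages that are unavailable at docs-build time
    for mod_name in ('numpy', 'scipy', 'sklearn.neighbors'):
        sys.modules[mod_name] = mock.Mock()

    import numpy  # now resolves to the Mock, so downstream imports succeed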
......
......@@ -57,6 +57,8 @@ class BELoopModel(Model):
def __init__(self, h5_main, variables=['DC_Offset'], parallel=True):
super(BELoopModel, self).__init__(h5_main, variables, parallel)
self._h5_group = None
self.h5_guess_parameters = None
self.h5_fit_parameters = None
self._sho_spec_inds = None
self._sho_spec_vals = None # used only at one location. can remove if deemed unnecessary
self._met_spec_inds = None
......@@ -113,41 +115,83 @@ class BELoopModel(Model):
return super(BELoopModel, self)._isLegal(h5_main, variables)
    # def simulate_script(self):
    #
    #     self._create_projection_datasets()
    #     max_pos, sho_spec_inds_per_forc, metrics_spec_inds_per_forc = self._get_sho_chunk_sizes(10, verbose=True)
    #
    #     # turn this into a loop
    #     forc_chunk_index = 0
    #     pos_chunk_index = 0
    #
    #     dc_vec, loops_2d, nd_mat_shape_dc_first, order_dc_offset_reverse = self._get_projection_data(
    #         forc_chunk_index, max_pos, metrics_spec_inds_per_forc, pos_chunk_index, sho_spec_inds_per_forc)
    #
    #     # step 8: perform loop unfolding
    #     projected_loops_2d, loop_metrics_1d = self._project_loop_batch(dc_vec, np.transpose(loops_2d))
    #     print('Finished projecting all loops')
    #     print('Projected loops of shape:', projected_loops_2d.shape, ', need to bring to:', nd_mat_shape_dc_first)
    #     print('Loop metrics of shape:', loop_metrics_1d.shape, ', need to bring to:', nd_mat_shape_dc_first[1:])
    #
    #     # test the reshapes back
    #     projected_loops_2d = self._reshape_projected_loops_for_h5(projected_loops_2d,
    #                                                               order_dc_offset_reverse,
    #                                                               nd_mat_shape_dc_first)
    #     metrics_2d, success = self._reshape_results_for_h5(loop_metrics_1d, nd_mat_shape_dc_first)

    def _set_guess(self, h5_guess):
        """
        Setup to run the fit on an existing guess dataset.  Sets the attributes
        normally defined during doGuess.

        Parameters
        ----------
        h5_guess : h5py.Dataset
            Dataset object containing the guesses

        """
        '''
        Get the Spectroscopic and Position datasets from `self.h5_main`
        '''
        self._sho_spec_inds = getAuxData(self.h5_main, auxDataName=['Spectroscopic_Indices'])[0]
        self._sho_spec_vals = getAuxData(self.h5_main, auxDataName=['Spectroscopic_Values'])[0]
        self._sho_pos_inds = getAuxData(self.h5_main, auxDataName=['Position_Indices'])[0]

        '''
        Find the Spectroscopic index for the DC_Offset
        '''
        dc_ind = np.argwhere(self._sho_spec_vals.attrs['labels'] == 'DC_Offset').squeeze()
        self._dc_spec_index = dc_ind
        self._dc_offset_index = 1 + dc_ind

        '''
        Get the group and projection datasets
        '''
        self._h5_group = h5_guess.parent
        self.h5_projected_loops = self._h5_group['Projected_Loops']
        self.h5_loop_metrics = self._h5_group['Loop_Metrics']
        self._met_spec_inds = self._h5_group['Loop_Metrics_Indices']

        self.h5_guess = h5_guess
    def doGuess(self, max_mem=None, processors=None, verbose=False, get_loop_parameters=True):
        """
        Parameters
        ----------
        processors : uint, optional
            Number of processors to use for computing. Currently this is a serial operation and this attribute is
            ignored.
            Default None, output of psutil.cpu_count - 2 is used
        max_mem : uint, optional
            Memory in MB to use for computation
            Default None, available memory from psutil.virtual_memory is used
        verbose : bool, optional
            Whether or not to print debug statements
            Default False
        get_loop_parameters : bool, optional
            Should the physical loop parameters be calculated after the guess is done
            Default True

        Returns
        -------
......@@ -209,22 +253,44 @@ class BELoopModel(Model):
self._getDataChunk()
if get_loop_parameters:
self.h5_guess_parameters = self.extract_loop_parameters(self.h5_guess)
return self.h5_guess
    def doFit(self, processors=None, max_mem=None, solver_type='least_squares', solver_options={'jac': '2-point'},
              obj_func={'class': 'BE_Fit_Methods', 'obj_func': 'BE_LOOP', 'xvals': np.array([])},
              get_loop_parameters=True, h5_guess=None):
        """
        Fit the loops

        Parameters
        ----------
        processors : uint, optional
            Number of processors to use for computing. Currently this is a serial operation
            Default None, output of psutil.cpu_count - 2 is used
        max_mem : uint, optional
            Memory in MB to use for computation
            Default None, available memory from psutil.virtual_memory is used
        solver_type : str
            Which solver from scipy.optimize should be used to fit the loops
        solver_options : dict of str
            Parameters to be passed to the solver defined by `solver_type`
        obj_func : dict of str
            Dictionary defining the class and method for the loop residual function as well
            as the parameters to be passed
        get_loop_parameters : bool, optional
            Should the physical loop parameters be calculated after the fit is done
            Default True
        h5_guess : h5py.Dataset
            Existing guess to use as input to fit.
            Default None

        Returns
        -------
        results : list
            List of the results returned by the solver
        """
if processors is None:
processors = self._maxCpus
else:
......@@ -233,6 +299,12 @@ class BELoopModel(Model):
if max_mem is None:
max_mem = self._maxDataChunk
if h5_guess is not None:
self._set_guess(h5_guess)
elif self.h5_guess is None:
print("You need to guess before fitting\n")
return None
self._createFitDataset()
self._get_sho_chunk_sizes(max_mem, verbose=True)
......@@ -282,7 +354,7 @@ class BELoopModel(Model):
self.fit = np.hstack(tuple(results))
self._setResults()
return results
elif legit_obj_func:
warn('Error: Solver "%s" does not exist! For additional info see scipy.optimize\n' % solver_type)
return None
......@@ -291,6 +363,11 @@ class BELoopModel(Model):
(obj_func['obj_func']))
return None
if get_loop_parameters:
self.h5_fit_parameters = self.extract_loop_parameters(self.h5_fit)
return results
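For orientation, a hypothetical end-to-end use of the new `h5_guess` and `get_loop_parameters` options (the file name, HDF5 paths, and import path below are illustrative, not part of this commit):

    import h5py
    from pycroscopy import BELoopModel  # import path is an assumption

    h5_file = h5py.File('beps_data.h5', mode='r+')  # hypothetical file
    h5_main = h5_file['Measurement_000/Channel_000/Raw_Data-SHO_Fit_000/Fit']  # hypothetical path

    model = BELoopModel(h5_main)
    h5_guess = model.doGuess(get_loop_parameters=True)  # also fills model.h5_guess_parameters

    # In a later session, an existing guess dataset can seed the fit directly:
    results = model.doFit(h5_guess=h5_guess, get_loop_parameters=True)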
def extract_loop_parameters(self, h5_loop_fit, nuc_threshold=0.03):
"""
Method to extract a set of physical loop parameters from a dataset of fit parameters
......
......@@ -141,7 +141,6 @@ class BESHOmodel(Model):
'''
copyAttributes(self.h5_guess, self.h5_fit, skip_refs=False)
def _getFrequencyVector(self):
"""
Assumes that the data is reshape-able
......@@ -231,13 +230,38 @@ class BESHOmodel(Model):
# ask super to take care of the rest, which is a standardized operation
super(BESHOmodel, self)._setResults(is_guess)
def _set_guess(self, h5_guess):
"""
Setup to run the fit on an existing guess dataset. Sets the attributes
normally defined during doGuess.
Parameters
----------
h5_guess : h5py.Dataset
Dataset object containing the guesses
"""
h5_spec_inds = getAuxData(self.h5_main, auxDataName=['Spectroscopic_Indices'])[0]
self.step_start_inds = np.where(h5_spec_inds[0] == 0)[0]
self.num_udvs_steps = len(self.step_start_inds)
# find the frequency vector and hold in memory
self._getFrequencyVector()
self.is_reshapable = isReshapable(self.h5_main, self.step_start_inds)
self.h5_guess = h5_guess
    def doGuess(self, processors=None, strategy='complex_gaussian',
                options={"peak_widths": np.array([10, 200]), "peak_step": 20}):
"""
Parameters
----------
data
processors: int
Number of processors to use during parallel guess
Default None, output of psutil.cpu_count - 2 is used
strategy: string
Default is 'Wavelet_Peaks'.
Can be one of ['wavelet_peaks', 'relative_maximum', 'gaussian_processes']. For updated list, run GuessMethods.methods
......@@ -245,10 +269,6 @@ class BESHOmodel(Model):
Default options for wavelet_peaks: {"peak_widths": np.array([10, 200]), "peak_step": 20}.
Dictionary of options passed to strategy. For more info see GuessMethods documentation.
kwargs:
processors: int
number of processors to use. Default all processors on the system except for 1.
Returns
-------
......@@ -265,23 +285,25 @@ class BESHOmodel(Model):
options = {'frequencies': freq_vec}
super(BESHOmodel, self).doGuess(processors=processors, strategy=strategy, options=options)
    def doFit(self, processors=None, solver_type='least_squares', solver_options={'jac': 'cs'},
              obj_func={'class': 'Fit_Methods', 'obj_func': 'SHO', 'xvals': np.array([])},
              h5_guess=None):
        """
        Parameters
        ----------
        processors : int
            Number of processors to use.
            Default None, output of psutil.cpu_count - 2 is used
        solver_type : str
            Which solver from scipy.optimize should be used to fit the SHO response
        solver_options : dict of str
            Parameters to be passed to the solver defined by `solver_type`
        obj_func : dict of str
            Dictionary defining the class and method for the SHO residual function as well
            as the parameters to be passed
        h5_guess : h5py.Dataset
            Existing guess to use as input to fit.
            Default None

Returns
-------
......@@ -292,11 +314,15 @@ class BESHOmodel(Model):
else:
processors = min(processors, self._maxCpus)
if h5_guess is not None:
self._set_guess(h5_guess)
self._createFitDatasets()
self._start_pos = 0
xvals = self.freq_vec
        results = super(BESHOmodel, self).doFit(processors=processors, solver_type=solver_type,
                                                solver_options=solver_options,
                                                obj_func={'class': 'Fit_Methods', 'obj_func': 'SHO', 'xvals': xvals})
return results
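A hypothetical sketch of seeding the SHO fit with the new `h5_guess` argument (dataset paths and import path are illustrative):

    import h5py
    from pycroscopy import BESHOmodel  # import path is an assumption

    h5_file = h5py.File('be_data.h5', mode='r+')  # hypothetical file
    h5_main = h5_file['Measurement_000/Channel_000/Raw_Data']  # hypothetical path

    sho_model = BESHOmodel(h5_main)
    h5_guess = h5_file['Measurement_000/Channel_000/Raw_Data-SHO_Fit_000/Guess']  # previously stored guess
    results = sho_model.doFit(h5_guess=h5_guess)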
def _reformatResults(self, results, strategy='wavelet_peaks', verbose=False):
......
......@@ -29,6 +29,7 @@ class BEPSndfTranslator(Translator):
"""
Translates Band Excitation Polarization Switching (BEPS) datasets from .dat
files to .h5
"""
def translate(self, data_filepath, show_plots=True, save_plots=True, do_histogram=False, debug=False):
......@@ -52,6 +53,7 @@ class BEPSndfTranslator(Translator):
--------------
h5_path : String / unicode
Absolute path of the generated .h5 file
"""
## Read the parameter files
if debug:
......@@ -175,6 +177,7 @@ class BEPSndfTranslator(Translator):
Returns
-------
None
"""
print('Reading data file(s)')
self.dset_index = 0
......@@ -199,7 +202,7 @@ class BEPSndfTranslator(Translator):
self.ds_pixel_start_indx = pixel_ind
h5_refs = self.__initialize_meas_group(self.max_pixels - pixel_ind, current_pixels)
            print('reading Pixel {} of {}'.format(pixel_ind, self.max_pixels))
self.__append_pixel_data(current_pixels)
prev_pixels = current_pixels
......@@ -230,6 +233,7 @@ class BEPSndfTranslator(Translator):
Returns
-------
None
"""
# Update the number of pixels in the attributes
meas_grp = self.ds_main.parent
......@@ -316,6 +320,7 @@ class BEPSndfTranslator(Translator):
---------
h5_refs : list of HDF5group and HDF5Dataset references
references of the written H5 datasets
"""
tot_bins = 0
......@@ -402,9 +407,9 @@ class BEPSndfTranslator(Translator):
ds_udvs_inds = MicroDataset('UDVS_Indices', self.spec_inds[1])
# ds_udvs_inds.attrs['labels'] = {'UDVS_step':(slice(None),)}
"""
'''
Create the Spectroscopic Values tables
"""
'''
spec_vals, spec_inds, spec_vals_labs, spec_vals_units, spec_vals_labs_names = \
createSpecVals(self.udvs_mat, spec_inds, bin_freqs, exec_bin_vec,
curr_parm_dict, np.array(self.udvs_labs), self.udvs_units)
......@@ -424,20 +429,15 @@ class BEPSndfTranslator(Translator):
ds_spec_mat.attrs[label] = names
ds_spec_vals_mat.attrs[label] = names
"""
'''
New Method for chunking the Main_Data dataset. Chunking is now done in N-by-N squares of UDVS steps by pixels.
N is determined dinamically based on the dimensions of the dataset. Currently it is set such that individual
chunks are less than 10kB in size.
Chris Smith -- csmith55@utk.edu
"""
'''
max_bins_per_pixel = np.max(pixel_bins.values())
"""
pixel_chunking = maxReadPixels(10240, num_pix, max_bins_per_pixel, np.dtype('complex64').itemsize)
chunking = np.floor(np.sqrt(pixel_chunking))
chunking = max(1, chunking)
chunking = min(actual_udvs_steps, num_pix, chunking)
"""
beps_chunks = calc_chunks([num_pix, tot_pts],
np.complex64(0).itemsize,
unit_chunks=(1, max_bins_per_pixel))
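The removed block above captured the arithmetic now delegated to calc_chunks; a standalone sketch of that 10 kB square-chunk rule follows (function and argument names here are illustrative, not the calc_chunks implementation):

    import numpy as np

    def square_chunk_size(num_pix, actual_udvs_steps, max_bins_per_pixel,
                          itemsize=np.complex64(0).itemsize, target_bytes=10240):
        # How many pixels' worth of data fit in one target-sized chunk
        pixels_per_chunk = max(1, target_bytes // (max_bins_per_pixel * itemsize))
        # Take the square root to get a roughly square chunk in (UDVS steps x pixels)
        n = max(1, int(np.floor(np.sqrt(pixels_per_chunk))))
        # Never exceed the actual dataset dimensions
        return min(actual_udvs_steps, n), min(num_pix, n)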
......@@ -491,18 +491,21 @@ class BEPSndfTranslator(Translator):
Returns
---------
None
"""
        if self.__num_wave_types__ == 1 and not self.halve_udvs_steps:
            """Technically, this will be taken care of in the later (general) part but
            since this condition is more common it is worth writing for specifically"""

            zero_pix = self.__unique_waves__[0]

            data_vec = pixel_data[zero_pix].spectrogram_vec
            noise_mat = np.float32(pixel_data[zero_pix].noise_floor_mat)

            # Storing a list of lists since we don't know how many pixels we will find in this measurement group
            self.pos_vals_list.append([pixel_data[zero_pix].x_value, pixel_data[zero_pix].y_value,
                                       pixel_data[zero_pix].z_value])
else:
......@@ -581,6 +584,7 @@ class BEPSndfTranslator(Translator):
absolute file path of the UDVS spreadsheet
parms_mat_path : String / unicode
absolute filepath of the .mat parms file
"""
udvs_filepath = None
folder_path, tail = path.split(file_path)
......@@ -656,6 +660,7 @@ class BEPSndfTranslator(Translator):
-----------
ex_wfm : 1D numpy float array
Band Excitation waveform
"""
if not path.exists(filepath):
warn('BEPSndfTranslator - NO more_parms.mat file found')
......@@ -685,6 +690,7 @@ class BEPSndfTranslator(Translator):
units for columns in the UDVS table
UDVS_mat : 2D numpy float array
Contents of the UDVS table
"""
workbook = xlreader.open_workbook(udvs_filepath)
worksheet = workbook.sheet_by_index(0)
......@@ -726,6 +732,7 @@ class BEPSndfTranslator(Translator):
----------
uniq : 1D numpy array
Unique waveform types in format listed above
"""
sorted_all = np.unique(vec)
pos_vals = sorted_all[sorted_all >= 0]
......@@ -786,6 +793,7 @@ class BEPSndfParser(object):
scout : Boolean (optional. Default = true)
whether or not the parser should figure out basic details such as
the number of pixels, and the spatial dimensionality
"""
self.__file_handle__ = open(file_path, "rb")
self.__EOF__ = False
......@@ -805,6 +813,7 @@ class BEPSndfParser(object):
-------
wave_type : int
Wave type. Positive number means chirp up, negative number is chirp down.
"""
return self.__wave_type__
......@@ -816,6 +825,7 @@ class BEPSndfParser(object):
-------
num_pix : unsigned int
Number of pixels in this file
"""
return self.__num_pixels__
......@@ -846,6 +856,7 @@ class BEPSndfParser(object):
For phase checking, it is recommended that this function be modified to
also keep track of the byte positions of the pixels so that pixels can be
directly accessed if need be.
"""
count = 0
self.__num_pixels__ = 0
......@@ -950,7 +961,8 @@ class BEPSndfPixel(object):
data_vec : 1D float numpy array
Data contained within each pixel
harm: unsigned int
Harmonic of the BE waveform.  Absolute value of the wave type used to normalize the response waveform.
"""
harm = abs(harm)
......@@ -1060,8 +1072,8 @@ class BEPSndfPixel(object):
*Typical things that change during BEPS*
1. BE parameters:
a. Center Frequency, Band Width - changes in the BE_bin_w
b. Amplitude, Phase Variation, Band Edge Smoothing, Band Edge Trim - Harder to find out what happened
   exactly - FFT should show changes
c. BE repeats, desired duration - changes in the spectrogram length?
2. VS Parameters:
a. Amplitude, Phase shift - Changes in the AC_amp_vec / DC offset
......
......@@ -138,7 +138,7 @@ class OneViewTranslator(Translator):
num_files = scan_size_x * scan_size_y
h5_main, h5_mean_spec, h5_ronch = self._setupH5(usize, vsize, tmp.dtype.type,
h5_main, h5_mean_spec, h5_ronch = self._setupH5(usize, vsize, np.float32,
scan_size_x, scan_size_y,
image_parms)
......
......@@ -109,6 +109,12 @@ class PtychographyTranslator(Translator):
self.binning_func = block_reduce
self.bin_func = bin_func
if scan_size_x is None:
scan_size_x = int(np.floor(np.sqrt(len(file_list))))
scan_size_y = scan_size_x
if scan_size_y is None:
scan_size_y = int(np.floor(len(file_list) / scan_size_x))
num_files = scan_size_x*scan_size_y
h5_main, h5_mean_spec, h5_ronch = self._setupH5(usize, vsize, np.float32, scan_size_x, scan_size_y)
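To illustrate the fallback above with a hypothetical stack of 12 images:

    import numpy as np

    n_files = 12  # hypothetical number of images found

    # Neither dimension supplied: default to the largest square grid
    scan_size_x = int(np.floor(np.sqrt(n_files)))       # 3
    scan_size_y = scan_size_x                           # 3 -> only 9 of the 12 files are used

    # Only scan_size_x supplied: fill as many complete rows as possible
    scan_size_x = 4
    scan_size_y = int(np.floor(n_files / scan_size_x))  # 3 -> all 12 files are used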
......
......@@ -295,9 +295,18 @@ class ImageWindow(object):
@staticmethod
def abs_fft_func(image):
"""
Take the 2d FFT of each window in `windows` and return in the proper form.
:param image:
:return:
Parameters
----------
image : numpy.ndarray
Windowed image to take the FFT of
Returns
-------
windows : numpy.ndarray
Array of the Magnitude of the FFT of each window for the input
`image`
"""
windows = np.empty_like(image, dtype=absfft32)
windows['FFT Magnitude'] = np.abs(np.fft.fftshift(np.fft.fft2(image)))
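For reference, the same computation on a single hypothetical window in plain NumPy:

    import numpy as np

    window = np.random.rand(64, 64).astype(np.float32)      # hypothetical 64x64 window
    fft_mag = np.abs(np.fft.fftshift(np.fft.fft2(window)))  # magnitude spectrum, DC component centered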
......@@ -309,8 +318,17 @@ class ImageWindow(object):
"""
Take the 2d FFT of each window in `windows` and return in the proper form.
:param windows:
:return:
Parameters
----------
image : numpy.ndarray
Windowed image to take the FFT of
Returns
-------
windows : numpy.ndarray
Array of windows and the Magnitude of the FFT of each window for the input
`image`
"""
windows = np.empty_like(image, dtype=winabsfft32)
windows['Image Data'] = image
......@@ -323,8 +341,16 @@ class ImageWindow(object):
"""
Take the 2d FFT of each window in `windows` and return in the proper form.
:param image:
:return:
Parameters
----------
image : numpy.ndarray
Windowed image to take the FFT of
Returns
-------
windows : numpy.ndarray
Array of windows and the FFT of each window for the input `image`
"""
windows = np.empty_like(image, dtype=wincompfft32)
windows['Image Data'] = image
......@@ -334,92 +360,6 @@ class ImageWindow(object):
return windows
# def clean_windows(self, h5_win=None, n_comp=None):
# """
# Rebuild the Image from the SVD results on the windows.
# Optionally, only use components less than n_comp.
#
# Parameters