Commit 0ae6f8aa authored by Unknown's avatar Unknown
Browse files

PycroDataset updates

slice method now operates directly on the h5 dataset.
Added some documentation
parent 5ffa4753
......@@ -17,6 +17,32 @@ from .hdf_utils import checkIfMain, getAuxData, get_attr, get_data_descriptor, g
class PycroDataset(h5py.Dataset):
def __init__(self, h5_ref, sort_dims=False):
"""
New data object that extends the h5py.Dataset.
Parameters
----------
h5_ref : hdf5.Dataset
The base dataset to be extended
sort_dims : bool
Should the dimensions be sorted internally from fastest changing to slowest.
Methods
-------
Attributes
----------
h5_spec_vals : h5py.Dataset
Associated Spectroscopic Values dataset
h5_spec_inds : h5py.Dataset
Associated Spectroscopic Indices dataset
h5_pos_vals : h5py.Dataset
Associated Position Values dataset
h5_pos_inds : h5py.Dataset
Associated Position Indices dataset
"""
if not checkIfMain(h5_ref):
raise TypeError('Supply a h5py.Dataset object that is a pycroscopy main dataset')
......@@ -132,12 +158,21 @@ class PycroDataset(h5py.Dataset):
return get_unit_values(self.h5_spec_inds, self.h5_spec_vals)[dim_name]
def current_sorting(self):
"""
Prints the current sorting method.
"""
if self.__sort_dims:
print('Data dimensions are sorted in order from fastest changing dimension to slowest.')
else:
print('Data dimensions are in the order they occur in the file.')
def toggle_sorting(self):
"""
Toggles between sorting from fastest changing dimension to slowest and sorting based on the
order of the labels
"""
if self.__n_dim_data is not None:
if self.__sort_dims:
nd_sort = self.__pos_sort_order[::-1] + self.__spec_sort_order[::-1]
......@@ -149,6 +184,15 @@ class PycroDataset(h5py.Dataset):
self.__sort_dims = not self.__sort_dims
def get_n_dim_form(self):
"""
Reshapes the dataset to an N-dimensional array
Returns
-------
n_dim_data : numpy.ndarray
N-dimensional form of the dataset
"""
n_dim_data, success = reshape_to_Ndims(self, sort_dims=self.__sort_dims)
......@@ -159,15 +203,25 @@ class PycroDataset(h5py.Dataset):
return self.__n_dim_data
def slice(self, **kwargs):
def slice(self, **slice_dict):
"""
Slice the dataset based on an input dictionary of 'str': slice pairs.
Each string should correspond to a dimension label. The slices can be
array-likes or slice objects.
Parameters
----------
kwargs
slice_dict : dict
Dictionary of array-likes.
Returns
-------
data_slice : numpy.ndarray
Slice of the dataset. Dataset has been reshaped to N-dimensions if `success` is True, only
by Position dimensions if `success` is 'Positions', or not reshape at all if `success`
is False.
success : str or bool
Informs the user as to how the data_slice has been shaped.
"""
# Ensure that the n_dimensional data exists
......@@ -177,19 +231,24 @@ class PycroDataset(h5py.Dataset):
_ = self.get_n_dim_form()
# Create default slices that include the entire dimension
n_dim_slices = [slice(None) for _ in self.__n_dim_labs]
n_dim_slices = dict()
n_dim_slices_sizes = dict()
for dim_lab, dim_size in zip(self.n_dim_labels(), self.n_dim_sizes()):
n_dim_slices[dim_lab] = list(range(dim_size))
n_dim_slices_sizes[dim_lab] = len(n_dim_slices[dim_lab])
# Loop over all the keyword arguments and create slices for each.
for key, val in kwargs.items():
for key, val in slice_dict.items():
# Make sure the dimension is valid
if key not in self.__n_dim_labs:
raise KeyError('Cannot slice on dimension {}. '
'Valid dimensions are {}.'.format(key, self.__n_dim_labs.tolist()))
# Check the value and convert to a slice object if possible.
# Use a list if not.
if isinstance(val, slice) or isinstance(val, list):
if isinstance(val, slice):
val = n_dim_slices[key][val]
elif isinstance(val, list):
pass
elif isinstance(val, np.ndarray):
val = val.flatten().tolist()
......@@ -198,22 +257,39 @@ class PycroDataset(h5py.Dataset):
else:
raise TypeError('The slices must be array-likes or slice objects.')
idim = self.n_dim_labels().index(key)
n_dim_slices[key] = val
n_dim_slices[idim] = val
n_dim_slices_sizes[key] = len(val)
# Now that the slices are built, we just need to apply them to the data
# This method is slow and memory intensive but shouldn't fail if multiple lists are given.
# TODO: More elegant slicing method for PycroDataset objects
data_slice = self.__n_dim_data[n_dim_slices[0]]
for pos_ind, pos_lab in enumerate(self.__pos_dim_labels):
n_dim_slices[pos_lab] = np.isin(self.h5_pos_inds[:, pos_ind], n_dim_slices[pos_lab])
if pos_ind == 0:
pos_slice = n_dim_slices[pos_lab]
else:
pos_slice = np.logical_and(pos_slice, n_dim_slices[pos_lab])
for idim, this_slice in enumerate(n_dim_slices[1:]):
idim += 1
print(idim, this_slice)
base_slice = [slice(None) for _ in self.__n_dim_labs]
pos_slice = np.argwhere(pos_slice)
for spec_ind, spec_lab in enumerate(self.__spec_dim_labels):
n_dim_slices[spec_lab] = np.isin(self.h5_spec_inds[spec_ind], n_dim_slices[spec_lab])
if spec_ind == 0:
spec_slice = n_dim_slices[spec_lab]
else:
spec_slice = np.logical_and(spec_slice, n_dim_slices[spec_lab])
spec_slice = np.argwhere(spec_slice)
if len(np.argwhere(pos_slice)) <= len(np.argwhere(spec_slice)):
# Fewer final positions that spectra (Most common case)
data_slice = self[pos_slice, :][:, spec_slice]
else:
data_slice = self[spec_slice, :][:, pos_slice]
base_slice[idim] = this_slice
print(base_slice)
data_slice = data_slice[base_slice]
data_slice, success = reshape_to_Ndims(data_slice,
h5_pos=self.h5_pos_inds[pos_slice, :],
h5_spec=self.h5_spec_inds[:, spec_slice])
return data_slice
return data_slice, success
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment