Commit 009036cf authored by CompPhysChris's avatar CompPhysChris Committed by GitHub
Browse files

Merge pull request #66 from pycroscopy/cnms_dev

Cnms dev
parents 7274f1dd d7f4944b
......@@ -11,7 +11,7 @@ import numpy as np
from .microdata import MicroDataset
__all__ = ['getDataSet', 'getH5DsetRefs', 'getH5RegRefIndices', 'get_dimensionality', 'get_sort_order',
'getAuxData', 'getDataAttr', 'getH5GroupRef', 'checkIfMain', 'checkAndLinkAncillary',
'getAuxData', 'getDataAttr', 'getH5GroupRefs', 'checkIfMain', 'checkAndLinkAncillary',
'createRefFromIndices', 'copyAttributes', 'reshape_to_Ndims', 'linkRefs', 'linkRefAsAlias',
'findH5group', 'get_formatted_labels', 'reshape_from_Ndims']
......@@ -139,7 +139,7 @@ def getH5DsetRefs(ds_names, h5_refs):
return aux_dset
def getH5GroupRef(group_name, h5_refs):
def getH5GroupRefs(group_name, h5_refs):
Given a list of H5 references and a group name,
this method returns H5 Datagroup object corresponding to the names.
......@@ -157,11 +157,11 @@ def getH5GroupRef(group_name, h5_refs):
h5_grp : HDF5 Object Reference
reference to group that matches the `group_name`
for dset in h5_refs:
# assuming that this name will show up only once in the list
return dset
return None
group_list = list()
for item in h5_refs:
return group_list
def findH5group(h5_main, tool_name):
......@@ -754,7 +754,7 @@ def create_empty_dataset(source_dset, dtype, dset_name, new_attrs=dict(), skip_r
# Check if the dataset already exists
h5_new_dset = h5_group[dset_name]
# Make sure it has the correct shape and dtype
if any((source_dset.shape!=h5_new_dset.shape,source_dset.dtype!=h5_new_dset.dtype)):
if any((source_dset.shape != h5_new_dset.shape, source_dset.dtype != h5_new_dset.dtype)):
del h5_new_dset, h5_group[dset_name]
h5_new_dset = h5_group.create_dataset(dset_name, shape=source_dset.shape, dtype=dtype,
compression=source_dset.compression, chunks=source_dset.chunks)
......@@ -117,34 +117,40 @@ class ioHDF5(object):
self.file = h5py.File(self.path, mode = 'r+')
def close(self):
'''Close h5.file'''
Close h5.file
def delete(self):
''' Delete h5.file'''
Delete h5.file
def flush(self):
'''Flush data from memory and commit to file.
Use this after manually inserting data into the hdf dataset'''
Flush data from memory and commit to file.
Use this after manually inserting data into the hdf dataset
def writeData(self, data, print_log=False):
Writes data into the hdf5 file and assigns data attributes such as region references.
The tree structure is inferred from the AFMData Object.
data : Instance of MicroData
Tree structure describing the organization of the data
refList : List of HDF5dataset or HDF5Datagroup references
References to the objects written
f = self.file
......@@ -158,9 +164,10 @@ class ioHDF5(object):
# Figuring out if the first item in AFMData tree is file or group
if is '' and data.parent is '/':
# For file we just write the attributes
for key in data.attrs.iterkeys():
f.attrs[key] = data.attrs[key]
if print_log: print('Wrote attributes of file {} \n'.format(
for key, val in data.attrs.iteritems():
f.attrs[key] = val
if print_log:
print('Wrote attributes of file {} \n'.format(
root =
# For a group we write it and its attributes
......@@ -170,7 +177,7 @@ class ioHDF5(object):
ensure that the chosen index is new.
previous = np.where([ in key for key in f[data.parent].keys()])[0]
if len(previous)==0:
if len(previous) == 0:
index = 0
# assuming that the last element of previous contains the highest index
......@@ -187,10 +194,10 @@ class ioHDF5(object):
for key in data.attrs.iterkeys():
if data.attrs[key] is None:
for key, val in data.attrs.iteritems():
if val is None:
g.attrs[key] = data.attrs[key]
g.attrs[key] = val
if print_log: print('Wrote attributes to group: {} \n'.format(
root =
......@@ -218,8 +225,10 @@ class ioHDF5(object):
for key in child.attrs.iterkeys():
itm.attrs[key] = child.attrs[key]
for key, val in child.attrs.iteritems():
if val is None:
itm.attrs[key] = val
if print_log: print('Wrote attributes to group {}\n'.format(
# here we do the recursive function call
for ch in child.children:
......@@ -323,25 +332,27 @@ class ioHDF5(object):
def write_region_references(dataset, slices, print_log=False):
Creates attributes of a h5.Dataset that refer to regions in the arrays
dataset : h5.Dataset instance
Dataset to which region references will be added as attributes
slices : dictionary
The slicing information must be formatted using tuples of slice objects.
The slicing information must be formatted using tuples of slice objects.
For example {'region_1':(slice(None, None), slice (0,1))}
print_log : Boolean (Optional. Default = False)
Whether or not to print status messages
if print_log: print('Starting to write Region References to Dataset',, 'of shape:', dataset.shape)
for sl in slices.iterkeys():
if print_log: print('About to write region reference:', sl, ':', slices[sl])
if print_log:
print('About to write region reference:', sl, ':', slices[sl])
if len(slices[sl]) == len(dataset.shape):
dataset.attrs[sl] = dataset.regionref[slices[sl]]
if print_log: print('Wrote Region Reference:%s' % sl)
if print_log:
print('Wrote Region Reference:%s' % sl)
warn('Region reference %s could not be written since the object size was not equal to the dimensions of'
' the dataset' % sl)
......@@ -62,13 +62,13 @@ def read_dm3(image_path, get_parms=True):
image_parms = dmtag['ImageList'][img_index]['ImageTags']
if get_parms:
image_parms = _parse_dm3_parms(image_parms)
image_parms = unnest_parm_dicts(image_parms)
image_parms = dict()
return image, image_parms
def _parse_dm3_parms(image_parms, prefix=''):
def unnest_parm_dicts(image_parms, prefix=''):
Parses the nested image parameter dictionary and converts it to a single
level dictionary, prepending the name of inner dictionaries to their
......@@ -87,10 +87,10 @@ def _parse_dm3_parms(image_parms, prefix=''):
# print 'name',name,'val',val
name = '-'.join([prefix]+name.split()).strip('-')
if isinstance(val, dict):
new_parms.update(_parse_dm3_parms(val, name))
new_parms.update(unnest_parm_dicts(val, name))
elif isinstance(val, list) and isinstance(val[0], dict):
for thing in val:
new_parms.update(_parse_dm3_parms(thing, name))
new_parms.update(unnest_parm_dicts(thing, name))
new_parms[name] = try_tag_to_string(val)
......@@ -99,7 +99,7 @@ class MicroDataGroup(MicroData):
for ch in child.children:
__tree(ch, parent+'/'
# print(
for child in self.children:
......@@ -28,8 +28,10 @@ from .igor_ibw import IgorIBWTranslator
from . import image
from .image import ImageTranslator
from .numpy_translator import NumpyTranslator
from . import ndata_translator
from .ndata_translator import NDataTranslator
__all__ = ['Translator', 'BEodfTranslator', 'BEPSndfTranslator', 'BEodfRelaxationTranslator',
'GIVTranslator', 'GLineTranslator', 'GDMTranslator', 'PtychographyTranslator',
'SporcTranslator', 'MovieTranslator', 'IgorIBWTranslator', 'NumpyTranslator',
'OneViewTranslator', 'ImageTranslator']
'OneViewTranslator', 'ImageTranslator', 'NDataTranslator']
Created on Feb 9, 2016
@author: Chris Smith
import os
import json
import zipfile
import numpy as np
from warnings import warn
from skimage.measure import block_reduce
from skimage.util import crop
from .translator import Translator
from .utils import generate_dummy_main_parms, make_position_mat, get_spectral_slicing, \
get_position_slicing, build_ind_val_dsets
from ..hdf_utils import getH5DsetRefs, getH5GroupRefs, calc_chunks, link_as_main
from ..io_hdf5 import ioHDF5
from ..io_image import unnest_parm_dicts
from ..microdata import MicroDataGroup, MicroDataset
class NDataTranslator(Translator):
Translate Ptychography data from a set of images to an HDF5 file
def __init__(self, *args, **kwargs):
    """
    Initialise the translator with down-sampling and cropping switched off.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to the parent class initialiser.
    """
    super(NDataTranslator, self).__init__(*args, **kwargs)

    # Placeholders; `hdf` is assigned when `translate` is called.
    self.hdf = None
    self.h5_main = None
    self.root_image_list = list()
    self.image_list_tag = None

    # Down-sampling state. `__no_bin` is defined outside this view;
    # presumably it passes the data through untouched -- TODO confirm.
    self.rebin = False
    self.bin_factor = (1, 1, 1, 1)
    self.binning_func = self.__no_bin
    self.bin_func = None

    # Cropping state: nothing is removed while `crop_ammount` is None.
    self.crop_method = 'percent'
    self.crop_ammount = None
def translate(self, h5_path, image_path, bin_factor=None, bin_func=np.mean, start_image=0, scan_size_x=None,
              scan_size_y=None, crop_ammount=None, crop_method='percent'):
    """
    Translate the ndata1 archives found under `image_path` into the HDF5 file at `h5_path`.

    Parameters
    ----------
    h5_path : str
        Absolute path to where the HDF5 file should be located
    image_path : str
        Absolute path to folder holding the image files
    bin_factor : int or array_like of uint, optional
        Downsampling factor for the two image dimensions. A single integer is
        applied to both; a length 2 sequence gives one factor per dimension.
        Default is None, no downsampling.
    bin_func : callable, optional
        Function which will be called to calculate the return value
        of each block. Function must implement an axis parameter,
        i.e. numpy.mean. Ignored if bin_factor is None. Default is numpy.mean.
    start_image : int, optional
        Accepted for interface compatibility; not used by this method.
    scan_size_x : int, optional
        Accepted for interface compatibility; not used by this method.
    scan_size_y : int, optional
        Accepted for interface compatibility; not used by this method.
    crop_ammount : uint or list of uints, optional
        How much should be cropped from the original image. Can be a single unsigned
        integer or a list of unsigned integers. A single integer will crop the same
        ammount from all edges. A list of two integers will crop the x-dimension by
        the first integer and the y-dimension by the second integer. A list of 4
        integers will crop the image as [left, right, top, bottom].
    crop_method : str, optional
        Which cropping method should be used. How much of the image is removed is
        determined by the value of `crop_ammount`.
        'percent' - A percentage of the image is removed.
        'absolute' - The specific number of pixel is removed.

    Returns
    -------
    None
        The data is written to the HDF5 file; nothing is returned.

    Raises
    ------
    ValueError
        If `bin_factor` is neither an integer nor a length 2 sequence.
    """
    # Open the target file through the project's HDF5 wrapper and keep the
    # handle so the helper methods can write through it.
    hdf = ioHDF5(h5_path)
    self.hdf = hdf

    self.crop_method = crop_method
    self.crop_ammount = crop_ammount

    # Collect the input files located under image_path
    file_list = self._parse_file_path(image_path)

    ziplist = []
    try:
        # Open every archive that was found. Handles are appended one at a
        # time so that a failure half way through still closes the earlier ones.
        for zip_path in file_list:
            ziplist.append(zipfile.ZipFile(zip_path, 'r'))

        image_parm_list = self._getimageparms(file_list)

        # Check if a bin_factor is given. Set up binning objects if it is.
        if bin_factor is not None:
            self.rebin = True
            if isinstance(bin_factor, (int, np.integer)):
                self.bin_factor = (1, 1, int(bin_factor), int(bin_factor))
            elif len(bin_factor) == 2:
                # tuple() lets lists and arrays through, as the docs promise array_like
                self.bin_factor = (1, 1) + tuple(bin_factor)
            else:
                raise ValueError('Input parameter `bin_factor` must be a length 2 array_like or an integer.\n' +
                                 '{} was given.'.format(bin_factor))
            self.binning_func = block_reduce
            self.bin_func = bin_func

        h5_channels = self._setupH5(image_parm_list)

        self._read_data(ziplist, h5_channels)
    finally:
        # The archives are only needed while the data is being read
        for zip_handle in ziplist:
            zip_handle.close()
def _create_root_image(self, image_path):
    """
    Create the Groups and Datasets for a single root image

    Parameters
    ----------
    image_path : str
        Path to the image file

    Returns
    -------
    None
    """
    # `read_dm3` is not among this module's top-level imports. It is pulled in
    # here from the module that already supplies `unnest_parm_dicts`.
    # NOTE(review): confirm that io_image is where read_dm3 lives.
    from ..io_image import read_dm3

    image, _ = read_dm3(image_path)
    if image.ndim == 3:
        # Collapse a stack of frames into a single 2D image
        image = np.sum(image, axis=0)

    # Create the Measurement and Channel Groups to hold the image Datasets
    # and chain them together so that writing the root writes everything.
    root_grp = MicroDataGroup('/')
    meas_grp = MicroDataGroup('Measurement_')
    chan_grp = MicroDataGroup('Channel_')
    root_grp.addChildren([meas_grp])
    meas_grp.addChildren([chan_grp])

    # Set the Measurement Group attributes
    usize, vsize = image.shape
    meas_grp.attrs['image_size_u'] = usize
    meas_grp.attrs['image_size_v'] = vsize
    # NOTE(review): 'OneView' looks inherited from another translator -- confirm
    meas_grp.attrs['translator'] = 'OneView'
    meas_grp.attrs['num_pixels'] = image.size

    # The image is stored flattened: one row per pixel, a single column
    ds_rawimage = MicroDataset('Raw_Data', np.reshape(image, (-1, 1)))

    # Build Spectroscopic and Position datasets for the image
    pos_mat = make_position_mat(image.shape)
    spec_mat = np.array([[0]], dtype=np.uint8)

    ds_spec_inds = MicroDataset('Spectroscopic_Indices', spec_mat)
    ds_spec_vals = MicroDataset('Spectroscopic_Values', spec_mat, dtype=np.float32)
    spec_lab = get_spectral_slicing(['Image'])
    for ds_spec in (ds_spec_inds, ds_spec_vals):
        ds_spec.attrs['labels'] = spec_lab
        ds_spec.attrs['units'] = ''

    ds_pos_inds = MicroDataset('Position_Indices', pos_mat)
    ds_pos_vals = MicroDataset('Position_Values', pos_mat, dtype=np.float32)
    pos_lab = get_position_slicing(['X', 'Y'])
    for ds_pos in (ds_pos_inds, ds_pos_vals):
        ds_pos.attrs['labels'] = pos_lab
        ds_pos.attrs['units'] = ['pixel', 'pixel']

    chan_grp.addChildren([ds_rawimage, ds_spec_inds, ds_spec_vals,
                          ds_pos_inds, ds_pos_vals])

    # Write the data to file and get the handle for the image dataset
    image_refs = self.hdf.writeData(root_grp)
    h5_image = getH5DsetRefs(['Raw_Data'], image_refs)[0]

    # Link references to raw
    aux_ds_names = ['Position_Indices', 'Position_Values', 'Spectroscopic_Indices', 'Spectroscopic_Values']
    link_as_main(h5_image, *getH5DsetRefs(aux_ds_names, image_refs))
def _read_data(self, zip_list, h5_channels):
# Reads one data array per archive, crops it, optionally bins it and writes the
# flattened result plus two mean datasets to the HDF5 file.
# NOTE(review): the parameter notes below list h5_main, h5_mean_spec, h5_ronch and
# image_path, but the signature accepts only zip_list and h5_channels.
# The items of zip_list are used through .extract(), so they act as open archive
# handles rather than path strings.
Iterates over the images in `file_list`, reading each image and downsampling if
reqeusted, and writes the flattened image to file. Also builds the Mean_Ronchigram
and the Spectroscopic_Mean datasets at the same time.
zip_list : list of str
List of all files in `image_path` that will be read
h5_main : h5py.Dataset
Dataset which will hold the Ronchigrams
h5_mean_spec : h5py.Dataset
Dataset which will hold the Spectroscopic Mean
h5_ronch : h5py.Dataset
Dataset which will hold the Mean Ronchigram
image_path : str
Absolute file path to the directory which hold the images
# NOTE(review): h5_main_list is never read in the visible body.
h5_main_list = list()
For each file, we must read the data then create the neccessary datasets, add them to the channel, and
write it all to file
# ifile is not used inside the loop as shown.
for ifile, (this_file, this_channel) in enumerate(zip(zip_list, h5_channels)):
Extract the data file from the zip archive and read it into an array
# extract() is given no target path, so data.npy is written to the current working
# directory; every archive overwrites the previous copy and the file is not removed here.
tmp_path = this_file.extract('data.npy')
this_data = np.load(tmp_path)
Find the shape of the data, then calculate the final dimensions based on the crop and
downsampling parameters
# Prepend singleton axes until the array is 4-D
while this_data.ndim < 4:
this_data = np.expand_dims(this_data, 0)
this_data = self.crop_ronc(this_data)
scan_size_x, scan_size_y, usize, vsize = this_data.shape
# NOTE(review): the binned sizes are rounded here, whereas skimage's block_reduce pads
# up to a whole block; the reshape to h5_main.shape further down presumably fails when
# the image size is not a multiple of the bin factor -- confirm.
usize = int(round(1.0*usize / self.bin_factor[-2]))
vsize = int(round(1.0*vsize / self.bin_factor[-1]))
num_images = scan_size_x*scan_size_y
num_pixels = usize*vsize
Write these attributes to the Measurement group
# NOTE(review): new_attrs is built but not referenced again in the visible body.
new_attrs = {'image_size_u': usize,
'image_size_v': vsize,
'scan_size_x': scan_size_x,
'scan_size_y': scan_size_y}
# Get the Position and Spectroscopic Datasets
ds_spec_ind, ds_spec_vals = build_ind_val_dsets((usize, vsize), is_spectral=True,
labels=['U', 'V'], units=['pixel', 'pixel'])
ds_pos_ind, ds_pos_val = build_ind_val_dsets([scan_size_x, scan_size_y], is_spectral=False,
labels=['X', 'Y'], units=['pixel', 'pixel'])
# One chunk unit is a single flattened image (1 x num_pixels)
ds_chunking = calc_chunks([num_images, num_pixels],
unit_chunks=(1, num_pixels))
# Allocate space for Main_Data and Pixel averaged Data
ds_main_data = MicroDataset('Raw_Data', data=[], maxshape=(num_images, num_pixels),
chunking=ds_chunking, dtype=np.float32, compression='gzip')
# NOTE(review): the next three statements appear truncated in this view (unbalanced
# brackets); the missing arguments cannot be recovered from here.
ds_mean_ronch_data = MicroDataset('Mean_Ronchigram',
data=np.zeros(num_pixels, dtype=np.float32),
ds_mean_spec_data = MicroDataset('Spectroscopic_Mean',
data=np.zeros(num_images, dtype=np.float32),
# Add datasets as children of Measurement_000 data group
ds_channel = MicroDataGroup(
ds_channel.addChildren([ds_main_data, ds_spec_ind, ds_spec_vals, ds_pos_ind,
ds_pos_val, ds_mean_ronch_data, ds_mean_spec_data])
# Write the group and look up the handles of the three datasets filled in below
h5_refs = self.hdf.writeData(ds_channel)
h5_main = getH5DsetRefs(['Raw_Data'], h5_refs)[0]
h5_ronch = getH5DsetRefs(['Mean_Ronchigram'], h5_refs)[0]
h5_mean_spec = getH5DsetRefs(['Spectroscopic_Mean'], h5_refs)[0]
# NOTE(review): this list also appears truncated in this view.
aux_ds_names = ['Position_Indices',
link_as_main(h5_main, *getH5DsetRefs(aux_ds_names, h5_refs))
# NOTE(review): mean_ronch is allocated but never used in the visible body.
mean_ronch = np.zeros(h5_ronch.shape, dtype=np.float32)
# Apply the configured binning routine and flatten to the shape of the main dataset
this_data = self.binning_func(this_data, self.bin_factor, self.bin_func).reshape(h5_main.shape)
h5_main[:, :] = this_data
# Mean over axis 1 feeds Spectroscopic_Mean, mean over axis 0 feeds Mean_Ronchigram
h5_mean_spec[:] = np.mean(this_data, axis=1)
h5_ronch[:] = np.mean(this_data, axis=0)
def crop_ronc(self, ronc):
    """
    Crop the input Ronchigram by the specified ammount using the specified method.

    Only the last two axes (the image axes) are cropped; any leading axes are
    left untouched. The ammount and method are read from `self.crop_ammount`
    and `self.crop_method`.

    Parameters
    ----------
    ronc : numpy.array
        Input image, or stack of images, to be cropped. Must have at least
        two dimensions.

    Returns
    -------
    cropped_ronc : numpy.array
        View of the cropped image. The input is returned unchanged when
        `self.crop_ammount` is None or when the requested crop would leave
        an empty image.

    Raises
    ------
    ValueError
        If `self.crop_method` is not 'percent' or 'absolute', or if
        `self.crop_ammount` does not hold 1, 2 or 4 non-negative values.
    """
    if self.crop_ammount is None:
        return ronc

    crop_method = self.crop_method
    if crop_method not in ('percent', 'absolute'):
        raise ValueError('Allowed values of crop_method are percent and absolute.')

    # Normalise the request to one (before, after) pair per image axis:
    #   1 value  -> same ammount on all four edges
    #   2 values -> first for both edges of axis -2, second for axis -1
    #   4 values -> [before, after] for axis -2 then [before, after] for axis -1
    amounts = np.ravel(np.asarray(self.crop_ammount, dtype=float))
    if amounts.size == 1:
        pairs = ((amounts[0], amounts[0]), (amounts[0], amounts[0]))
    elif amounts.size == 2:
        pairs = ((amounts[0], amounts[0]), (amounts[1], amounts[1]))
    elif amounts.size == 4:
        pairs = ((amounts[0], amounts[1]), (amounts[2], amounts[3]))
    else:
        raise ValueError('The crop_ammount should be an integer or list of 2 or 4 integers.')
    if np.any(amounts < 0):
        raise ValueError('The crop_ammount should be an integer or list of 2 or 4 integers.')

    image_shape = ronc.shape[-2:]
    bounds = []
    for size, (before, after) in zip(image_shape, pairs):
        if crop_method == 'percent':
            # Percentages are taken of the image axis being cropped, not of
            # the scan axes that may precede it.
            before = np.round(before / 100.0 * size)
            after = np.round(after / 100.0 * size)
        start = int(before)
        # Clamp at zero so an oversized crop gives an empty slice instead of
        # a negative stop index that would wrap around.
        stop = max(size - int(after), 0)
        bounds.append(slice(start, stop))

    # Ellipsis keeps every leading axis whole, so 2-D images and N-D stacks both work
    cropped_ronc = ronc[(Ellipsis,) + tuple(bounds)]

    if any([dim == 0 for dim in cropped_ronc.shape]):
        warn("Requested crop ammount is greater than the image size. No cropping will be done.")
        return ronc

    return cropped_ronc