ndata.py 18.3 KB
Newer Older
Chris Smith's avatar
Chris Smith committed
1
2
3
4
5
6
"""
Created on Feb 9, 2016

@author: Chris Smith
"""

7
from __future__ import division, print_function, absolute_import, unicode_literals
8

Chris Smith's avatar
Chris Smith committed
9
import json
10
import os
Chris Smith's avatar
Chris Smith committed
11
12
import zipfile
from warnings import warn
13

14
import h5py
15
import numpy as np
Chris Smith's avatar
Chris Smith committed
16
17
from skimage.measure import block_reduce
from skimage.util import crop
18

19
20
21
22
23
24
from sidpy.sid import Translator
from sidpy.hdf.hdf_utils import write_simple_attrs

from pyUSID.io.write_utils import Dimension, calc_chunks
from pyUSID.io.hdf_utils import write_main_dataset, create_indexed_group

Somnath, Suhas's avatar
Somnath, Suhas committed
25
26
from .df_utils.image_utils import unnest_parm_dicts
from .df_utils.dm_utils import read_dm3
Chris Smith's avatar
Chris Smith committed
27
28
29
30
31
32
33
34
35
36
37


class NDataTranslator(Translator):
    """
    Translate Ptychography data from a set of images to an HDF5 file
    """

    def __init__(self, *args, **kwargs):
        """
        Create the translator with binning and cropping switched off.

        Parameters
        ----------
        args
            Positional arguments passed unchanged to the parent class
        kwargs
            Keyword arguments passed unchanged to the parent class
        """
        super(NDataTranslator, self).__init__(*args, **kwargs)

        # HDF5 handles; `h5_f` is assigned by `translate`
        self.h5_f = None
        self.h5_main = None
        self.root_image_list = []
        self.image_list_tag = None

        # Binning configuration; the default binner returns the image untouched
        self.rebin = False
        self.bin_factor = (1, 1, 1, 1)
        self.bin_func = None
        self.binning_func = self.__no_bin

        # Cropping configuration; a crop amount of None disables cropping
        self.crop_method = 'percent'
        self.crop_ammount = None

    def translate(self, h5_path, image_path, bin_factor=None, bin_func=np.mean, start_image=0, scan_size_x=None,
                  scan_size_y=None, crop_ammount=None, crop_method='percent'):
        """
        Translate Ptychography data from .ndata1 / .ndata / .npy files into a new HDF5 file.

        Any file already present at `h5_path` is deleted and replaced.

        Parameters
        ----------------
        h5_path : str
            Absolute path to where the HDF5 file should be located
        image_path : str
            Absolute path to folder holding the image files or the path to a specific file
        bin_factor : int or array_like of uint, optional
            Downsampling factor for the image dimensions.  A single integer is applied
            to both image axes, a length 2 array_like gives one factor per image axis.
            Default is None, no downsampling.
        bin_func : callable, optional
            Function which will be called to calculate the return value
            of each block.  Function must implement an axis parameter,
            i.e. numpy.mean.  Ignored if bin_factor is None.  Default is
            numpy.mean.
        start_image : int, optional
            Currently unused by this translator.  Default is 0.
        scan_size_x : int, optional
            Currently unused by this translator; the scan size is taken from the
            shape of the data that is read.  Default is None.
        scan_size_y : int, optional
            Currently unused by this translator; the scan size is taken from the
            shape of the data that is read.  Default is None.
        crop_ammount : uint or list of uints, optional
            How much should be cropped from the original image.  Can be a single unsigned
            integer or a list of unsigned integers.  A single integer will crop the same
            ammount from all edges.  A list of two integers will crop the x-dimension by
            the first integer and the y-dimension by the second integer.  A list of 4
            integers will crop the image as [left, right, top, bottom].
        crop_method : str, optional
            Which cropping method should be used.  How much of the image is removed is
            determined by the value of `crop_ammount`.
            'percent' - A percentage of the image is removed.
            'absolute' - The specific number of pixel is removed.

        Returns
        ----------
        None

        Raises
        ------
        ValueError
            If `bin_factor` is neither an integer nor a length 2 array_like
        """
        self.crop_method = crop_method
        self.crop_ammount = crop_ammount

        '''
        Check if a bin_factor is given.  Set up binning objects if it is, otherwise
        reset them so that settings from an earlier call are not carried over.
        This is done before the output file is touched so that bad arguments do not
        destroy an existing file.
        '''
        if bin_factor is None:
            self.rebin = False
            self.bin_factor = (1, 1, 1, 1)
            self.binning_func = self.__no_bin
            self.bin_func = None
        else:
            if isinstance(bin_factor, (int, np.integer)):
                image_factors = (bin_factor, bin_factor)
            else:
                try:
                    # tuple() lets lists and arrays be concatenated with the scan factors
                    image_factors = tuple(bin_factor)
                except TypeError:
                    image_factors = ()

            if len(image_factors) != 2:
                raise ValueError('Input parameter `bin_factor` must be a length 2 array_like or an integer.\n' +
                                 '{} was given.'.format(bin_factor))

            self.rebin = True
            # The two scan dimensions are never downsampled
            self.bin_factor = (1, 1) + image_factors
            self.binning_func = block_reduce
            self.bin_func = bin_func

        '''
        Get the list of all supported data files and their metadata
        '''
        image_path = os.path.abspath(image_path)
        file_list = self._parse_file_path(image_path)
        if not file_list:
            warn('No .ndata1, .ndata or .npy files were found at {}.'.format(image_path))

        image_parm_list = self._getimageparms(file_list)

        # Open the hdf5 file and delete any contents
        if os.path.exists(h5_path):
            os.remove(h5_path)

        self.h5_f = h5py.File(h5_path, 'w')

        try:
            h5_channels = self._setupH5(image_parm_list)

            self._read_data(file_list, h5_channels)
        finally:
            # Always release the file handle, even when reading or writing fails
            self.h5_f.close()

        return

136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
    # def _create_root_image(self, image_path):
    #     """
    #     Create the Groups and Datasets for a single root image
    #
    #     Parameters
    #     ----------
    #     image_path : str
    #         Path to the image file
    #
    #     Returns
    #     -------
    #     None
    #     """
    #     image, image_parms = read_dm3(image_path)
    #     if image.ndim == 3:
    #         image = np.sum(image, axis=0)
    #
    #     '''
    #     Create the Measurement and Channel Groups to hold the
    #     image Datasets
    #     '''
    #     root_grp = VirtualGroup('/')
    #
    #     meas_grp = VirtualGroup('Measurement_')
    #
    #     chan_grp = VirtualGroup('Channel_')
    #     root_grp.add_children([meas_grp])
    #     meas_grp.add_children([chan_grp])
    #
    #     '''
    #     Set the Measurement Group attributes
    #     '''
    #     meas_grp.attrs.update(image_parms)
    #     usize, vsize = image.shape
    #     meas_grp.attrs['image_size_u'] = usize
    #     meas_grp.attrs['image_size_v'] = vsize
    #     meas_grp.attrs['translator'] = 'OneView'
    #     meas_grp.attrs['num_pixels'] = image.size
    #
    #     ds_raw_image = VirtualDataset('Raw_Data', np.reshape(image, (-1, 1)))
    #
    #     '''
    #     Build Spectroscopic and Position datasets for the image
    #     '''
    #     spec_desc = Dimension('Intensity', 'a.u.', [1])
    #     ds_spec_inds, ds_spec_vals = build_ind_val_dsets(spec_desc, is_spectral=True)
    #
    #     pos_dims = [Dimension('X', 'a.u.', np.arange(image.shape[0])),
    #                 Dimension('Y', 'a.u.', np.arange(image.shape[1]))]
    #     ds_pos_inds, ds_pos_vals = build_ind_val_dsets(pos_dims, is_spectral=False)
    #
    #     chan_grp.add_children([ds_raw_image, ds_spec_inds, ds_spec_vals,
    #                            ds_pos_inds, ds_pos_vals])
    #
    #     '''
    #     Write the data to file and get the handle for the image dataset
    #     '''
    #     image_refs = self.h5_f.write(root_grp)
    #
    #     h5_image = get_h5_obj_refs(['Raw_Data'], image_refs)[0]
    #
    #     '''
    #     Link references to raw
    #     '''
    #     aux_ds_names = ['Position_Indices', 'Position_Values', 'Spectroscopic_Indices', 'Spectroscopic_Values']
    #     link_as_main(h5_image, *get_h5_obj_refs(aux_ds_names, image_refs))
    #
    #     self.root_image_list.append(h5_image)
Chris Smith's avatar
Chris Smith committed
204

Unknown's avatar
Unknown committed
205
    def _read_data(self, file_list, h5_channels):
        """
        Iterates over the files in `file_list`, reading each data array, cropping and
        downsampling it if requested, and writes the flattened images to file.  For every
        file the `Raw_Data`, `Mean_Ronchigram` and `Mean_Spectrogram` datasets are created
        in the matching channel group.

        Parameters
        ----------
        file_list : list of str
            Paths of the .ndata1 / .ndata / .npy files that will be read
        h5_channels : list of h5py.Group
            Channel groups, one per entry of `file_list`, that receive the datasets

        Returns
        -------
        None
        """
        # Local imports keep this method self-contained
        import shutil
        import tempfile

        '''
        For each file, we must read the data then create the necessary datasets, add them to the channel, and
        write it all to file
        '''
        for this_file, this_channel in zip(file_list, h5_channels):
            ext = os.path.splitext(this_file)[1]
            if ext in ('.ndata1', '.ndata'):
                '''
                Extract the data file from the zip archive and read it into an array.
                A private temporary folder is used so that a `data.npy` in the current
                working directory is never overwritten or deleted.
                '''
                tmp_dir = tempfile.mkdtemp()
                try:
                    with zipfile.ZipFile(this_file, 'r') as this_zip:
                        tmp_path = this_zip.extract('data.npy', path=tmp_dir)
                    this_data = np.load(tmp_path)
                finally:
                    shutil.rmtree(tmp_dir, ignore_errors=True)
            elif ext == '.npy':
                # Read data directly from npy file
                this_data = np.load(this_file)
            else:
                # Never fall through with data left over from a previous file
                warn('Skipping {}: unsupported file extension {}.'.format(this_file, ext))
                continue

            '''
            Find the shape of the data, then calculate the final dimensions based on the crop and
            downsampling parameters
            '''
            while this_data.ndim < 4:
                this_data = np.expand_dims(this_data, 0)

            this_data = self.crop_ronc(this_data)
            scan_size_x, scan_size_y, usize, vsize = this_data.shape

            # block_reduce pads incomplete blocks, so the binned size is the ceiling of
            # the ratio.  With the default factor of 1 this leaves the size unchanged.
            usize = int(-(-usize // self.bin_factor[-2]))
            vsize = int(-(-vsize // self.bin_factor[-1]))

            num_images = scan_size_x * scan_size_y
            num_pixels = usize * vsize

            '''
            Write these attributes to the Measurement group
            '''
            new_attrs = {'image_size_u': usize,
                         'image_size_v': vsize,
                         'scan_size_x': scan_size_x,
                         'scan_size_y': scan_size_y}

            write_simple_attrs(this_channel.parent, new_attrs)

            # Get the Position and Spectroscopic Datasets
            spec_desc = [Dimension('U', 'pixel', np.arange(usize)), Dimension('V', 'pixel', np.arange(vsize))]
            pos_desc = [Dimension('X', 'pixel', np.arange(scan_size_x)),
                        Dimension('Y', 'pixel', np.arange(scan_size_y))]

            ds_chunking = calc_chunks([num_images, num_pixels],
                                      np.float32(0).itemsize,
                                      unit_chunks=(1, num_pixels))

            # Allocate space for the main dataset and the two averaged datasets
            h5_main = write_main_dataset(this_channel, (num_images, num_pixels), 'Raw_Data',
                                         'Intensity', 'a.u.',
                                         pos_desc, spec_desc,
                                         chunks=ds_chunking, dtype=np.float32)

            h5_ronch = this_channel.create_dataset('Mean_Ronchigram',
                                                   data=np.zeros(num_pixels, dtype=np.float32))

            h5_mean_spec = this_channel.create_dataset('Mean_Spectrogram',
                                                       data=np.zeros(num_images, dtype=np.float32))

            # Downsample (or pass through) and flatten to (positions, pixels)
            this_data = self.binning_func(this_data, self.bin_factor, self.bin_func).reshape(h5_main.shape)

            h5_main[:, :] = this_data

            # Average over the pixels of every image
            h5_mean_spec[:] = np.mean(this_data, axis=1)

            # Average over all scan positions
            h5_ronch[:] = np.mean(this_data, axis=0)

            self.h5_f.flush()

        self.h5_f.flush()
Chris Smith's avatar
Chris Smith committed
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335

    def crop_ronc(self, ronc):
        """
        Crop the input Ronchigram by the specified ammount using the specified method.

        Parameters
        ----------
        ronc : numpy.array
            Input image to be cropped.

        Returns
        -------
        cropped_ronc : numpy.array
            Cropped image
        """

        if self.crop_ammount is None:
            return ronc

        crop_ammount = self.crop_ammount
        crop_method = self.crop_method

        if crop_method == 'percent':
Unknown's avatar
Unknown committed
336
            crop_ammount = np.round(np.atleast_2d(crop_ammount) / 100.0 * ronc.shape)
Chris Smith's avatar
Chris Smith committed
337
338
339
340
341
342
343
344
345
346
347
348
349
            crop_ammount = tuple([tuple(row) for row in crop_ammount.astype(np.uint32)])
        elif crop_method == 'absolute':
            if isinstance(crop_ammount, int):
                crop_ammount = ((crop_ammount,), (crop_ammount,))
            elif len(crop_ammount) == 2:
                crop_ammount = ((crop_ammount[0],), (crop_ammount[1],))
            elif len(crop_ammount) == 4:
                crop_ammount = ((crop_ammount[0], crop_ammount[1]), (crop_ammount[2], crop_ammount[3]))
            else:
                raise ValueError('The crop_ammount should be an integer or list of 2 or 4 integers.')
        else:
            raise ValueError('Allowed values of crop_method are percent and absolute.')

Unknown's avatar
Unknown committed
350
        crop_ammount = ((0,), (0,)) + crop_ammount
Chris Smith's avatar
Chris Smith committed
351
352
353
354
355
356
357
358

        cropped_ronc = crop(ronc, crop_ammount)

        if any([dim == 0 for dim in cropped_ronc.shape]):
            warn("Requested crop ammount is greater than the image size.  No cropping will be done.")
            return ronc
        return cropped_ronc

Unknown's avatar
Unknown committed
359
360
    @staticmethod
    def downSampRoncVec(ronch_vec, binning_factor):
Chris Smith's avatar
Chris Smith committed
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
        """
        Downsample the image by taking the mean over nearby values

        Parameters
        ----------
        ronch_vec : ndarray
            Image data
        binning_factor : int
            factor to reduce the size of the image by

        Returns
        -------
        ronc_mat3_mean : ndarray
            Flattened downsampled image
        """
        ccd_pix = int(np.sqrt(ronch_vec.size))
        ronc_mat = ronch_vec.reshape(ccd_pix, ccd_pix)
        ronc_mat2 = ronc_mat.reshape(ccd_pix, ccd_pix / binning_factor, binning_factor)
        ronc_mat2_mean = ronc_mat2.mean(2)  # take the mean along the 3rd dimension
        ronc_mat3 = ronc_mat2_mean.reshape(ccd_pix / binning_factor, binning_factor, -1)
        ronc_mat3_mean = ronc_mat3.mean(1)

        return ronc_mat3_mean.reshape(-1)

    @staticmethod
Unknown's avatar
Unknown committed
386
    def _parse_file_path(image_path):
Chris Smith's avatar
Chris Smith committed
387
388
389
390
391
392
393
394
395
396
397
398
399
        """
        Returns a list of all files in the directory given by path

        Parameters
        ---------------
        path : string / unicode
            absolute path to directory containing files

        Returns
        ----------
        file_list : list of strings
            names of all files in directory located at path
        """
Chris Smith's avatar
Chris Smith committed
400
        allowed_image_types = ['.ndata1', '.npy', '.ndata']
Unknown's avatar
Unknown committed
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
        if os.path.isdir(image_path):
            # Image path is to a directory
            file_list = list()
            for root, dirs, files in os.walk(image_path):
                for thisfile in files:
                    _, ext = os.path.splitext(thisfile)
                    if ext not in allowed_image_types:
                        continue
                    else:
                        file_list.append(os.path.join(root, thisfile))
        else:
            # Image path is a file
            _, ext = os.path.splitext(image_path)
            if ext in allowed_image_types:
                file_list = [image_path]

Chris Smith's avatar
Chris Smith committed
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
        return file_list

    @staticmethod
    def _getimageparms(file_list):
        """
        Returns the image parameters for each file in the `file_list`

        Parameters
        ------------
        file_list : list of zipfile.ZipFile
            List of zipfile objects

        Returns
        -----------
        parm_list : list of dict
            List of image parameters from the files in `file_list`
        """
        parm_list = list()

Unknown's avatar
Unknown committed
436
437
        for fpath in file_list:
            base, ext = os.path.splitext(fpath)
Chris Smith's avatar
Chris Smith committed
438
            if ext in ['.ndata1', '.ndata']:
Unknown's avatar
Unknown committed
439
440
441
442
443
444
445
446
447
448
                zfile = zipfile.ZipFile(fpath, 'r')
                tmp_path = zfile.extract('metadata.json')
            elif ext == '.npy':
                folder, basename = os.path.split(base)
                same_name_path = base+'.json'
                metapath = os.path.join(folder, 'metadata.json')
                if os.path.exists(same_name_path):
                    tmp_path = same_name_path
                elif os.path.exists(metapath):
                    tmp_path = metapath
Chris Smith's avatar
Chris Smith committed
449
450
451
452
453
            metafile = open(tmp_path, 'r')
            metastring = metafile.read()
            parm_list.append(unnest_parm_dicts(json.loads(metastring)))
            metafile.close()

Unknown's avatar
Unknown committed
454
455
            if ext == '.ndata1':
                os.remove(tmp_path)
Chris Smith's avatar
Chris Smith committed
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479

        return parm_list

    def _setupH5(self, image_parms):
        """
        Prepare the group layout of the HDF5 file.
        Only the Measurement and Channel groups are created here; no datasets are
        written by this method.

        Parameters
        ----------
        image_parms : list of dict
            One dictionary of parameters per measurement

        Returns
        -------
        h5_channels : list
            The Channel group created for each entry of `image_parms`, in the
            same order
        """
        # Tag the root of the file with the type of data it holds
        write_simple_attrs(self.h5_f, {'data_type': 'PtychographyData'})

        channel_groups = []
        for parms in image_parms:
            # One indexed Measurement group per parameter set, carrying the
            # parameters as its attributes
            measurement = create_indexed_group(self.h5_f, 'Measurement')
            write_simple_attrs(measurement, parms)

            # A single indexed Channel group inside that measurement
            channel_groups.append(create_indexed_group(measurement, 'Channel'))

        self.h5_f.flush()

        return channel_groups

    @staticmethod
    def __no_bin(image, *args, **kwargs):
        """
        Does absolutely nothing to the image.  Exists so that we can have
        a bin function to call whether we actually rebin the image or not.

        Parameters
        ----------
        image : ndarray
            Image
        args:
            Argument list
        kwargs:
            Keyword argument list

        Returns
        -------
        image : ndarray
            The input image
        """
        return image