# -*- coding: utf-8 -*-
"""
Created on Tue Nov 07 11:48:53 2017

@author: Suhas Somnath

"""

from __future__ import division, print_function, absolute_import, unicode_literals
import h5py
import numpy as np
try:
    from collections.abc import Iterable  # Python 3.3+
except ImportError:
    from collections import Iterable  # Python 2
from pyUSID.processing.process import Process, parallel_compute
from pyUSID.io.hdf_utils import create_results_group, write_main_dataset, write_simple_attrs, create_empty_dataset, \
    write_ind_val_dsets
from pyUSID.io.write_utils import Dimension
from .fft import get_noise_floor, are_compatible_filters, build_composite_freq_filter
from .gmode_utils import test_filter

try:
    from mpi4py import MPI
    if MPI.COMM_WORLD.Get_size() == 1:
        # mpi4py available but NOT called via mpirun or mpiexec => single node
        MPI = None
except ImportError:
    # mpi4py not even present! Single node by default:
    MPI = None
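
# Note: to actually use multiple ranks, the calling script would typically be
# launched through the MPI runtime, e.g. ``mpiexec -n 4 python my_filter_script.py``
# (script name hypothetical). Otherwise the block above falls back to serial operation.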
# TODO: correct implementation of num_pix


class SignalFilter(Process):
    def __init__(self, h5_main, frequency_filters=None, noise_threshold=None, write_filtered=True,
                 write_condensed=False, num_pix=1, phase_rad=0, **kwargs):
        """
        Filters the entire h5 dataset with the given filtering parameters.

        Parameters
        ----------
        h5_main : h5py.Dataset object
            Dataset to process
        frequency_filters : (Optional) single or list of pycroscopy.fft.FrequencyFilter objects
            Frequency (vertical) filters to apply to signal
        noise_threshold : (Optional) float. Default - None
            Noise tolerance to apply to data. Value must be within (0, 1)
        write_filtered : (Optional) bool. Default - True
            Whether or not to write the filtered data to file
        write_condensed : (Optional) bool. Default - False
            Whether or not to write the condensed data in frequency space to file. Use this for datasets that are very
            large but sparse in frequency space.
        num_pix : (Optional) uint. Default - 1
            Number of pixels to use for filtering. More pixels means a lower noise floor and the ability to pick up
            weaker signals. Use only if absolutely necessary. This value must be a divisor of the number of pixels in
            the dataset
        phase_rad : (Optional). float
            Phase (in radians) by which the output is rotated with respect to the input to compensate for phase lag.
            This feature has NOT yet been implemented.
        kwargs : (Optional). dictionary
            Please see Process class for additional inputs
        """

        super(SignalFilter, self).__init__(h5_main, **kwargs)

        if frequency_filters is None and noise_threshold is None:
            raise ValueError('Must specify a noise threshold and/or one or more frequency filters')

        if noise_threshold is not None:
            if noise_threshold >= 1 or noise_threshold <= 0:
                raise ValueError('Noise threshold must be within (0, 1)')

        self.composite_filter = 1
        if frequency_filters is not None:
            if not isinstance(frequency_filters, Iterable):
                frequency_filters = [frequency_filters]
            if not are_compatible_filters(frequency_filters):
                raise ValueError('frequency filters must be a single or list of FrequencyFilter objects')
            self.composite_filter = build_composite_freq_filter(frequency_filters)
        else:
            write_condensed = False

        if write_filtered is False and write_condensed is False:
            raise ValueError('You need to write the filtered and/or the condensed dataset to the file')

        if h5_main.shape[0] % num_pix != 0:
            raise ValueError('Number of positions in the dataset must be divisible by num_pix')

        self.num_effective_pix = h5_main.shape[0] // num_pix
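        # Illustration with hypothetical numbers: 1024 positions with num_pix=4
        # would be filtered as 256 effective positions, each pooling the spectra
        # of 4 raw pixels to lower the noise floor (see the num_pix TODO above).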
        self.phase_rad = phase_rad

        self.noise_threshold = noise_threshold
        self.frequency_filters = frequency_filters

        self.write_filtered = write_filtered
        self.write_condensed = write_condensed

        """
        Remember that the default number of pixels corresponds to only the raw data that can be held in memory
        In the case of signal filtering, the datasets that will occupy space are:
        1. Raw, 2. filtered (real + freq space copies), 3. Condensed (substantially lesser space)
        The actual scaling of memory depends on options:
        """
        scaling_factor = 1 + 2 * self.write_filtered + 0.25 * self.write_condensed
        self._max_pos_per_read = int(self._max_pos_per_read / scaling_factor)
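        # Worked example of the scaling above (the booleans coerce to 0 / 1):
        # write_filtered=True and write_condensed=False give
        # scaling_factor = 1 + 2 * 1 + 0.25 * 0 = 3, so a third as many
        # positions are read per chunk as the Process default would allow.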

        if self.verbose and self.mpi_rank == 0:
            print('Allowed to read {} pixels per chunk'.format(self._max_pos_per_read))

        self.parms_dict = dict()
        if self.frequency_filters is not None:
            for freq_filter in self.frequency_filters:
                self.parms_dict.update(freq_filter.get_parms())
        if self.noise_threshold is not None:
            self.parms_dict['noise_threshold'] = self.noise_threshold
        self.parms_dict['num_pix'] = self.num_effective_pix

        self.process_name = 'FFT_Filtering'
        self.duplicate_h5_groups, self.partial_h5_groups = self._check_for_duplicates()

        self.data = None
        self.filtered_data = None
        self.condensed_data = None
        self.noise_floors = None
        self.h5_filtered = None
        self.h5_condensed = None
        self.h5_noise_floors = None

    def test(self, pix_ind=None, excit_wfm=None, **kwargs):
        """
        Tests the signal filter on the data from a single pixel (randomly chosen unless manually specified).

        Parameters
        ----------
        pix_ind : int, optional. default = random
            Index of the pixel whose data will be used for inference
        excit_wfm : array-like, optional. default = None
            Waveform against which the raw and filtered signals will be plotted. This waveform can be a fraction of the
            length of a single pixel's data. For example, in the case of G-mode, where a single scan line is yet to be
            broken down into pixels, the excitation waveform for a single pixel can be provided to automatically
            break the raw and filtered responses into chunks of the same size.

        Returns
        -------
        fig, axes
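
        Examples
        --------
        Hypothetical spot-check on one pixel before committing to a full compute()
        (``sig_filt`` constructed as in the class docstring example):

        >>> fig, axes = sig_filt.test(pix_ind=10)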
        """
        if self.mpi_rank > 0:
            return
        if pix_ind is None:
            pix_ind = np.random.randint(0, high=self.h5_main.shape[0])
        return test_filter(self.h5_main[pix_ind], frequency_filters=self.frequency_filters, excit_wfm=excit_wfm,
                           noise_threshold=self.noise_threshold, plot_title='Pos #' + str(pix_ind), show_plots=True,
                           **kwargs)

    def _create_results_datasets(self):
        """
        Creates all the datasets necessary for holding all parameters + data.
        """

        self.h5_results_grp = create_results_group(self.h5_main, self.process_name)

        self.parms_dict.update({'last_pixel': 0, 'algorithm': 'pycroscopy_SignalFilter'})

        write_simple_attrs(self.h5_results_grp, self.parms_dict)

        assert isinstance(self.h5_results_grp, h5py.Group)

        if isinstance(self.composite_filter, np.ndarray):
            h5_comp_filt = self.h5_results_grp.create_dataset('Composite_Filter',
                                                              data=np.float32(self.composite_filter))

            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Finished creating the Composite_Filter dataset'.format(self.mpi_rank))

        # First create the position datasets if the new indices are smaller...
        if self.num_effective_pix != self.h5_main.shape[0]:
            # TODO: Do this part correctly. See past solution:
            """
            # need to make new position datasets by taking every n'th index / value:
                new_pos_vals = np.atleast_2d(h5_pos_vals[slice(0, None, self.num_effective_pix), :])
                pos_descriptor = []
                for name, units, leng in zip(h5_pos_inds.attrs['labels'], h5_pos_inds.attrs['units'],
                                             [int(np.unique(h5_pos_inds[:, dim_ind]).size / self.num_effective_pix)
                                              for dim_ind in range(h5_pos_inds.shape[1])]):
                    pos_descriptor.append(Dimension(name, units, np.arange(leng)))
                ds_pos_inds, ds_pos_vals = build_ind_val_dsets(pos_descriptor, is_spectral=False, verbose=self.verbose)
                h5_pos_vals.data = np.atleast_2d(new_pos_vals)  # The data generated above varies linearly. Override.

            """
            h5_pos_inds_new, h5_pos_vals_new = write_ind_val_dsets(self.h5_results_grp,
                                                                   Dimension('pixel', 'a.u.', self.num_effective_pix),
                                                                   is_spectral=False,
                                                                   verbose=self.verbose and self.mpi_rank == 0)
            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Created the new position ancillary dataset'.format(self.mpi_rank))

        else:
            h5_pos_inds_new = self.h5_main.h5_pos_inds
            h5_pos_vals_new = self.h5_main.h5_pos_vals

            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Reusing the position datasets from the source dataset'.format(self.mpi_rank))

        if self.noise_threshold is not None:
            self.h5_noise_floors = write_main_dataset(self.h5_results_grp, (self.num_effective_pix, 1), 'Noise_Floors',
                                                      'Noise', 'a.u.', None, Dimension('arb', '', [1]),
                                                      dtype=np.float32, aux_spec_prefix='Noise_Spec_',
                                                      h5_pos_inds=h5_pos_inds_new, h5_pos_vals=h5_pos_vals_new,
                                                      verbose=self.verbose and self.mpi_rank == 0)
            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Finished creating the Noise_Floors dataset'.format(self.mpi_rank))

        if self.write_filtered:
            # Filtered data is identical to Main_Data in every way - just a duplicate
            self.h5_filtered = create_empty_dataset(self.h5_main, self.h5_main.dtype, 'Filtered_Data',
                                                    h5_group=self.h5_results_grp)
            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Finished creating the Filtered dataset'.format(self.mpi_rank))

        self.hot_inds = None

        if self.write_condensed:
            self.hot_inds = np.where(self.composite_filter > 0)[0]
            # The FFT of a real signal is conjugate-symmetric, so only the second
            # half of the passed ("hot") frequency bins needs to be stored:
            self.hot_inds = np.uint(self.hot_inds[int(0.5 * len(self.hot_inds)):])
            condensed_spec = Dimension('hot_frequencies', '', len(self.hot_inds))
            self.h5_condensed = write_main_dataset(self.h5_results_grp, (self.num_effective_pix, len(self.hot_inds)),
                                                   'Condensed_Data', 'Complex', 'a.u.', None, condensed_spec,
                                                   h5_pos_inds=h5_pos_inds_new, h5_pos_vals=h5_pos_vals_new,
                                                   dtype=complex, verbose=self.verbose and self.mpi_rank == 0)
            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Finished creating the Condensed dataset'.format(self.mpi_rank))

        if self.mpi_size > 1:
            self.mpi_comm.Barrier()
        self.h5_main.file.flush()

    def _get_existing_datasets(self):
        """
        Extracts references to the existing datasets that hold the results
        """
        if self.write_filtered:
            self.h5_filtered = self.h5_results_grp['Filtered_Data']
        if self.write_condensed:
            self.h5_condensed = self.h5_results_grp['Condensed_Data']
        if self.noise_threshold is not None:
            self.h5_noise_floors = self.h5_results_grp['Noise_Floors']

    def _write_results_chunk(self):
        """
        Writes data chunks back to the file
        """
        # Get access to the private variable:
        pos_in_batch = self._get_pixels_in_current_batch()

        if self.write_condensed:
            self.h5_condensed[pos_in_batch, :] = self.condensed_data
        if self.noise_threshold is not None:
            self.h5_noise_floors[pos_in_batch, :] = np.atleast_2d(self.noise_floors)
        if self.write_filtered:
            self.h5_filtered[pos_in_batch, :] = self.filtered_data

        # Not responsible for checkpointing anymore. Process class handles this.

    def _unit_computation(self, *args, **kwargs):
        """
        Processing per chunk of the dataset

        Parameters
        ----------
        args : list
            Not used
        kwargs : dictionary
            Not used
        """
        # get FFT of the entire data chunk
        self.data = np.fft.fftshift(np.fft.fft(self.data, axis=1), axes=1)
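        # fftshift re-orders each spectrum so the zero-frequency bin sits at the
        # center, matching the centered layout of the composite filter from
        # build_composite_freq_filter; the element-wise multiply below then pairs
        # corresponding frequency bins.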

        if self.noise_threshold is not None:
            self.noise_floors = parallel_compute(self.data, get_noise_floor, cores=self._cores,
                                                 func_args=[self.noise_threshold],
                                                 verbose=self.verbose)

        if isinstance(self.composite_filter, np.ndarray):
            # multiply the FFT of the data by the composite filter
            self.data *= self.composite_filter

        if self.noise_threshold is not None:
            # apply thresholding
            self.data[np.abs(self.data) < np.tile(np.atleast_2d(self.noise_floors), self.data.shape[1])] = 1E-16

        if self.write_condensed:
            # set self.condensed_data here
            self.condensed_data = self.data[:, self.hot_inds]

        if self.write_filtered:
            # take inverse FFT
            self.filtered_data = np.real(np.fft.ifft(np.fft.ifftshift(self.data, axes=1), axis=1))
            if self.phase_rad > 0:
                # TODO: implement phase compensation
                # do np.roll on data
                # self.data = np.roll(self.data, 0, axis=1)
                pass