# -*- coding: utf-8 -*-
"""
Created on Thu Nov 02 11:48:53 2017

@author: Suhas Somnath

"""

from __future__ import division, print_function, absolute_import, unicode_literals

import time as tm
import numpy as np
from .process import Process, parallel_compute
from ..io.microdata import MicroDataset, MicroDataGroup
from ..io.io_utils import realToCompound
from ..io.hdf_utils import getH5DsetRefs, getAuxData, copyAttributes, link_as_main
from ..io.translators.utils import build_ind_val_dsets
from ..io.io_hdf5 import ioHDF5
from .giv_utils import do_bayesian_inference

cap_dtype = np.dtype({'names': ['Forward', 'Reverse'],
                      'formats': [np.float32, np.float32]})
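# cap_dtype is used below for the per-pixel 'Capacitance' results dataset: one
# ('Forward', 'Reverse') pair of values per position.
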
# TODO: Take lesser-used Bayesian inference params from kwargs if provided
# TODO: Allow resuming of computation


class GIVBayesian(Process):
    def __init__(self, h5_main, ex_freq, gain, num_x_steps=250, r_extra=110, **kwargs):
        """
        Applies Bayesian Inference to General Mode IV (G-IV) data to extract the true current

        Parameters
        ----------
        h5_main : h5py.Dataset object
            Dataset to process
        ex_freq : float
            Frequency of the excitation waveform
        gain : uint
            Gain setting on current amplifier (typically 7-9)
        num_x_steps : uint (Optional, default = 250)
            Number of steps for the inferred results. Note: this may end up being slightly different from the specified value.
        r_extra : float (Optional, default = 110 [Ohms])
            Extra resistance in the RC circuit that will provide correct current and resistance values
        kwargs : dict
            Other parameters specific to the Process class and nuanced bayesian_inference parameters
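
        Examples
        --------
        A minimal usage sketch; the file name and the location of the main
        dataset within it are hypothetical and depend on how the raw G-IV
        data was translated:

        >>> import h5py
        >>> h5_f = h5py.File('giv_measurement.h5', mode='r+')
        >>> h5_main = h5_f['Measurement_000/Channel_000/Raw_Data']
        >>> bayes_proc = GIVBayesian(h5_main, ex_freq=200, gain=8)
        >>> h5_results_grp = bayes_proc.compute()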
        """
        super(GIVBayesian, self).__init__(h5_main, **kwargs)
        self.gain = gain
        self.ex_freq = ex_freq
        self.r_extra = r_extra
        self.num_x_steps = int(num_x_steps)
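        # Inference is applied to each half cycle separately, so the number of bias
        # steps per half (num_x_steps // 2) should be odd; when num_x_steps is a
        # multiple of 4, bump it to the next even value whose half is odd: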
        if self.num_x_steps % 4 == 0:
            self.num_x_steps = ((self.num_x_steps // 2) + 1) * 2
        if self.verbose:
            print('ensuring that the number of steps per half cycle is odd; num_x_steps is now', self.num_x_steps)

        # TODO: take these from kwargs if provided
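        # Defaults forwarded to do_bayesian_inference; see giv_utils.do_bayesian_inference
        # for the meaning of each hyperparameter.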
        bayesian_parms = {'gam': 0.03, 'e': 10.0, 'sigma': 10.0, 'sigmaC': 1.0, 'num_samples': 2E3}

        self.parms_dict = {'freq': self.ex_freq, 'num_x_steps': self.num_x_steps, 'r_extra': self.r_extra}
        self.parms_dict.update(bayesian_parms)

        self.process_name = 'Bayesian_Inference'
        self.duplicate_h5_groups = self._check_for_duplicates()

        h5_spec_vals = getAuxData(h5_main, auxDataName=['Spectroscopic_Values'])[0]
        self.single_ao = np.squeeze(h5_spec_vals[()])

        roll_cyc_fract = -0.25
        self.roll_pts = int(self.single_ao.size * roll_cyc_fract)
        self.rolled_bias = np.roll(self.single_ao, self.roll_pts)
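        # The quarter-cycle roll above means that, for a sinusoidal excitation, the
        # first half of rolled_bias is the falling branch of the bias and the second
        # half is the rising branch.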

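        # Numerical derivative of the bias waveform; the last point is repeated so
        # that dvdt has the same length as single_ao: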
        dt = 1 / (ex_freq * self.single_ao.size)
        self.dvdt = np.diff(self.single_ao) / dt
        self.dvdt = np.append(self.dvdt, self.dvdt[-1])

        self.reverse_results = None
        self.forward_results = None

    def _set_memory_and_cores(self, cores=1, mem=1024):
        """
        Checks hardware limitations such as memory and the number of CPU cores, and sets
        the recommended data chunk sizes and the number of cores to be used by analysis methods.

        Parameters
        ----------
        cores : uint, optional
            Default - 1
            How many cores to use for the computation
        mem : uint, optional
            Default - 1024
            The amount of memory in MB to use in the computation
        """
        super(GIVBayesian, self)._set_memory_and_cores(cores=cores, mem=mem)
        # Remember that the default number of pixels corresponds to only the raw data that can be held in memory
        # In the case of simplified Bayesian inference, four (roughly) equally sized datasets need to be held in memory:
        # raw, compensated current, resistance, variance
        self._max_pos_per_read = self._max_pos_per_read // 4  # Integer division
        # Since these computations take far longer than functional fitting, do in smaller batches:
        self._max_pos_per_read = min(500, self._max_pos_per_read)

    def _create_results_datasets(self):
        """
        Creates hdf5 datasets and datagroups to hold the results
        """
        # create all h5 datasets here:
        num_pos = self.h5_main.shape[0]

        if self.verbose:
            print('Now creating the datasets')

        ds_spec_inds, ds_spec_vals = build_ind_val_dsets([self.num_x_steps], is_spectral=True,
                                                         labels=['Bias'], units=['V'], verbose=self.verbose)

        cap_shape = (num_pos, 1)
        ds_cap = MicroDataset('Capacitance', data=[], maxshape=cap_shape, dtype=cap_dtype, chunking=cap_shape,
                              compression='gzip')
        ds_cap.attrs = {'quantity': 'Capacitance', 'units': 'pF'}
        ds_cap_spec_inds, ds_cap_spec_vals = build_ind_val_dsets([1], is_spectral=True,
                                                                 labels=['Direction'], units=[''], verbose=self.verbose)
        # the names of these datasets will clash with the ones created above. Change names manually:
        ds_cap_spec_inds.name = 'Spectroscopic_Indices_Cap'
        ds_cap_spec_vals.name = 'Spectroscopic_Values_Cap'

        ds_r_var = MicroDataset('R_variance', data=[], maxshape=(num_pos, self.num_x_steps), dtype=np.float32,
                                chunking=(1, self.num_x_steps), compression='gzip')
        ds_r_var.attrs = {'quantity': 'Resistance', 'units': 'GOhms'}
        ds_res = MicroDataset('Resistance', data=[], maxshape=(num_pos, self.num_x_steps), dtype=np.float32,
                              chunking=(1, self.num_x_steps), compression='gzip')
        ds_res.attrs = {'quantity': 'Resistance', 'units': 'GOhms'}
        ds_i_corr = MicroDataset('Corrected_Current', data=[], maxshape=(num_pos, self.single_ao.size),
                                 dtype=np.float32,
                                 chunking=(1, self.single_ao.size), compression='gzip')
        # don't bother adding any other attributes, all this will be taken from h5_main

        bayes_grp = MicroDataGroup(self.h5_main.name.split('/')[-1] + '-' + self.process_name + '_',
                                   parent=self.h5_main.parent.name)
        bayes_grp.addChildren([ds_spec_inds, ds_spec_vals, ds_cap, ds_r_var, ds_res, ds_i_corr,
                               ds_cap_spec_inds, ds_cap_spec_vals])
        bayes_grp.attrs = {'algorithm_author': 'Kody J. Law', 'last_pixel': 0}
        bayes_grp.attrs.update(self.parms_dict)

        if self.verbose:
            bayes_grp.showTree()

        self.hdf = ioHDF5(self.h5_main.file)
        h5_refs = self.hdf.writeData(bayes_grp, print_log=self.verbose)

        self.h5_new_spec_vals = getH5DsetRefs(['Spectroscopic_Values'], h5_refs)[0]
        h5_new_spec_inds = getH5DsetRefs(['Spectroscopic_Indices'], h5_refs)[0]
        h5_cap_spec_vals = getH5DsetRefs(['Spectroscopic_Values_Cap'], h5_refs)[0]
        h5_cap_spec_inds = getH5DsetRefs(['Spectroscopic_Indices_Cap'], h5_refs)[0]
        self.h5_cap = getH5DsetRefs(['Capacitance'], h5_refs)[0]
        self.h5_variance = getH5DsetRefs(['R_variance'], h5_refs)[0]
        self.h5_resistance = getH5DsetRefs(['Resistance'], h5_refs)[0]
        self.h5_i_corrected = getH5DsetRefs(['Corrected_Current'], h5_refs)[0]
        self.h5_results_grp = self.h5_cap.parent

        if self.verbose:
            print('Finished making room for the datasets. Now linking them')

        # Now link the datasets appropriately so that they become hubs:
        h5_pos_vals = getAuxData(self.h5_main, auxDataName=['Position_Values'])[0]
        h5_pos_inds = getAuxData(self.h5_main, auxDataName=['Position_Indices'])[0]

        # Capacitance main dataset:
        link_as_main(self.h5_cap, h5_pos_inds, h5_pos_vals, h5_cap_spec_inds, h5_cap_spec_vals)

        # the corrected current dataset is the same as the main dataset in every way
        copyAttributes(self.h5_main, self.h5_i_corrected, skip_refs=False)

        # The resistance datasets get new spec datasets but reuse the old pos datasets:
        for new_dset in [self.h5_resistance, self.h5_variance]:
            link_as_main(new_dset, h5_pos_inds, h5_pos_vals, h5_new_spec_inds, self.h5_new_spec_vals)

        if self.verbose:
            print('Finished linking all datasets!')

    def _write_results_chunk(self):
        """
        Writes data chunks back to the h5 file
        """

        if self.verbose:
            print('Started accumulating all results')
        num_pixels = len(self.forward_results)
        cap_mat = np.zeros((num_pixels, 2), dtype=np.float32)
        r_inf_mat = np.zeros((num_pixels, self.num_x_steps), dtype=np.float32)
        r_var_mat = np.zeros((num_pixels, self.num_x_steps), dtype=np.float32)
        i_cor_sin_mat = np.zeros((num_pixels, self.single_ao.size), dtype=np.float32)

        for pix_ind, i_meas, forw_results, rev_results in zip(range(num_pixels), self.data,
                                                              self.forward_results, self.reverse_results):
            full_results = dict()
            for item in ['cValue']:
                full_results[item] = np.hstack((forw_results[item], rev_results[item]))
                # print(item, full_results[item].shape)

            # Capacitance is always doubled - halve it now (locally):
            # full_results['cValue'] *= 0.5
            cap_val = np.mean(full_results['cValue']) * 0.5

            # Compensating the resistance (the triple-quoted block below is a disabled
            # analytical alternative for i_cap that assumes a sinusoidal bias):
            """
            omega = 2 * np.pi * self.ex_freq
            i_cap = cap_val * omega * self.rolled_bias
            """
            i_cap = cap_val * self.dvdt
            i_extra = self.r_extra * 2 * cap_val * self.single_ao
            i_corr_sine = i_meas - i_cap - i_extra

            # Equivalent to flipping the X:
            rev_results['x'] *= -1

            # Stacking the results - no flipping required for reverse:
            for item in ['x', 'mR', 'vR']:
                full_results[item] = np.hstack((forw_results[item], rev_results[item]))

            i_cor_sin_mat[pix_ind] = i_corr_sine
            cap_mat[pix_ind] = full_results['cValue'] * 1000  # convert from nF to pF
            r_inf_mat[pix_ind] = full_results['mR']
            r_var_mat[pix_ind] = full_results['vR']

        # Now write to h5 files:
        if self.verbose:
            print('Finished accumulating results. Writing to h5')

        if self._start_pos == 0:
            self.h5_new_spec_vals[0, :] = full_results['x']  # Technically this needs to only be done once

        pos_slice = slice(self._start_pos, self._end_pos)
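        # realToCompound packs the (num_pixels, 2) float matrix into the compound
        # cap_dtype - one ('Forward', 'Reverse') pair per position: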
        self.h5_cap[pos_slice] = np.atleast_2d(realToCompound(cap_mat, cap_dtype)).T
        self.h5_variance[pos_slice] = r_var_mat
        self.h5_resistance[pos_slice] = r_inf_mat
        self.h5_i_corrected[pos_slice] = i_cor_sin_mat

        # Leave this provision in so that the computation can be restarted
        self.h5_results_grp.attrs['last_pixel'] = self._end_pos

        self.hdf.flush()

        print('Finished processing up to pixel ' + str(self._end_pos) + ' of ' + str(self.h5_main.shape[0]))

        # Now update the start position
        self._start_pos = self._end_pos

    @staticmethod
    def _unit_function():
        return do_bayesian_inference

    def compute(self, *args, **kwargs):
        """
        Creates placeholders for the results, applies the inference to the data, and writes the output to the file.

        Returns
        -------
        h5_results_grp : h5py.Datagroup object
            Datagroup containing all the results
        """
        self._create_results_datasets()

        half_v_steps = self.single_ao.size // 2
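        # The rolled waveform is processed in two halves of half_v_steps points each:
        # the falling (reverse) branch followed by the rising (forward) branch.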

        # remove the additional parameter and halve the number of x points
        bayes_parms = self.parms_dict.copy()
        bayes_parms['num_x_steps'] = self.num_x_steps // 2
        bayes_parms['econ'] = True
        del bayes_parms['freq']

        time_per_pix = 0

        num_pos = self.h5_main.shape[0]

        self._read_data_chunk()

        while self.data is not None:

            t_start = tm.time()

            # first roll the data
            rolled_raw_data = np.roll(self.data, self.roll_pts, axis=1)
            # Ensure that the bias has a positive slope. Multiply current by -1 accordingly
            self.reverse_results = parallel_compute(rolled_raw_data[:, :half_v_steps] * -1, do_bayesian_inference,
                                                    cores=self._cores,
                                                    func_args=[self.rolled_bias[:half_v_steps] * -1, self.ex_freq],
                                                    func_kwargs=bayes_parms, lengthy_computation=True)

            if self.verbose:
                print('Finished processing reverse sections. Now working on forward sections....')

            self.forward_results = parallel_compute(rolled_raw_data[:, half_v_steps:], do_bayesian_inference,
                                                    cores=self._cores,
                                                    func_args=[self.rolled_bias[half_v_steps:], self.ex_freq],
                                                    func_kwargs=bayes_parms, lengthy_computation=True)
            if self.verbose:
                print('Finished processing forward sections')

            tot_time = np.round(tm.time() - t_start, decimals=2)

            if self.verbose:
                print('Done parallel computing in {} sec or {} sec per pixel'.format(tot_time,
                                                                                     tot_time / self.data.shape[0]))
            if self._start_pos == 0:
                time_per_pix = tot_time / self._end_pos  # in seconds
            else:
                print('Time remaining: {} hours'.format(np.round((num_pos - self._end_pos) * time_per_pix / 3600, 2)))

            self._write_results_chunk()
            self._read_data_chunk()

        if self.verbose:
            print('Finished processing the dataset completely')

        return self.h5_results_grp