gmode_line.py 11.4 KB
Newer Older
Somnath, Suhas's avatar
Somnath, Suhas committed
1
2
3
4
5
6
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 07 15:21:46 2015

@author: Suhas Somnath
"""
7
from __future__ import division, print_function, absolute_import, unicode_literals
8
import sys
9
from os import path, listdir, remove
Somnath, Suhas's avatar
Somnath, Suhas committed
10
from warnings import warn
11
import h5py
12
13
import numpy as np
from scipy.io.matlab import loadmat  # To load parameters stored in Matlab .mat file
14

15
16
17
from sidpy.sid import Translator
from sidpy.hdf.hdf_utils import write_simple_attrs

18
from pyUSID.io.write_utils import VALUES_DTYPE, Dimension
19
20
21
22
from pyUSID.io.hdf_utils import write_main_dataset, create_indexed_group, \
    write_ind_val_dsets

from .df_utils.be_utils import parmsToDict
23

24
25
26
if sys.version_info.major == 3:
    unicode = str

Somnath, Suhas's avatar
Somnath, Suhas committed
27

Somnath, Suhas's avatar
Somnath, Suhas committed
28
class GLineTranslator(Translator):
    """
    Translated G-mode line (bigtimedata.dat) files from actual BE line experiments to HDF5
    """

    def __init__(self, *args, **kwargs):
        # BUG FIX: the original called super(Translator, self).__init__(),
        # which looks up __init__ on the class *after* Translator in the MRO
        # and therefore skips Translator's own initializer entirely.
        # Naming this class makes Translator.__init__ run as intended.
        super(GLineTranslator, self).__init__(*args, **kwargs)
        # Samples recorded per pixel; overwritten in translate()
        self.points_per_pixel = 1
        # Number of scan lines actually present in the raw file; set in translate()
        self.num_rows = 1
        # Size in bytes of one scan line of the raw .dat file; set in translate()
        self.__bytes_per_row__ = 1

38
    @staticmethod
    def is_valid_file(data_path):
        """
        Checks whether the provided file can be read by this translator

        Parameters
        ----------
        data_path : str
            Path to raw data file

        Returns
        -------
        obj : str
            Path to file that will be accepted by the translate() function if
            this translator is indeed capable of translating the provided file.
            Otherwise, None will be returned
        """
        data_path = path.abspath(data_path)

        orig_path = None
        if path.isfile(data_path):
            ext = data_path.split('.')[-1]
            if ext.lower() not in ['jpg', 'png', 'jpeg', 'tiff', 'mat', 'txt',
                                   'dat', 'xls', 'xlsx']:
                return None

            # we only care about the folder names at this point...
            orig_path = data_path

            # Assume that the file is amongst all other data files
            folder_path, _ = path.split(data_path)
        else:
            folder_path = data_path

        # ROBUSTNESS FIX: the original indexed listdir(...)[0] directly and
        # raised IndexError for an empty folder; an empty folder is simply
        # not translatable.
        contents = listdir(path=folder_path)
        if not contents:
            return None

        data_path = path.join(folder_path, contents[0])
        basename, parm_paths, data_paths = GLineTranslator._parse_file_path(data_path)

        # The provided file must be one of the parameter or data files of the
        # experiment, otherwise it does not belong to this dataset
        if orig_path is not None:
            if not any([orig_path in sel_dict.values() for sel_dict in
                        [parm_paths, data_paths]]):
                return None

        # Need both parameter files (.txt + .mat) and at least one data file
        if len(parm_paths) == 2 and len(data_paths) > 0:
            return parm_paths['parm_txt']

        return None
85

Somnath, Suhas's avatar
Somnath, Suhas committed
86
87
88
89
    def translate(self, file_path):
        """
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ----------
        file_path : String / unicode
            Absolute path of any file in the directory

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the h5 file
        """
        file_path = path.abspath(file_path)
        # Figure out the basename of the data:
        (basename, parm_paths, data_paths) = self._parse_file_path(file_path)
        
        (folder_path, unused) = path.split(file_path)
        h5_path = path.join(folder_path, basename+'.h5')
        
        # Overwrite any previous translation of the same dataset
        if path.exists(h5_path):
            remove(h5_path)
        
        # Load parameters from .mat file - 'BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'
        matread = loadmat(parm_paths['parm_mat'], variable_names=['BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'])
        be_wave = np.float32(np.squeeze(matread['BE_wave']))

        # Need to take the complex conjugate if reading from a .mat file
        # FFT_BE_wave = np.conjugate(np.complex64(np.squeeze(matread['FFT_BE_wave'])))
        
        num_cols = int(matread['total_cols'][0][0])
        expected_rows = int(matread['total_rows'][0][0])
        # One excitation waveform is applied per pixel
        self.points_per_pixel = len(be_wave)
        
        # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
        is_beps, parm_dict = parmsToDict(parm_paths['parm_txt'])
        
        # Get file byte size:
        # For now, assume that bigtime_00 always exists and is the main file
        file_size = path.getsize(data_paths[0])
        
        # Calculate actual number of lines since the first few lines may not be saved
        # (raw samples are stored as 4-byte floats on disk)
        self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
        if self.num_rows % 1:
            # file size is not an integer multiple of the line size -> truncated data
            warn('Error - File has incomplete rows')
            return None
        else:
            self.num_rows = int(self.num_rows)

        samp_rate = parm_dict['IO_rate_[Hz]']
        ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

        # method 1 for calculating the correct excitation frequency:
        # snap the nominal frequency so that an integer number of periods
        # fits exactly into the duration of one pixel
        pixel_duration = 1.0 * self.points_per_pixel / samp_rate
        num_periods = pixel_duration * ex_freq_nominal
        ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

        # method 2 for calculating the exact excitation frequency:
        """
        fft_ex_wfm = np.abs(np.fft.fftshift(np.fft.fft(be_wave)))
        w_vec = np.linspace(-0.5 * samp_rate, 0.5 * samp_rate - 1.0*samp_rate / self.points_per_pixel,
                            self.points_per_pixel)
        hot_bins = np.squeeze(np.argwhere(fft_ex_wfm > 1E+3))
        ex_freq_correct = w_vec[hot_bins[-1]]
        """

        # correcting the excitation frequency - will be VERY useful during analysis and filtering
        parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

        # Some very basic information that can help the processing crew
        parm_dict['num_bins'] = self.points_per_pixel
        parm_dict['grid_num_rows'] = self.num_rows
        parm_dict['data_type'] = 'G_mode_line'
            
        if self.num_rows != expected_rows:
            print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))
        
        # Calculate number of points to read per line:
        self.__bytes_per_row__ = int(file_size/self.num_rows)

        # First finish writing all global parameters, create the file too:
        h5_f = h5py.File(h5_path, 'w')
        global_parms = dict()
        global_parms['data_type'] = 'G_mode_line'
        global_parms['translator'] = 'G_mode_line'
        write_simple_attrs(h5_f, global_parms)

        # All experiment parameters live as attributes of the Measurement group
        meas_grp = create_indexed_group(h5_f, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        # One position dimension (row index) and one spectroscopic dimension
        # (the excitation waveform tiled across all columns of a line)
        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

        first_dat = True
        for key in data_paths.keys():
            # Now that the file has been created, go over each raw data file:
            # 1. write all ancillary data. Link data. 2. Write main data sequentially

            """ We only allocate the space for the main data here.
            This does NOT change with each file. The data written to it does.
            The auxiliary datasets will not change with each raw data file since
            only one excitation waveform is used"""
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            if first_dat:
                if len(data_paths) > 1:
                    # All positions and spectra are shared between channels
                    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
                    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)
                elif len(data_paths) == 1:
                    # Single channel: ancillary datasets go inside the channel group
                    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(chan_grp, pos_desc, is_spectral=False)
                    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(chan_grp, spec_desc, is_spectral=True)

                first_dat = False
            else:
                pass

            # Allocate the (empty) main dataset; values are streamed in
            # line-by-line by _read_data() below
            h5_main = write_main_dataset(chan_grp, (self.num_rows, self.points_per_pixel * num_cols), 'Raw_Data',
                                         'Deflection', 'V',
                                         None, None,
                                         h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                                         h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                         chunks=(1, self.points_per_pixel), dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            self._read_data(data_paths[key], h5_main)
            
        h5_f.close()
        print('G-Line translation complete!')

        return h5_path
218

Somnath, Suhas's avatar
Somnath, Suhas committed
219
    @staticmethod
    def _parse_file_path(data_filepath):
        """
        Goes through the file directory and figures out the basename and the 
        parameter (text and .mat), data file paths (for each analog input channel)
        
        Parameters
        -----------------
        data_filepath : string / unicode
            absolute path of any file in the data folder
        
        Returns
        ----------------
        basename : string / unicode
            base name of the experiment\n
        parm_paths : dictionary
            paths for the text and .mat parameter files\n
            parm_text : absolute file path of the parameter text file\n
            parm_mat : absolute file path of the parameter .mat file
        data_paths : dictionary of the paths for the big-time data files.
            key : index of the analog input that generated the data file\n
            value : absolute file path of the data file
        """
        # The experiment base name is the name of the enclosing folder
        folder_path, _ = path.split(data_filepath)
        _, basename = path.split(folder_path)

        # There may be one or two bigdata files. May need both paths
        parm_paths = dict()
        data_paths = dict()
        targ_str = 'bigtime_0'
        for file_name in listdir(folder_path):

            # BUG FIX: the original compared str.find(...) > 0, which silently
            # skipped files whose name *starts* with the search string because
            # find() returns 0 in that case. Compare against -1 (not found).
            if file_name.endswith('.txt') and file_name.find('parm') != -1:
                parm_paths['parm_txt'] = path.join(folder_path, file_name)

            elif file_name.endswith('_all.mat'):
                parm_paths['parm_mat'] = path.join(folder_path, file_name)

            elif file_name.endswith('.dat'):
                ind = file_name.find(targ_str)
                if ind != -1:
                    # The digit right after 'bigtime_0' is the analog input index
                    data_paths[int(file_name[ind + len(targ_str)])] = path.join(folder_path, file_name)

        return basename, parm_paths, data_paths
Somnath, Suhas's avatar
Somnath, Suhas committed
264

265
    def _read_data(self, filepath, h5_dset):
        """
        Reads the .dat file and populates the .h5 dataset

        Parameters
        ---------
        filepath : String / unicode
            absolute path of the data file for a particular analog input channel
        h5_dset : HDF5 dataset reference
            Reference to the target Raw_Data dataset

        Returns
        ---------
        None
        """
        # Read line by line and write to h5 so the (potentially huge) raw file
        # is never held in memory all at once
        with open(filepath, 'rb') as file_handl:
            for row_indx in range(self.num_rows):

                # Progress indicator every 10 lines
                if row_indx % 10 == 0:
                    print('Reading line {} of {}'.format(row_indx, self.num_rows))

                file_handl.seek(row_indx * self.__bytes_per_row__, 0)
                # BUG FIX: np.fromstring is deprecated for binary input and was
                # removed in NumPy 2.0; np.frombuffer is the exact drop-in
                # replacement for decoding raw little-endian float32 samples.
                data_vec = np.frombuffer(file_handl.read(self.__bytes_per_row__), dtype='f')
                # Down-cast to half precision - matches the float16 dataset
                # allocated in translate() to halve the output file size
                h5_dset[row_indx] = np.float16(data_vec)
                # Flush after every line so a crash loses at most one line
                h5_dset.file.flush()

        print('Finished reading file: {}!'.format(filepath))