Unverified Commit ebc9786b authored by CompPhysChris, committed by GitHub

Merge pull request #126 from pycroscopy/cades_dev

Merge in filtering updates
parents e65eb92a 6c4ece4e
@@ -30,48 +30,12 @@
# Finally, pycroscopy itself
import pycroscopy as px
# set up notebook to show plots within the notebook
% matplotlib inline
```
%% Cell type:code id: tags:
``` python
ui_file_window = False
try:
    from PyQt5 import QtWidgets

    def uiGetFile(filter='H5 file (*.h5)', caption='Select File'):
        """
        Presents a File dialog used for selecting a file
        and returns the absolute filepath of the selected file\n
        Parameters
        ----------
        filter : String or list of strings
            file extensions to look for
        caption : (Optional) String
            Title for the file browser window
        Returns
        -------
        file_path : String
            Absolute path of the chosen file
        """
        app = QtWidgets.QApplication([])
        path = QtWidgets.QFileDialog.getOpenFileName(caption=caption, filter=filter)[0]
        app.exit()
        del app

        return str(path)

    ui_file_window = True
except ImportError:
    print('***********************************************************')
    print('*                                                         *')
    print('*  You will need to specify the file path manually below  *')
    print('*                                                         *')
    print('***********************************************************')

save_plots = False
```
%% Cell type:markdown id: tags:
## Make the data pycroscopy compatible
@@ -90,17 +54,13 @@
You can select the desired file type by choosing the second option in the pull-down menu at the bottom right of the file window.
%% Cell type:code id: tags:
``` python
if ui_file_window:
    input_file_path = uiGetFile(caption='Select translated .h5 file or raw experiment data',
                                filter='Parameters for raw G-Line data (*.txt);; \
    input_file_path = px.io_utils.uiGetFile(caption='Select translated .h5 file or raw experiment data',
                                            file_filter='Parameters for raw G-Line data (*.txt);; \
                                                         Translated file (*.h5)')
else:
    input_file_path = '/Volumes/IFgroup/SPM software development/Raw_Data/G_mode/GVS/2015_04_08_PZT_AuCu_nanocaps/GLine_8V_10kHz_256x256_0001/GLine_8V_10kHz_256x256.h5'

folder_path, _ = path.split(input_file_path)

if input_file_path.endswith('.txt'):
    print('Translating raw data to h5. Please wait')
    tran = px.GLineTranslator()
@@ -118,17 +78,11 @@
%% Cell type:code id: tags:
``` python
hdf = px.ioHDF5(h5_path)
h5_main = px.hdf_utils.getDataSet(hdf.file, 'Raw_Data')[-1]
parms_dict = h5_main.parent.parent.attrs
samp_rate = parms_dict['IO_rate_[Hz]']
ex_freq = parms_dict['BE_center_frequency_[Hz]']
h5_spec_vals = px.hdf_utils.getAuxData(h5_main, auxDataName='Spectroscopic_Values')[0]
pixel_ex_wfm = h5_spec_vals[0, :int(h5_spec_vals.shape[1]/parms_dict['grid_num_cols'])]
```
%% Cell type:markdown id: tags:
##### Inspect the contents of this h5 data file
@@ -158,65 +112,138 @@
print('{} : {}'.format(key, hdf.file['/Measurement_000'].attrs[key]))
```
%% Cell type:markdown id: tags:
## Extract necessary parameters:
%% Cell type:code id: tags:
``` python
parms_dict = h5_main.parent.parent.attrs
samp_rate = parms_dict['IO_rate_[Hz]']
ex_freq = parms_dict['BE_center_frequency_[Hz]']
pixel_ex_wfm = h5_spec_vals[0, :int(h5_spec_vals.shape[1]/parms_dict['grid_num_cols'])]
pts_per_pix = pixel_ex_wfm.size
pts_per_line = h5_main.shape[1]
```
%% Cell type:markdown id: tags:
## Inspect the raw data:
%% Cell type:code id: tags:
``` python
row_ind = 40
raw_row = h5_main[row_ind].reshape(-1, pixel_ex_wfm.size)
raw_row = h5_main[row_ind].reshape(-1, pts_per_pix)
fig, axes = px.plot_utils.plot_loops(pixel_ex_wfm, raw_row, x_label='Bias (V)', title='Raw Measurement',
plots_on_side=4, y_label='Deflection (a.u.)',
subtitles='Row: ' + str(row_ind) + ' Col:')
```
%% Cell type:markdown id: tags:
## Visualizing information in Fourier space
Visualizing the data in Fourier space provides information about the noise floor and reveals which frequencies are dominated by noise and which by the signal.
This visualization will guide the design of signal filters to remove the noise.
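As a rough illustration (this is a generic estimate, not the noise-floor estimator pycroscopy applies internally), the noise floor can be approximated as the median FFT magnitude far above the excitation frequency:
%% Cell type:code id: tags:
``` python
# Rough noise-floor proxy (illustrative only): median |FFT| magnitude over the
# highest 5% of frequencies, far above the excitation and its harmonics
resp_fft = np.fft.fftshift(np.fft.fft(h5_main[40]))
high_freq_band = np.abs(resp_fft[int(0.95 * resp_fft.size):])
print('Approximate noise floor: {:.3g} (a.u.)'.format(np.median(high_freq_band)))
```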
%% Cell type:code id: tags:
``` python
# Preparing the frequency axis:
w_vec = 1E-3*np.linspace(-0.5*samp_rate, 0.5*samp_rate - samp_rate/pts_per_line, pts_per_line)
row_ind = 40
F_resp = np.fft.fftshift(np.fft.fft(h5_main[row_ind]))
fig, ax = plt.subplots(figsize=(12, 7))
ax.axvline(x=1E-3*ex_freq, color='r', linewidth=2, label='Excitation')
ax.plot(w_vec[int(0.5*len(w_vec)):], np.log10(np.abs(F_resp[int(0.5*len(w_vec)):])), label='Response')
ax.set_xlabel('Frequency (kHz)', fontsize=16)
ax.set_ylabel('Amplitude (a.u.)', fontsize=16)
ax.legend(fontsize=14)
ax.set_xscale('log')
ax.set_xlim(ex_freq*1E-4, samp_rate*0.5E-3)
ax.set_title('Noise Spectrum for row ' + str(row_ind), fontsize=16)
px.plot_utils.set_tick_font_size(ax, 14)
if save_plots:
    fig.savefig(os.path.join(other_figures_folder,
                             'noise_spectrum_line_' + str(row_ind) + '.png'),
                format='png', dpi=150);
```
%% Cell type:markdown id: tags:
## Try different FFT filters on the data
Good combinations for frequency filters are:
* Just a HarmonicPassFilter
* LowPassFilter + NoiseBandFilter
It is always a good idea to combine these frequency filters with noise thresholding. Try noise tolerance values between 1E-6 and 1E-3, for example as sketched below.
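As a sketch of the second combination listed above (the cutoff frequency, band centers/widths and tolerance are example values only, taken from the commented-out parameters in the next cell), the filters can be stacked and paired with a noise threshold like this:
%% Cell type:code id: tags:
``` python
# Example only: LowPassFilter + NoiseBandFilter combined with noise thresholding
lpf = px.processing.fft.LowPassFilter(pts_per_line, samp_rate, 110E+3)   # keep everything below ~110 kHz
nbf = px.processing.fft.NoiseBandFilter(pts_per_line, samp_rate,
                                        [8.3E+3, 15.6E+3, 48.9E+3],      # noise band centers (Hz)
                                        [1E+3, 0.5E+3, 0.1E+3])          # corresponding band widths (Hz)
freq_filts = [lpf, nbf]
noise_tolerance = 1E-5                                                   # try values between 1E-6 and 1E-3
```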
%% Cell type:code id: tags:
``` python
filter_parms = dict()
filter_parms['noise_threshold'] = 1E-4
filter_parms['comb_[Hz]'] = [ex_freq, 1E+3, 10]
# filter_parms['LPF_cutOff_[Hz]'] = -1
# Noise frequencies - 15.6 kHz ~ 14-17.5, 7.8-8.8, 45-49.9 ~ 48.9414 kHz
# filter_parms['band_filt_[Hz]'] = None # [[8.3E+3, 15.6E+3, 48.9414E+3], [1E+3, 0.5E+3, 0.1E+3]]
# filter_parms['phase_[rad]'] = 0
filter_parms['samp_rate_[Hz]'] = samp_rate
filter_parms['num_pix'] = 1
hpf = px.processing.fft.HarmonicPassFilter(pts_per_line, samp_rate, ex_freq, 1E+3, 10)
lpf = px.processing.fft.LowPassFilter(pts_per_line, samp_rate, 110E+3)
nbf = px.processing.fft.NoiseBandFilter(pts_per_line, samp_rate, [0], [17E+3])
freq_filts = [hpf]
noise_tolerance = 1E-4
# Test filter on a single line:
row_ind = 40
filt_line, fig_filt, axes_filt = px.processing.gmode_utils.test_filter(h5_main[row_ind], filter_parms, samp_rate,
                                                                       show_plots=True, use_rainbow_plots=False)
fig_filt.savefig(path.join(folder_path, 'FFT_filter_on_line_{}.png'.format(row_ind)), format='png', dpi=300)
filt_line, fig_filt, axes_filt = px.processing.gmode_utils.test_filter(h5_main[row_ind],
                                                                       frequency_filters=freq_filts,
                                                                       noise_threshold=noise_tolerance,
                                                                       show_plots=True)
if save_plots:
    fig_filt.savefig(path.join(folder_path, 'FFT_filter_on_line_{}.png'.format(row_ind)), format='png', dpi=300)

filt_row = filt_line.reshape(-1, pixel_ex_wfm.size)
# raw_row = h5_main[row_ind].reshape(-1, pts_per_pix)

fig, axes = px.plot_utils.plot_loops(pixel_ex_wfm, filt_row, x_label='Bias (V)', title='FFT Filtering',
                                     plots_on_side=4, y_label='Deflection (a.u.)',
                                     subtitles='Row: ' + str(row_ind) + ' Col:')
# fig.savefig(path.join(folder_path, 'FFT_filtered_loops_on_line_{}.png'.format(row_ind)), format='png', dpi=300)
if save_plots:
    fig.savefig(path.join(folder_path, 'FFT_filtered_loops_on_line_{}.png'.format(row_ind)), format='png', dpi=300)
```
%% Cell type:markdown id: tags:
## Apply selected filter to entire dataset
%% Cell type:code id: tags:
``` python
# h5_filt_grp = px.hdf_utils.findH5group(h5_main, 'FFT_Filtering')[-1]
h5_filt_grp = px.processing.gmode_utils.fft_filter_dataset(h5_main, filter_parms, write_filtered=True)
filter_parms = dict()
if freq_filts is not None:
    for filter in freq_filts:
        filter_parms.update(filter.get_parms())
if noise_tolerance is not None:
    filter_parms['noise_threshold'] = noise_tolerance

h5_filt_grp = px.hdf_utils.check_for_old(h5_main, 'FFT_Filtering', new_parms=filter_parms)

if h5_filt_grp is None:
    sig_filt = px.processing.SignalFilter(h5_main, frequency_filters=freq_filts, noise_threshold=noise_tolerance,
                                          write_filtered=True, write_condensed=False, num_pix=1, verbose=True)
    h5_filt_grp = sig_filt.compute()
else:
    print('Taking previously computed results')
h5_filt = h5_filt_grp['Filtered_Data']
```
%% Cell type:code id: tags:
``` python
# Test to make sure the filter gave the same results
filt_row = h5_filt[row_ind].reshape(-1, pixel_ex_wfm.size)
fig, axes = px.plot_utils.plot_loops(pixel_ex_wfm, filt_row, x_label='Bias (V)', title='FFT Filtering',
plots_on_side=4, y_label='Deflection (a.u.)',
subtitles='Row: ' + str(row_ind) + ' Col:')
```
%% Cell type:markdown id: tags:
# Loading, reshaping, visualizing data using pycroscopy
### Suhas Somnath, Chris R. Smith and Stephen Jesse
The Center for Nanophase Materials Sciences and The Institute for Functional Imaging of Materials <br>
Oak Ridge National Laboratory<br>
8/01/2017
Here, we will demonstrate how to load, reshape, and visualize multidimensional imaging datasets. For this example, we will load a three dimensional Band Excitation imaging dataset acquired from an atomic force microscope.
%% Cell type:code id: tags:
``` python
# Make sure pycroscopy and wget are installed
!pip install pycroscopy
!pip install -U wget
```
%% Cell type:code id: tags:
``` python
# Ensure python 3 compatibility
from __future__ import division, print_function, absolute_import
# Import necessary libraries:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from IPython.display import display
from os import remove
import wget
import pycroscopy as px
# set up notebook to show plots within the notebook
% matplotlib inline
```
%% Cell type:markdown id: tags:
## Load pycroscopy compatible file
For simplicity we will use a dataset that has already been translated from its original data format into a pycroscopy-compatible hierarchical data format (HDF5 or H5) file.
#### HDF5 or H5 files:
* are like smart containers that can store matrices with data, folders to organize these datasets, images, metadata like experimental parameters, links or shortcuts to datasets, etc.
* are readily compatible with high-performance computing facilities
* scale very efficiently from few kilobytes to several terabytes
* can be read and modified using any language including Python, Matlab, C/C++, Java, Fortran, Igor Pro, etc.
Python uses the h5py library to read, write, and access HDF5 files.
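As a minimal sketch of typical h5py usage (the file name below is hypothetical; the dataset path and attribute mirror those used later in this notebook):
%% Cell type:code id: tags:
``` python
# Open an HDF5 file in read-only mode (hypothetical file name)
with h5py.File('example.h5', mode='r') as h5_f:
    # list the groups and datasets at the root of the file
    print(list(h5_f.keys()))
    # read a dataset into memory as a numpy array
    raw = h5_f['Measurement_000/Channel_000/Raw_Data'][()]
    print(raw.shape)
    # read a metadata attribute attached to a group
    print(h5_f['Measurement_000'].attrs['num_bins'])
```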
%% Cell type:code id: tags:
``` python
# Downloading the example file from the pycroscopy Github project
url = 'https://raw.githubusercontent.com/pycroscopy/pycroscopy/master/data/BELine_0004.h5'
h5_path = 'temp.h5'
_ = wget.download(url, h5_path)
print('Working on:\n' + h5_path)
# Open the file in read-only mode
h5_file = h5py.File(h5_path, mode='r')
# Here, h5_file is an active handle to the open file
```
%% Cell type:markdown id: tags:
## Inspect the contents of this h5 data file
The file contents are stored in a tree structure, just like files on a contemporary computer. The file contains datagroups (similar to file folders) and datasets (similar to spreadsheets).
There are several datasets in the file and these store:
* the actual measurement collected from the experiment,
* spatial location on the sample where each measurement was collected,
* information to support and explain the spectral data collected at each location
* Since pycroscopy stores results from processing and analyses performed on the data in the same file, these datasets and datagroups are present as well
* any other relevant ancillary information
%% Cell type:code id: tags:
``` python
print('Datasets and datagroups within the file:\n------------------------------------')
px.hdf_utils.print_tree(h5_file)
```
%% Cell type:markdown id: tags:
#### Accessing datasets and datagroups
Datasets and datagroups can be accessed by specifying the path, just like a webpage or a file in a directory
%% Cell type:code id: tags:
``` python
print('Datagroup corresponding to a channel of information:')
print(h5_file['/Measurement_000/Channel_000/'])
print('\nDataset containing the raw data collected from the microscope:')
print(h5_file['/Measurement_000/Channel_000/Raw_Data'])
```
%% Cell type:markdown id: tags:
The output above shows that the "Raw_Data" dataset is a two-dimensional dataset with complex-valued (a + bi) entries at each element of the 2D matrix.
This dataset is contained in a datagroup called "Channel_000", which is itself contained in a datagroup called "Measurement_000".
The datagroup "Channel_000" contains several "members", which can be datasets such as "Raw_Data" or other datagroups.
### Attributes
HDF5 datasets and datagroups can also store metadata such as experimental parameters. These metadata can be text, numbers, small lists of numbers or text, etc. Such metadata can be very important for understanding the datasets and for guiding analysis routines.
%% Cell type:code id: tags:
``` python
print('\nMetadata or attributes in a datagroup\n------------------------------------')
for key in h5_file['/Measurement_000'].attrs:
print('{} : {}'.format(key, px.hdf_utils.get_attr(h5_file['/Measurement_000'], key)))
```
%% Cell type:markdown id: tags:
In the case of the spectral dataset under investigation, a spectrum with a single peak was collected at each spatial location on a two dimensional grid of points. Thus, this dataset has two position dimensions and one spectroscopic dimension (spectra).
In pycroscopy, all spatial dimensions are collapsed to a single dimension and, similarly, all spectroscopic dimensions are collapsed to a single dimension. Thus, the data is stored as a two-dimensional (N x P) matrix with N spatial locations, each with P spectroscopic datapoints.
This general and intuitive format allows imaging data from any instrument, measurement scheme, size, or dimensionality to be represented in the same way.
Such an instrument-independent data format enables a single set of analysis and processing functions to be reused for multiple image formats or modalities.
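As a toy illustration of this flattening (the shapes here are arbitrary, not those of the dataset below), collapsing the position dimensions is simply a reshape in row-major order:
%% Cell type:code id: tags:
``` python
# Toy 3D dataset: 4 rows x 5 columns of positions, 7 spectral points each
ndim = np.random.rand(4, 5, 7)

# Collapse the two position dimensions into one: (4, 5, 7) -> (20, 7)
two_dim = ndim.reshape(4 * 5, 7)

# Row-major ordering: position index p corresponds to (row, col) = divmod(p, 5)
print(two_dim.shape)                         # (20, 7)
print(np.allclose(two_dim[7], ndim[1, 2]))   # position 7 -> row 1, column 2
```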
%% Cell type:code id: tags:
``` python
h5_chan_grp = h5_file['/Measurement_000/']
h5_main = h5_chan_grp['Channel_000/Raw_Data']
print('\nThe main dataset:\n------------------------------------')
print(h5_main)
print('Original three dimensional matrix had {} rows and {} columns \
each having {} spectral points'.format(h5_chan_grp.attrs['grid_num_rows'],
h5_chan_grp.attrs['grid_num_cols'],
h5_chan_grp.attrs['num_bins']))
print('Collapsing the position dimensions: ({}x{}, {}) -> ({}, {})'.format(
h5_chan_grp.attrs['grid_num_rows'],
h5_chan_grp.attrs['grid_num_cols'],
h5_chan_grp.attrs['num_bins'],
h5_chan_grp.attrs['grid_num_rows'] * h5_chan_grp.attrs['grid_num_cols'],
h5_chan_grp.attrs['num_bins']))
```
%% Cell type:markdown id: tags:
Each main dataset is always accompanied by four ancillary datasets that explain:
* the position and spectroscopic value of any given element in the dataset
* the original dimensionality of the dataset
* how to reshape the data back to its N dimensional form
In the case of the 3D dataset under investigation, the positions will be arranged as row0-col0, row0-col1, ..., row0-colN, row1-col0, and so on.
The spectroscopic information remains unchanged.
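As a small sketch of that ordering (a hypothetical 3 x 4 grid, not the actual dataset), the position-indices table would look like this, with the column index varying fastest:
%% Cell type:code id: tags:
``` python
# Hypothetical 3 x 4 grid of positions, listed row0-col0, row0-col1, ...
rows, cols = 3, 4
pos_inds = np.vstack([np.tile(np.arange(cols), rows),       # column index varies fastest
                      np.repeat(np.arange(rows), cols)]).T  # row index varies slowest
print(pos_inds[:6])
# [[0 0]
#  [1 0]
#  [2 0]
#  [3 0]
#  [0 1]
#  [1 1]]
```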
%% Cell type:code id: tags:
``` python
print('\nThe ancillary datasets:\n------------------------------------')
print(h5_file['/Measurement_000/Channel_000/Position_Indices'])
print(h5_file['/Measurement_000/Channel_000/Position_Values'])
print(h5_file['/Measurement_000/Channel_000/Spectroscopic_Indices'])
print(h5_file['/Measurement_000/Channel_000/Spectroscopic_Values'])
print('\nSpatial dimensions:', px.hdf_utils.get_attr(
h5_file['/Measurement_000/Channel_000/Position_Values'], 'labels'))
print('Spectroscopic dimensions:', px.hdf_utils.get_attr(
h5_file['/Measurement_000/Channel_000/Spectroscopic_Values'], 'labels'))
```
%% Cell type:markdown id: tags:
## Visualizing the position dimensions
%% Cell type:code id: tags:
``` python
fig, axis = plt.subplots(figsize=(6,6))
axis.plot(h5_file['/Measurement_000/Channel_000/Position_Indices'][:, 0],
'orange', label='column')
axis.plot(h5_file['/Measurement_000/Channel_000/Position_Indices'][:, 1],
'black', label='row')
axis.legend()
axis.set_title('Position Indices');
```
%% Cell type:markdown id: tags:
## Inspecting the measurement at a single spatial pixel:
%% Cell type:code id: tags:
``` python
# specify a pixel index of interest
pixel_ind = 128
# ensuring that this index is within the bounds of the dataset
pixel_ind = max(0, min(int(pixel_ind), h5_main.shape[0] - 1))
# Extracting the frequency vector (x-axis) to plot the spectra against
freq_vec = h5_file['/Measurement_000/Channel_000/Bin_Frequencies'][()] * 1E-3
fig, axis = plt.subplots(figsize=(6,6))
axis.plot(freq_vec, np.abs(h5_main[pixel_ind]))
axis.set_xlabel('Frequency (kHz)')
axis.set_ylabel('Amplitude (a. u.)')
axis.set_title('Spectra at position {}'.format(pixel_ind));
```
%% Cell type:markdown id: tags:
## Inspecting the spatial distribution of the amplitude at a single frequency
If the frequency is fixed, the spatial distribution would result in a 2D spatial map.
Note that the spatial dimensions are collapsed to a single dimension in all pycroscopy datasets. Thus, the 1D vector at the specified frequency needs to be reshaped back to a 2D map
%% Cell type:code id: tags:
``` python
# specify a frequency index of interest
freq_ind = 40
# ensuring that this index is within the bounds of the dataset
freq_ind = max(0, min(int(freq_ind), h5_main.shape[1] - 1))
# extracting the position data (1D) at the specified frequency index
data_vec = np.abs(h5_main[:, freq_ind])
# Constructing the 2D spatial map from the 1D vector:
spat_map = np.reshape(data_vec, (h5_chan_grp.attrs['grid_num_rows'],
h5_chan_grp.attrs['grid_num_cols']))
fig, axis = plt.subplots(figsize=(6,6))
axis.imshow(spat_map, cmap='inferno')
axis.set_xlabel('Columns')
axis.set_ylabel('Rows')
freq_vec = h5_file['/Measurement_000/Channel_000/Bin_Frequencies'][()] * 1E-3
axis.set_title('Amplitude at frequency {} kHz '.format(np.round(freq_vec[freq_ind], 2)));
```
%% Cell type:markdown id: tags:
## Reshaping data back to N dimensions
There are several utility functions in pycroscopy that make it easy to access and reshape datasets. Here we show how to return your data to its N dimensional form in one easy step.
While this dataset is a simple example that could be reshaped manually, such reshape operations become especially useful for 5-, 6-, 7- or higher-dimensional datasets.
%% Cell type:code id: tags:
``` python
ndim_data, success = px.hdf_utils.reshape_to_Ndims(h5_main)
if not success:
    print('There was a problem automatically reshaping the dataset. \
Attempting to reshape manually')
    ndim_data = np.reshape(h5_main[()], (h5_chan_grp.attrs['grid_num_rows'],
                                         h5_chan_grp.attrs['grid_num_cols'],
                                         h5_chan_grp.attrs['num_bins']))
else:
    print('Collapsed dataset originally of shape: ', h5_main.shape)
    print('Reshaped dataset of shape: ', ndim_data.shape)
```
%% Cell type:markdown id: tags:
## The same data investigation can be performed on the N dimensional dataset:
Here we will plot the spatial maps of the sample at a given frequency again. The reshape operation is no longer necessary and we get the same spatial map again.
%% Cell type:code id: tags:
``` python
# specify a frequency index of interest
freq_ind = 40
# ensuring that this index is within the bounds of the dataset
freq_ind = max(0, min(int(freq_ind), h5_main.shape[1] - 1))
# Constructing the 2D spatial map from the 3D dataset
spat_map = np.abs(ndim_data[:, :, freq_ind])
fig, axis = plt.subplots(figsize=(6,6))
axis.imshow(spat_map, cmap='inferno')
axis.set_xlabel('Columns')
axis.set_ylabel('Rows')
freq_vec = h5_file['/Measurement_000/Channel_000/Bin_Frequencies'][()] * 1E-3
axis.set_title('Amplitude at frequency {} kHz '.format(np.round(freq_vec[freq_ind], 2)));
```
%% Cell type:markdown id: tags:
## Closing the HDF5 file after data processing or visualization
%% Cell type:code id: tags:
``` python
h5_file.close()
```
%% Cell type:code id: tags:
``` python
# Removing the temporary data file:
remove(h5_path)
```
%% Cell type:code id: tags:
``` python
```
@@ -21,10 +21,11 @@ from .giv_utils import do_bayesian_inference
cap_dtype = np.dtype({'names': ['Forward', 'Reverse'],
'formats': [np.float32, np.float32]})
# TODO : Take lesser used bayesian inference params from kwargs if provided
# TODO: Allow resuming of computation
class GIVBayesian(Process):
def __init__(self, h5_main, ex_freq, gain, num_x_steps=250, r_extra=220, **kwargs):
def __init__(self, h5_main, ex_freq, gain, num_x_steps=250, r_extra=110, **kwargs):
"""
Applies Bayesian Inference to General Mode IV (G-IV) data to extract the true current
@@ -54,10 +55,10 @@ class GIVBayesian(Process):
print('ensuring that half steps should be odd, num_x_steps is now', self.num_x_steps)
# take these from kwargs
self.bayesian_parms = {'gam': 0.03, 'e': 10.0, 'sigma': 10.0, 'sigmaC': 1.0, 'num_samples': 2E3}
bayesian_parms = {'gam': 0.03, 'e': 10.0, 'sigma': 10.0, 'sigmaC': 1.0, 'num_samples': 2E3}
self.parm_dict = {'freq': self.ex_freq, 'num_x_steps': self.num_x_steps}
self.parm_dict.update(self.bayesian_parms)
self.parm_dict = {'freq': self.ex_freq, 'num_x_steps': self.num_x_steps, 'r_extra': self.r_extra}
self.parm_dict.update(bayesian_parms)
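A hedged usage sketch of the class above (argument values are placeholders; it assumes the same compute() entry point exposed by other pycroscopy Process subclasses such as SignalFilter):
``` python
# Placeholder excitation frequency (Hz) and transimpedance gain - not real experimental values
bayes = GIVBayesian(h5_main, ex_freq=200, gain=9, num_x_steps=250, r_extra=110)
h5_bayes_grp = bayes.compute()  # expected to write results back into the parent HDF5 file
```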