Unverified Commit ebc9786b authored by CompPhysChris's avatar CompPhysChris Committed by GitHub
Browse files

Merge pull request #126 from pycroscopy/cades_dev

Merge in filtering updates
parents e65eb92a 6c4ece4e
This source diff could not be displayed because it is too large. You can view the blob instead.
%% Cell type:code id: tags:
``` python
# fake BEPS generator
from __future__ import division, print_function, unicode_literals, absolute_import
import os
import matplotlib.pyplot as plt
% matplotlib inline
import numpy as np
import pandas as pd
import sys
from PIL import Image
from scipy.special import erf
import pycroscopy as px
```
%% Cell type:code id: tags:
``` python
max_mem = 1024*8 # Maximum memory to use, in Mbs. Default = 1024
max_cores = None # Number of logical cores to use in fitting. None uses all but 2 available cores.
```
%% Cell type:code id: tags:
``` python
# Set-up parameters and file paths
data_type = 'BEPSData'
mode = 'DC modulation mode'
field_mode = 'in and out-of-field'
cwd = os.getcwd()
image_folder = px.io.translators.df_utils.beps_gen_utils.beps_image_folder
# Define the dimensions
binning_factor = 4 # Factor by which to downsample the images
n_steps = 64 # Number of UDVS steps per cycle
n_bins = 77 # Number of frequency bins
w1 = 250E3 # Starting frequency
w2 = 325E3 # Ending frequency
FORC_cycles = 3
FORC_repeats = 2
n_cycles = 2
# Define the loop coefficients
a = 1
b = 4
# Path to the HDF5 file that will be created
h5_path = os.path.join(cwd, 'fake.h5')
```
%% Cell type:code id: tags:
``` python
# Generate the data using the input parameters
fdg = px.FakeDataGenerator(max_mem)
h5_path = fdg.translate(h5_path, n_steps, n_bins, w1, w2,
n_cycles=n_cycles, FORC_cycles=FORC_cycles,
FORC_repeats=FORC_repeats, loop_a=a, loop_b=b, data_type=data_type,
mode=mode, field_mode=field_mode, image_folder=image_folder,
bin_factor=binning_factor)
```
%% Cell type:code id: tags:
``` python
# Open the H5 file and find the Raw_Data dataset
hdf = px.ioHDF5(h5_path)
print('Working on:\n' + h5_path)
h5_main = px.hdf_utils.getDataSet(hdf.file, 'Raw_Data')[-1]
```
%% Cell type:markdown id: tags:
##### Inspect the contents of this h5 data file
The file contents are stored in a tree structure, just like files on a conventional computer.
The data is stored as a 2D matrix (position, spectroscopic value) regardless of the dimensionality of the data. Thus, the positions will be arranged as row0-col0, row0-col1, ..., row0-colN, row1-col0, ... and the data for each position is stored in the order in which it was chronologically collected.
The main dataset is always accompanied by four ancillary datasets that explain the position and spectroscopic value of any given element in the dataset.
%% Cell type:code id: tags:
``` python
print('Datasets and datagroups within the file:\n------------------------------------')
px.io.hdf_utils.print_tree(hdf.file)
print('\nThe main dataset:\n------------------------------------')
print(h5_main)
print('\nThe ancillary datasets:\n------------------------------------')
print(hdf.file['/Measurement_000/Channel_000/Position_Indices'])
print(hdf.file['/Measurement_000/Channel_000/Position_Values'])
print(hdf.file['/Measurement_000/Channel_000/Spectroscopic_Indices'])
print(hdf.file['/Measurement_000/Channel_000/Spectroscopic_Values'])
print('\nMetadata or attributes in a datagroup\n------------------------------------')
for key in hdf.file['/Measurement_000'].attrs:
print('{} : {}'.format(key, hdf.file['/Measurement_000'].attrs[key]))
```
%% Cell type:code id: tags:
``` python
# Look up the 'Position_Indices' ancillary dataset linked to the main dataset (last match is used)
h5_pos_inds = px.hdf_utils.getAuxData(h5_main, auxDataName='Position_Indices')[-1]
# Sort order and sizes of the position dimensions, computed on the transposed index matrix
# NOTE(review): presumably the helpers expect dimensions along rows, hence the transpose - confirm
pos_sort = px.hdf_utils.get_sort_order(np.transpose(h5_pos_inds))
pos_dims = px.hdf_utils.get_dimensionality(np.transpose(h5_pos_inds), pos_sort)
# Dimension names from the 'labels' attribute, re-ordered with the same sort order
pos_labels = np.array(h5_pos_inds.attrs['labels'])[pos_sort]
print(pos_labels, pos_dims)
# Experimental parameters are stored as attributes of the measurement group
parm_dict = hdf.file['/Measurement_000'].attrs
# The file-level 'data_type' attribute decides whether the cKPFM-specific handling is needed
is_ckpfm = hdf.file.attrs['data_type'] == 'cKPFMData'
if is_ckpfm:
# These values are only defined for cKPFM data; later cells use them under the same guard
num_write_steps = parm_dict['VS_num_DC_write_steps']
num_read_steps = parm_dict['VS_num_read_steps']
# NOTE(review): presumably in-field and out-of-field - confirm
num_fields = 2
```
%% Cell type:code id: tags:
``` python
px.be_viz_utils.jupyter_visualize_be_spectrograms(h5_main)
```
%%%% Output: display_data
%%%% Output: display_data
![]()
%% Cell type:code id: tags:
``` python
# Get the generated SHO guess and fit
h5_sho_gen_group = px.hdf_utils.findH5group(h5_main, 'SHO_Fit')
h5_sho_gen_guess = h5_sho_gen_group[-1]['Guess']
h5_sho_gen_fit = h5_sho_gen_group[-1]['Fit']
# Do the fit on the generated Raw_Data
sho_fitter = px.BESHOmodel(h5_main, parallel=True)
h5_sho_guess = sho_fitter.do_guess(strategy='complex_gaussian', processors=max_cores)
h5_sho_fit = sho_fitter.do_fit(processors=max_cores)
h5_sho_group = h5_sho_guess.parent
```
%% Cell type:code id: tags:
``` python
# Spectroscopic indices of the SHO fit (first match) and the names of its spectroscopic dimensions
h5_sho_spec_inds = px.hdf_utils.getAuxData(h5_sho_fit, auxDataName='Spectroscopic_Indices')[0]
sho_spec_labels = px.io.hdf_utils.get_attr(h5_sho_spec_inds,'labels')
if is_ckpfm:
# It turns out that the read voltage index starts from 1 instead of 0
# Also the VDC indices are NOT repeating. They are just rising monotonically
# Rows of the index matrix that hold the write-bias and read-bias dimensions
write_volt_index = np.argwhere(sho_spec_labels == 'write_bias')[0][0]
read_volt_index = np.argwhere(sho_spec_labels == 'read_bias')[0][0]
# NOTE(review): h5_sho_spec_inds appears to be an HDF5 dataset, so the two assignments
# below likely modify the file contents in place - confirm before re-running this cell
# Shift the read-bias indices down by one so that they start from 0
h5_sho_spec_inds[read_volt_index, :] -= 1
# Overwrite the write-bias indices with a pattern that repeats each step num_fields times
# and tiles the whole sequence num_read_steps times
h5_sho_spec_inds[write_volt_index, :] = np.tile(np.repeat(np.arange(num_write_steps), num_fields), num_read_steps)
# Reshape the 2D (position x spectroscopic) fit results into an N-dimensional array
(Nd_mat, success) = px.io.hdf_utils.reshape_to_Ndims(h5_sho_fit)
print('Reshape Success: ' + str(success))
# Order the spectroscopic labels with the computed sort order and prepend the position labels
sho_spec_sort_order = px.hdf_utils.get_sort_order(h5_sho_spec_inds)
sho_spec_labels_sorted = sho_spec_labels[sho_spec_sort_order]
nd_labels = np.hstack((pos_labels, sho_spec_labels_sorted))
print(nd_labels)
print(Nd_mat.shape)
```
%% Cell type:code id: tags:
``` python
# Visualize the SHO fit written by the generator.
# The step channel must name a spectroscopic dimension that exists in the dataset. The data
# is generated in 'DC modulation mode', so the stepped quantity is 'DC_Offset'; asking for
# 'AC_Amplitude' finds no matching label and raises an IndexError inside the visualizer.
px.be_viz_utils.jupyter_visualize_beps_sho(h5_sho_gen_fit, 'DC_Offset')
```
%%%% Output: error
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-10-30530fe69cf8> in <module>()
----> 1 px.be_viz_utils.jupyter_visualize_beps_sho(h5_sho_gen_fit, 'AC_Amplitude')
~/workspace/pycroscopy/pycroscopy/viz/be_viz_utils.py in jupyter_visualize_beps_sho(h5_sho_dset, step_chan, resp_func, resp_label)
238
239 # reshape to X, Y, step, all others
--> 240 spec_step_dim_ind = np.where(sho_spec_labels == step_chan)[0][0]
241 step_dim_ind = len(pos_dims) + spec_step_dim_ind
242
IndexError: index 0 is out of bounds for axis 0 with size 0
%% Cell type:code id: tags:
``` python
# Visualize the SHO fit computed from the generated raw data.
# The data is generated in 'DC modulation mode', so the stepped spectroscopic dimension is
# 'DC_Offset'; 'AC_Amplitude' is not among the dataset's spectroscopic labels.
px.be_viz_utils.jupyter_visualize_beps_sho(h5_sho_fit, 'DC_Offset')
```
%% Cell type:code id: tags:
``` python
# Do the Loop Fitting on the SHO Fit dataset
# Fit loops to the SHO fit results, reusing fits already present in the file.
# loop_success tells the following cells whether loop guess / fit datasets are available.
loop_success = False
h5_loop_group = px.hdf_utils.findH5group(h5_sho_fit, 'Loop_Fit')
if len(h5_loop_group) > 0:
    # Earlier results exist: take the most recent group instead of recomputing
    loop_success = True
    print('Taking previously computed loop fits')
    h5_loop_guess = h5_loop_group[-1]['Guess']
    h5_loop_fit = h5_loop_group[-1]['Fit']
else:
    try:
        loop_fitter = px.BELoopModel(h5_sho_fit, parallel=True)
        print('No loop fits found. Fitting now....')
        h5_loop_guess = loop_fitter.do_guess(processors=max_cores, max_mem=max_mem)
        h5_loop_fit = loop_fitter.do_fit(processors=max_cores, max_mem=max_mem)
        loop_success = True
    except ValueError:
        # A ValueError anywhere in the fitting steps is reported with this message
        print('Loop fitting is applicable only to DC spectroscopy datasets!')
```
%% Cell type:code id: tags:
``` python
# Prepare some variables for plotting loops fits and guesses
# Plot the Loop Guess and Fit Results
if loop_success:
    # The projected loops are read from the group that holds the loop guess.
    # (Calling projectLoop() without arguments cannot produce this dataset.)
    h5_projected_loops = h5_loop_guess.parent['Projected_Loops']
    h5_proj_spec_inds = px.hdf_utils.getAuxData(h5_projected_loops,
                                                auxDataName='Spectroscopic_Indices')[-1]
    h5_proj_spec_vals = px.hdf_utils.getAuxData(h5_projected_loops,
                                                auxDataName='Spectroscopic_Values')[-1]

    # Reshape the vdc_vec into DC_step by Loop
    sort_order = px.hdf_utils.get_sort_order(h5_proj_spec_inds)
    dims = px.hdf_utils.get_dimensionality(h5_proj_spec_inds[()],
                                           sort_order[::-1])
    # The 'DC_Offset' attribute selects the DC bias entries of the spectroscopic values
    vdc_vec = np.reshape(h5_proj_spec_vals[h5_proj_spec_vals.attrs['DC_Offset']], dims).T

    # Also reshape the projected loops to Positions-DC_Step-Loop
    proj_nd, _ = px.hdf_utils.reshape_to_Ndims(h5_projected_loops)
    proj_3d = np.reshape(proj_nd, [h5_projected_loops.shape[0],
                                   proj_nd.shape[2], -1])
```
%% Cell type:code id: tags:
``` python
# Plot the loop guesses and fits: interactive viewer first, static plots as the fallback
use_static_plots = False
if loop_success:
    if not use_static_plots:
        try:
            px.be_viz_utils.jupyter_visualize_beps_loops(h5_projected_loops, h5_loop_guess, h5_loop_fit)
        except Exception as err:
            # Catch only ordinary errors (not KeyboardInterrupt / SystemExit) and show the
            # cause so that a broken visualizer is not silently hidden by the fallback
            print('There was a problem with the interactive visualizer')
            print(err)
            use_static_plots = True
    if use_static_plots:
        # One figure per loop, each showing all positions
        for iloop in range(h5_loop_guess.shape[1]):
            fig, ax = px.be_viz_utils.plot_loop_guess_fit(vdc_vec[:, iloop], proj_3d[:, :, iloop],
                                                          h5_loop_guess[:, iloop], h5_loop_fit[:, iloop],
                                                          title='Loop {} - All Positions'.format(iloop))
```
%% Cell type:code id: tags:
``` python
```
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -30,48 +30,12 @@
# Finally, pycroscopy itself
import pycroscopy as px
# set up notebook to show plots within the notebook
% matplotlib inline
```
%% Cell type:code id: tags:
``` python
# ui_file_window records whether a graphical file dialog is available (requires PyQt5)
ui_file_window = False
try:
    from PyQt5 import QtWidgets

    def uiGetFile(filter='H5 file (*.h5)', caption='Select File'):
        """
        Presents a file dialog for selecting a file
        and returns the absolute filepath of the selected file

        Parameters
        ----------
        filter : (Optional) String
            File type filter(s) offered by the dialog, e.g. 'H5 file (*.h5)'.
            Multiple filters are separated by ';;'
        caption : (Optional) String
            Title for the file browser window

        Returns
        -------
        file_path : String
            Absolute path of the chosen file
        """
        # Only one QApplication may exist per process: reuse a running one if present
        app = QtWidgets.QApplication.instance()
        created_app = app is None
        if created_app:
            app = QtWidgets.QApplication([])
        path = QtWidgets.QFileDialog.getOpenFileName(caption=caption, filter=filter)[0]
        if created_app:
            # Shut down only the application created here, never one owned by someone else
            app.exit()
        del app

        return str(path)

    ui_file_window = True
except ImportError:
    print('***********************************************************')
    print('*                                                         *')
    print('* You will need to specify the file path manually below   *')
    print('*                                                         *')
    print('***********************************************************')

# Set to True to write the figures generated below to disk
save_plots = False
```
%% Cell type:markdown id: tags:
## Make the data pycroscopy compatible
......@@ -90,17 +54,13 @@
You can select desired file type by choosing the second option in the pull down menu on the bottom right of the file window
%% Cell type:code id: tags:
``` python
if ui_file_window:
input_file_path = uiGetFile(caption='Select translated .h5 file or raw experiment data',
filter='Parameters for raw G-Line data (*.txt);; \
input_file_path = px.io_utils.uiGetFile(caption='Select translated .h5 file or raw experiment data',
file_filter='Parameters for raw G-Line data (*.txt);; \
Translated file (*.h5)')
else:
input_file_path = '/Volumes/IFgroup/SPM software development/Raw_Data/G_mode/GVS/2015_04_08_PZT_AuCu_nanocaps/GLine_8V_10kHz_256x256_0001/GLine_8V_10kHz_256x256.h5'
folder_path, _ = path.split(input_file_path)
if input_file_path.endswith('.txt'):
print('Translating raw data to h5. Please wait')
tran = px.GLineTranslator()
......@@ -118,17 +78,11 @@
%% Cell type:code id: tags:
``` python
hdf = px.ioHDF5(h5_path)
h5_main = px.hdf_utils.getDataSet(hdf.file, 'Raw_Data')[-1]
parms_dict = h5_main.parent.parent.attrs
samp_rate = parms_dict['IO_rate_[Hz]']
ex_freq = parms_dict['BE_center_frequency_[Hz]']
h5_spec_vals = px.hdf_utils.getAuxData(h5_main, auxDataName='Spectroscopic_Values')[0]
pixel_ex_wfm = h5_spec_vals[0, :int(h5_spec_vals.shape[1]/parms_dict['grid_num_cols'])]
```
%% Cell type:markdown id: tags:
##### Inspect the contents of this h5 data file
......@@ -158,65 +112,138 @@
print('{} : {}'.format(key, hdf.file['/Measurement_000'].attrs[key]))
```
%% Cell type:markdown id: tags:
## Extract necessary parameters:
%% Cell type:code id: tags:
``` python
# Experimental parameters are read from the attributes of the group two levels above the main dataset
parms_dict = h5_main.parent.parent.attrs
samp_rate = parms_dict['IO_rate_[Hz]']
ex_freq = parms_dict['BE_center_frequency_[Hz]']
# First row of the spectroscopic values, cut to 1/grid_num_cols of its length
# NOTE(review): presumably this is the excitation waveform applied at a single pixel - confirm
pixel_ex_wfm = h5_spec_vals[0, :int(h5_spec_vals.shape[1]/parms_dict['grid_num_cols'])]
# Number of points in the per-pixel waveform and in one full row of the main dataset
pts_per_pix = pixel_ex_wfm.size
pts_per_line = h5_main.shape[1]
```
%% Cell type:markdown id: tags:
## Inspect the raw data:
%% Cell type:code id: tags:
``` python
row_ind = 40
raw_row = h5_main[row_ind].reshape(-1, pixel_ex_wfm.size)
raw_row = h5_main[row_ind].reshape(-1, pts_per_pix)
fig, axes = px.plot_utils.plot_loops(pixel_ex_wfm, raw_row, x_label='Bias (V)', title='Raw Measurement',
plots_on_side=4, y_label='Deflection (a.u.)',
subtitles='Row: ' + str(row_ind) + ' Col:')
```
%% Cell type:markdown id: tags:
## Visualizing information in Fourier space
Visualizing the data in Fourier space provides information about the noise floor, the frequencies that are noise-dominant or signal-dominant, etc.
This visualization will guide the design of signal filters to remove the noise.
%% Cell type:code id: tags:
``` python
# Preparing the frequency axis (scaled by 1E-3 to kHz), centred on zero like the shifted FFT:
w_vec = 1E-3*np.linspace(-0.5*samp_rate, 0.5*samp_rate - samp_rate/pts_per_line, pts_per_line)

row_ind = 40
# FFT of a single line, shifted so that it lines up with w_vec
F_resp = np.fft.fftshift(np.fft.fft(h5_main[row_ind]))

fig, ax = plt.subplots(figsize=(12, 7))
ax.axvline(x=1E-3*ex_freq, color='r', linewidth=2, label='Excitation')
# Only the second half of the shifted spectrum is plotted, as log10 of the magnitude
ax.plot(w_vec[int(0.5*len(w_vec)):], np.log10(np.abs(F_resp[int(0.5*len(w_vec)):])), label='Response')
ax.set_xlabel('Frequency (kHz)', fontsize=16)
ax.set_ylabel('Amplitude (a.u.)', fontsize=16)
ax.legend(fontsize=14)
ax.set_xscale('log')
ax.set_xlim(ex_freq*1E-4, samp_rate*0.5E-3)
ax.set_title('Noise Spectrum for row ' + str(row_ind), fontsize=16)
px.plot_utils.set_tick_font_size(ax, 14)

if save_plots:
    # Save next to the input file, like the other figures written by this notebook
    fig.savefig(path.join(folder_path,
                          'noise_spectrum_line_' + str(row_ind) + '.png'),
                format='png', dpi=150)
```
%% Cell type:markdown id: tags:
## Try different FFT filters on the data
Good combinations for frequency filters are:
* Just a HarmonicPassFilter
* LowPassFilter + NoiseBandFilter
It is always a good idea to combine these frequency filters with noise thresholding. Try setting noise tolerance values of 1E-6 to 1E-3.
%% Cell type:code id: tags:
``` python
filter_parms = dict()
filter_parms['noise_threshold'] = 1E-4
filter_parms['comb_[Hz]'] = [ex_freq, 1E+3, 10]
# filter_parms['LPF_cutOff_[Hz]'] = -1
# Noise frequencies - 15.6 kHz ~ 14-17.5, 7.8-8.8, 45-49.9 ~ 48.9414 kHz
# filter_parms['band_filt_[Hz]'] = None # [[8.3E+3, 15.6E+3, 48.9414E+3], [1E+3, 0.5E+3, 0.1E+3]]
# filter_parms['phase_[rad]'] = 0
filter_parms['samp_rate_[Hz]'] = samp_rate
filter_parms['num_pix'] = 1
hpf = px.processing.fft.HarmonicPassFilter(pts_per_line, samp_rate, ex_freq, 1E+3, 10)
lpf = px.processing.fft.LowPassFilter(pts_per_line, samp_rate, 110E+3)
nbf = px.processing.fft.NoiseBandFilter(pts_per_line, samp_rate, [0], [17E+3])
freq_filts = [hpf]
noise_tolerance = 1E-4
# Test filter on a single line:
row_ind = 40
filt_line, fig_filt, axes_filt = px.processing.gmode_utils.test_filter(h5_main[row_ind], filter_parms, samp_rate,
show_plots=True, use_rainbow_plots=False)
fig_filt.savefig(path.join(folder_path, 'FFT_filter_on_line_{}.png'.format(row_ind)), format='png', dpi=300)
filt_line, fig_filt, axes_filt = px.processing.gmode_utils.test_filter(h5_main[row_ind],
frequency_filters=freq_filts,
noise_threshold=noise_tolerance,
show_plots=True)
if save_plots:
fig_filt.savefig(path.join(folder_path, 'FFT_filter_on_line_{}.png'.format(row_ind)), format='png', dpi=300)
filt_row = filt_line.reshape(-1, pixel_ex_wfm.size)
# raw_row = h5_main[row_ind].reshape(-1, pts_per_pix)
fig, axes = px.plot_utils.plot_loops(pixel_ex_wfm, filt_row, x_label='Bias (V)', title='FFT Filtering',
plots_on_side=4, y_label='Deflection (a.u.)',
subtitles='Row: ' + str(row_ind) + ' Col:')
# fig.savefig(path.join(folder_path, 'FFT_filtered_loops_on_line_{}.png'.format(row_ind)), format='png', dpi=300)
if save_plots:
fig.savefig(path.join(folder_path, 'FFT_filtered_loops_on_line_{}.png'.format(row_ind)), format='png', dpi=300)
```
%% Cell type:markdown id: tags:
## Apply selected filter to entire dataset
%% Cell type:code id: tags:
``` python
# h5_filt_grp = px.hdf_utils.findH5group(h5_main, 'FFT_Filtering')[-1]
h5_filt_grp = px.processing.gmode_utils.fft_filter_dataset(h5_main, filter_parms, write_filtered=True)
# Gather the parameters of every selected filter; they identify this filtering configuration
filter_parms = dict()
if freq_filts is not None:
    # Loop variable is deliberately not called `filter`, which would shadow the builtin
    for freq_filt in freq_filts:
        filter_parms.update(freq_filt.get_parms())
if noise_tolerance is not None:
    filter_parms['noise_threshold'] = noise_tolerance

# Reuse an earlier 'FFT_Filtering' result computed with the same parameters, if one exists
h5_filt_grp = px.hdf_utils.check_for_old(h5_main, 'FFT_Filtering', new_parms=filter_parms)

if h5_filt_grp is None:
    sig_filt = px.processing.SignalFilter(h5_main, frequency_filters=freq_filts, noise_threshold=noise_tolerance,
                                          write_filtered=True, write_condensed=False, num_pix=1, verbose=True)
    h5_filt_grp = sig_filt.compute()
else:
    print('Taking previously computed results')

# The filtered data is needed whether it was just computed or taken from an earlier run
h5_filt = h5_filt_grp['Filtered_Data']
```
%% Cell type:code id: tags:
``` python
# Test to make sure the filter gave the same results
filt_row = h5_filt[row_ind].reshape(-1, pixel_ex_wfm.size)
fig, axes = px.plot_utils.plot_loops(pixel_ex_wfm, filt_row, x_label='Bias (V)', title='FFT Filtering',
plots_on_side=4, y_label='Deflection (a.u.)',
subtitles='Row: ' + str(row_ind) + ' Col:')
......
This source diff could not be displayed because it is too large. You can view the blob instead.
%% Cell type:markdown id: tags:
# Loading, reshaping, visualizing data using pycroscopy
### Suhas Somnath, Chris R. Smith and Stephen Jesse
The Center for Nanophase Materials Science and The Institute for Functional Imaging for Materials <br>
Oak Ridge National Laboratory<br>
8/01/2017
Here, we will demonstrate how to load, reshape, and visualize multidimensional imaging datasets. For this example, we will load a three-dimensional Band Excitation imaging dataset acquired from an atomic force microscope.
%% Cell type:code id: tags:
``` python
# Make sure pycroscopy and wget are installed
!pip install pycroscopy
!pip install -U wget
```
%% Cell type:code id: tags:
``` python
# Ensure python 3 compatibility
from __future__ import division, print_function, absolute_import
# Import necessary libraries:
from os import remove
import h5py
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from IPython.display import display
# wget is installed in the cell above and used below to download the example file
import wget
import pycroscopy as px
# set up notebook to show plots within the notebook
% matplotlib inline
```
%% Cell type:markdown id: tags:
## Load pycroscopy compatible file
For simplicity, we will use a dataset that has already been translated from its original data format into a pycroscopy-compatible hierarchical data format (HDF5 or H5) file.
#### HDF5 or H5 files:
* are like smart containers that can store matrices with data, folders to organize these datasets, images, metadata like experimental parameters, links or shortcuts to datasets, etc.
* are readily compatible with high-performance computing facilities
* scale very efficiently from few kilobytes to several terabytes
* can be read and modified using any language including Python, Matlab, C/C++, Java, Fortran, Igor Pro, etc.
Python uses the h5py library to read, write, and access HDF5 files.
%% Cell type:code id: tags:
``` python
# Downloading the example file from the pycroscopy Github project
url = 'https://raw.githubusercontent.com/pycroscopy/pycroscopy/master/data/BELine_0004.h5'
h5_path = 'temp.h5'
_ = wget.download(url, h5_path)
print('Working on:\n' + h5_path)
# Open the file in read-only mode
h5_file = h5py.File(h5_path, mode='r')