Commit 6c4ece4e authored by syz

Merge branch 'cades_dev' of https://github.com/pycroscopy/pycroscopy into cades_dev_local

parents 14ab6511 9d3fe2d9
......@@ -11,18 +11,16 @@ Documentation
* Include examples in documentation
* Links to references for all functions and methods used in our workflows.
Fundamental tutorials on how to use pycroscopy
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Access h5 files
* Find a specific dataset/group in the file
* chunking the main dataset
Longer examples (via specific scientific use cases)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* A tour of the hdf_utils functions used for writing h5 files since these functions need data to show / explain them.
* chunking the main dataset
* A tour of the io_utils functions since these functions need data to show / explain them.
* A tour of plot_utils
* pycroscopy package organization - a short writeup on what is where and differences between the process / analysis submodules
* How to write your own analysis class based on the (to-be simplified) Model class
* Links to tutorials on how to use PyCharm, Git, etc.
Rama's (older and more applied / specific) tutorial goals
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
......
......@@ -411,7 +411,7 @@ sphinx_gallery_conf = dict(examples_dirs='../examples',
reference_url=dict(pycroscopy=None,
matplotlib='https://matplotlib.org',
numpy='https://docs.scipy.org/doc/numpy',
scipy='https://docs.scipy.org/doc/scipy/reference',
h5py='http://docs.h5py.org/en/latest/'),
# directory where function granular galleries are stored
backreferences_dir='_autosummary/backreferences',
......
......@@ -15,7 +15,7 @@ Introduction
In pycroscopy, all position dimensions of a dataset are collapsed into the first dimension and all other
(spectroscopic) dimensions are collapsed to the second dimension to form a two dimensional matrix. The ancillary
matrices, namely the spectroscopic indices and values matrix as well as the position indices and values matrices
will be essential for reshaping the data back to its original N dimensional form and for slicing multidimensional
datasets.
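A minimal sketch of this layout with made-up sizes (a 2 x 3 grid of positions with a 4-point spectrum per pixel, not the actual BEPS data used below):

``` python
import numpy as np

# Hypothetical raw data: 2 x 3 spatial grid with a 4-point spectrum per pixel
raw = np.random.rand(2, 3, 4)

# Main dataset: position dimensions collapse to rows, spectroscopic steps to columns
main_2d = raw.reshape(2 * 3, 4)  # shape (6, 4)

# Position indices: one row per pixel, one column per position dimension
# (here ordered Y, X with X varying fastest)
pos_ind = np.array([[y, x] for y in range(2) for x in range(3)])  # shape (6, 2)

# Spectroscopic indices: one row per spectroscopic dimension, one column per step
spec_ind = np.arange(4)[None, :]  # shape (1, 4)
```

Undoing this reshape with the index matrices is exactly what the ancillary datasets enable for real files.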
......@@ -54,8 +54,8 @@ import pycroscopy as px
# imaging datasets, a single spectrum is acquired at each location in a two dimensional grid of spatial locations.
# Thus, BE imaging datasets have two position dimensions (X, Y) and one spectroscopic dimension (frequency - against
# which the spectrum is recorded). The BEPS dataset used in this example has a spectrum for each combination of
# three other parameters (DC offset, Field, and Cycle). Thus, this dataset has three new spectral
# dimensions in addition to the spectrum itself. Hence, this dataset becomes a 2+4 = 6 dimensional dataset
# download the raw data file from Github:
h5_path = 'temp_3.h5'
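# A sketch of the elided download step; the URL is intentionally left elided
# here, and the actual example fetches its own copy of the BEPS data:
# from urllib.request import urlretrieve
# urlretrieve('https://raw.githubusercontent.com/pycroscopy/...', h5_path)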
......@@ -118,6 +118,8 @@ def myfun(pos_index, spec_index):
print(dim_name, ':', h5_pos_ind[pos_index, dim_ind])
for dim_ind, dim_name in enumerate(spec_labels):
print(dim_name, ':', h5_spec_ind[dim_ind, spec_index])
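# Interactive widget: scrub through any (position, spectroscopic) index pair
# and print the corresponding dimension indices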
interact(myfun, pos_index=(0, h5_main.shape[0]-1, 1), spec_index=(0, h5_main.shape[1]-1, 1))
#########################################################################
......@@ -175,13 +177,14 @@ for dim_ind, axis, dim_label, dim_array in zip(range(h5_spec_ind.shape[0]), rhs_
def describe_dimensions(h5_aux):
for name, unit in zip(px.hdf_utils.get_attr(h5_aux, 'labels'),
                          px.hdf_utils.get_attr(h5_aux, 'units')):
print(name, '[', unit, ']')
print('Position dimension names and units:')
describe_dimensions(h5_pos_ind)
print('\nSpectroscopic dimension names and units:')
describe_dimensions(h5_spec_ind)
#########################################################################
fig, axis = plt.subplots()
axis.imshow(np.abs(spectrogram3), origin='lower')
axis.set_xlabel('Frequency Index')
axis.set_ylabel('DC Offset Index')
axis.set_title('Spectrogram Amplitude')
#########################################################################
# Approach 2 - N-dimensional form
......@@ -283,16 +286,19 @@ print('Shape of the N-dimensional dataset:', ds_nd.shape)
print(labels)
#########################################################################
# Now that we have the data in its original N dimensional form, we can easily slice the dataset:
spectrogram2 = ds_nd[2, 3, :, :, 0, 1]
# Now the spectrogram is of order (frequency x DC_Offset).
spectrogram2 = spectrogram2.T
# Now the spectrogram is of order (DC_Offset x frequency)
fig, axis = plt.subplots()
axis.imshow(np.abs(spectrogram2), origin='lower')
axis.set_xlabel('Frequency Index')
axis.set_ylabel('DC Offset Index')
axis.set_title('Spectrogram Amplitude')
#########################################################################
# Approach 3 - slicing the 2D matrix
......@@ -301,10 +307,10 @@ axis.set_title('Spectrogram Amplitude');
# This approach is hands-on and requires that we be very careful with the indexing and slicing. Nonetheless,
# the process is actually fairly intuitive. We rely entirely upon the spectroscopic and position ancillary datasets
# to find the indices for slicing the dataset. Unlike the main dataset, the ancillary datasets are very small and
# can be stored easily in memory. Once the slicing indices are calculated, we *only read the desired portion of
# `main` data to memory*. Thus the amount of data loaded into memory is only the amount that we absolutely need.
# *This is the only approach that can be applied to slice very large datasets without overwhelming memory overheads*.
# The comments for each line explain the entire process comprehensively.
#
# Get only the spectroscopic dimension names:
......@@ -312,21 +318,22 @@ spec_dim_names = px.hdf_utils.get_attr(h5_spec_ind, 'labels')
# Find the row in the spectroscopic indices that corresponds to the dimensions we want to slice:
cycle_row_ind = np.where(spec_dim_names == 'Cycle')[0][0]
# Find the row corresponding to field in the same way:
field_row_ind = np.where(spec_dim_names == 'Field')[0][0]
# Find all the spectral indices corresponding to the second cycle:
desired_cycle = h5_spec_ind[cycle_row_ind] == 1
# Do the same to find the spectral indices for the first field:
desired_field = h5_spec_ind[field_row_ind] == 0
# Now find the indices where the cycle = 1 and the field = 0 using a logical AND statement:
spec_slice = np.logical_and(desired_cycle, desired_field)
# We will use the same approach to find the position indices
# corresponding to the row index of 3 and column index of 2:
pos_dim_names = px.hdf_utils.get_attr(h5_pos_ind, 'labels')
x_col_ind = np.where(pos_dim_names == 'X')[0][0]
y_col_ind = np.where(pos_dim_names == 'Y')[0][0]
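# The elided lines below combine these masks to slice the main dataset. A
# minimal sketch of that step, assuming the target pixel at X (column) = 2
# and Y (row) = 3:
# desired_pos = np.logical_and(h5_pos_ind[:, x_col_ind] == 2,
#                              h5_pos_ind[:, y_col_ind] == 3)
# h5py allows only one boolean mask per read, so slice positions first:
# data_vec = h5_main[desired_pos, :][:, spec_slice]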
......@@ -352,7 +359,7 @@ print('Sliced data is of shape:', data_vec.shape)
# For this we need to find the size of the data in the DC_offset and Frequency dimensions:
dc_dim_ind = np.where(spec_dim_names == 'DC_Offset')[0][0]
# Find the row corresponding to Frequency in the same way:
freq_dim_ind = np.where(spec_dim_names == 'Frequency')[0][0]
dc_dim_size = spec_dim_sizes[dc_dim_ind]
......@@ -366,7 +373,7 @@ print('We need to reshape the vector by the tuple:', (dc_dim_size, freq_dim_size
# The dimensions in the ancillary datasets may or may not be arranged from fastest to slowest even though that is
# part of the requirements. We can still account for this. In the event that we don't know the order in which to
# reshape the data vector because we don't know which dimension varies faster than the other(s), we would need to
# sort the dimensions by how fast their indices change. Fortunately, pycroscopy has a function called `px.hdf_utils.
# get_sort_order` that does just this. Knowing the sort order, we can easily reshape correctly in an automated manner.
# We will do this below.
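# A sketch of the idea behind get_sort_order (not its actual implementation):
# count how often each dimension's index changes from one step to the next;
# the dimension that changes most often varies fastest:
change_counts = np.array([np.count_nonzero(np.diff(row)) for row in h5_spec_ind[()]])
manual_sort_order = np.argsort(change_counts)[::-1]  # fastest to slowest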
......@@ -376,7 +383,7 @@ print('Spectroscopic dimensions arranged as is:\n',
spec_dim_names)
print('Dimension indices arranged from fastest to slowest:',
spec_sort_order)
print('Dimension names now arranged from fastest to slowest:\n',
spec_dim_names[spec_sort_order])
if spec_sort_order[dc_dim_ind] > spec_sort_order[freq_dim_ind]:
......
......@@ -20,10 +20,11 @@
# Import necessary libraries:
# General utilities:
import sys
import os
import shutil
# Computation:
import numpy as np
import h5py
......@@ -57,12 +58,12 @@
This notebook performs some functional fitting whose duration can be substantially decreased by using more memory and CPU cores. We have provided default values below but you may choose to change them if necessary.
%% Cell type:code id: tags:
``` python
max_mem = 1024*2  # Maximum memory to use, in MB. Default = 1024
max_cores = 2  # Number of logical cores to use in fitting. None uses all but 2 available cores.
```
%% Cell type:markdown id: tags:
## Make the data pycroscopy compatible
......@@ -85,11 +86,19 @@
``` python
input_file_path = px.io_utils.uiGetFile(caption='Select translated .h5 file or raw experiment data',
file_filter='Parameters for raw BE data (*.txt *.mat *xls *.xlsx);; \
Translated file (*.h5)')
(data_dir, filename) = os.path.split(input_file_path)
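# Optionally work on a scratch copy so the original data file is never modified: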
if copy_input_file:
_, ext = os.path.splitext(filename)
temp_path = os.path.join(data_dir, 'temp_file'+ext)
if os.path.exists(temp_path):
os.remove(temp_path)
shutil.copy2(input_file_path, temp_path)
input_file_path = temp_path
if input_file_path.endswith('.h5'):
# No translation here
h5_path = input_file_path
force = False  # Set this to True to force patching of the datafile.
......@@ -367,12 +376,13 @@
``` python
map_parm = 'Work of Switching'
plot_cycle = 0
plot_position = (int(pos_dims[0]/2), int(pos_dims[1]/2))
plot_bias_step = 0
px.viz.be_viz_utils.plot_loop_sho_raw_comparison(h5_loop_parameters, map_parm, plot_cycle, plot_position, plot_bias_step)
# display(px.viz.plot_utils.save_fig_filebox_button(fig, 'plot.png'))
```
%% Cell type:markdown id: tags:
## Save and close
......
......@@ -11,10 +11,11 @@
## Configure the notebook first
%% Cell type:code id: tags:
``` python
!pip install -U numpy scipy scikit-image h5py matplotlib ipython ipywidgets pycroscopy
# set up notebook to show plots within the notebook
% matplotlib notebook
# Import necessary libraries:
# General utilities:
......@@ -57,12 +58,11 @@
## Load the image that will be cleaned:
%% Cell type:code id: tags:
``` python
image_path = px.io.uiGetFile('*.png *PNG *TIFF *TIF *tif *tiff *BMP *bmp', 'Images')
print('Working on: \n{}'.format(image_path))
folder_path, file_name = os.path.split(image_path)
base_name, _ = os.path.splitext(file_name)
......@@ -202,11 +202,11 @@
col_offset = int(0.5*(num_y-win_size))
plt.figure()
plt.imshow(raw_image_mat[row_offset:row_offset+win_size,
col_offset:col_offset+win_size],
cmap=px.plot_utils.cmap_jet_white_center(),
           origin='lower')
# the result should be about the size of a unit cell
# if it is the wrong size, just choose one manually by setting the win_size
plt.show()
```
......@@ -615,10 +615,16 @@
fig.axes[0].set_xlabel('Cluster number', fontsize=20)
fig.axes[0].set_ylabel('Cluster separation', fontsize=20)
px.plot_utils.set_tick_font_size(fig.axes[0], 12)
```
%% Cell type:markdown id: tags:
## Identifying the principal patterns
Here, we will interactively identify N windows, each centered on a distinct class / kind of atom.
......@@ -628,57 +634,90 @@
``` python
motif_win_size = win_size
half_wind = int(motif_win_size*0.5)
row, col = [int(0.5*cropped_clean_image.shape[0]), int(0.5*cropped_clean_image.shape[1])]

fig, axes = plt.subplots(ncols=2, figsize=(14, 7))
clean_img = axes[0].imshow(cropped_clean_image, cmap=px.plot_utils.cmap_jet_white_center(), origin='lower')
axes[0].set_title('Cleaned Image', fontsize=16)
axes[1].set_title('Zoomed area', fontsize=16)
vert_line = axes[0].axvline(x=col, color='k')
hor_line = axes[0].axhline(y=row, color='k')
motif_box = axes[0].add_patch(patches.Rectangle((col - half_wind, row - half_wind),
                                                motif_win_size, motif_win_size, fill=False,
                                                color='black', linewidth=2))
indices = (slice(row - half_wind, row + half_wind),
           slice(col - half_wind, col + half_wind))
motif_img = axes[1].imshow(cropped_clean_image[indices], cmap=px.plot_utils.cmap_jet_white_center(),
                           vmax=np.max(cropped_clean_image), vmin=np.min(cropped_clean_image), origin='lower')
axes[1].axvline(x=half_wind, color='k')
axes[1].axhline(y=half_wind, color='k')

def _update_motif_img(row, col):
    # Reposition the selection box on the full image and refresh the zoomed view
    indices = (slice(row - half_wind, row + half_wind),
               slice(col - half_wind, col + half_wind))
    motif_box.set_x(col - half_wind)
    motif_box.set_y(row - half_wind)
    motif_img.set_data(cropped_clean_image[indices])

def move_zoom_box(event):
    # Coarse selection: click anywhere on the cleaned image
    if not clean_img.axes.in_axes(event):
        return
    col = int(round(event.xdata))
    row = int(round(event.ydata))
    vert_line.set_xdata((col, col))
    hor_line.set_ydata((row, row))
    _update_motif_img(row, col)
    fig.canvas.draw()

def _motif_fine_select(event):
    # Fine selection: click on the zoomed view to nudge the crosshair
    if not motif_img.axes.in_axes(event):
        return
    col_shift = int(round(event.xdata)) - half_wind
    row_shift = int(round(event.ydata)) - half_wind
    col = vert_line.get_xdata()[0] + col_shift
    row = hor_line.get_ydata()[0] + row_shift
    vert_line.set_xdata((col, col))
    hor_line.set_ydata((row, row))
    _update_motif_img(row, col)
    fig.canvas.draw()

motif_win_centers = list()

add_motif_button = widgets.Button(description="Set as motif")
display(add_motif_button)

def add_motif(butt):
    # Record the current crosshair position as a motif center and mark it on the image
    row = hor_line.get_ydata()[0]
    col = vert_line.get_xdata()[0]
    axes[0].add_patch(patches.Rectangle((col - int(0.5*motif_win_size),
                                         row - int(0.5*motif_win_size)),
                                        motif_win_size, motif_win_size, fill=False,
                                        color='black', linewidth=2))
    motif_win_centers.append((row, col))

cid = clean_img.figure.canvas.mpl_connect('button_press_event', move_zoom_box)
cid2 = motif_img.figure.canvas.mpl_connect('button_press_event', _motif_fine_select)
add_motif_button.on_click(add_motif)
```
%% Cell type:markdown id: tags:
......@@ -806,12 +845,12 @@
thresholded_maps = list()
motif_imgs = list()
base_color_map = plt.cm.jet
fig, axis = plt.subplots(figsize=(10, 10))
axis.imshow(double_cropped_image, cmap="gray")
handles = list()
if num_motifs > 1:
motif_colors = [base_color_map(int(255 * motif_ind / (num_motifs - 1))) for motif_ind in range(num_motifs)]
else:
motif_colors = [base_color_map(0)]
......@@ -821,21 +860,21 @@
my_cmap = px.plot_utils.make_linear_alpha_cmap('fdfd', current_solid_color, 1, max_alpha=0.5)
bin_map = np.where(match_mat > t_hold,
np.ones(shape=match_mat.shape, dtype=np.uint8),
np.zeros(shape=match_mat.shape, dtype=np.uint8))
thresholded_maps.append(bin_map)
motif_imgs.append(axis.imshow(bin_map, interpolation='none', cmap=my_cmap))
current_solid_color = list(current_solid_color)
current_solid_color[3] = 0.5
handles.append(patches.Patch(color=current_solid_color, label='Motif {}'.format(motif_ind)))
axis.set_xticklabels([])
axis.set_yticklabels([])
axis.get_xaxis().set_visible(False)
axis.get_yaxis().set_visible(False)
plt.legend(handles=handles, bbox_to_anchor=(1.01, 1), loc=2, borderaxespad=0.)
def threshold_images(thresholds):
# thresholded_maps = list()
# empty the thresholded maps:
del thresholded_maps[:]
......
import unittest
import tempfile
import os
import numpy as np
import nanonispy as nap
class TestNanonisFileBaseClass(unittest.TestCase):
"""
Testing class for NanonisFile base class.
"""
def setUp(self):
self.temp_dir = tempfile.TemporaryDirectory()
def tearDown(self):
self.temp_dir.cleanup()
def test_is_instance_nanonis_file(self):
"""
Check for correct instance of NanonisFile object.
"""
f = tempfile.NamedTemporaryFile(mode='wb',
suffix='.3ds',
dir=self.temp_dir.name,
delete=False)
f.write(b':HEADER_END:')
f.close()
NF = nap.read.NanonisFile(f.name)
self.assertIsInstance(NF, nap.read.NanonisFile)
def test_unsupported_filetype(self):
"""
Handle unsupported file gracefully.
"""
with self.assertRaises(nap.read.UnhandledFileError):
f = tempfile.NamedTemporaryFile(mode='wb',
suffix='.txt',
dir=self.temp_dir.name,
delete=False)
f.close()
NF = nap.read.NanonisFile(f.name)
def test_3ds_suffix_parsed(self):
"""
3ds file recognized.
"""
f = tempfile.NamedTemporaryFile(mode='wb',
suffix='.3ds',
dir=self.temp_dir.name,
delete=False)
f.write(b':HEADER_END:')
f.close()
NF = nap.read.NanonisFile(f.name)
self.assertEqual(NF.filetype, 'grid')
def test_sxm_suffix_parsed(self):
"""
Sxm file recognized.
"""
f = tempfile.NamedTemporaryFile(mode='wb',
suffix='.sxm',
dir=self.temp_dir.name,
delete=False)
f.write(b'SCANIT_END')
f.close()
NF = nap.read.NanonisFile(f.name)
self.assertEqual(NF.filetype, 'scan')
def test_dat_suffix_parsed(self):
"""
Dat file recognized.
"""
f = tempfile.NamedTemporaryFile(mode='wb',
suffix='.dat',
dir=self.temp_dir.name,
delete=False)
f.write(b'[DATA]')
f.close()
NF = nap.read.NanonisFile(f.name)
self.assertEqual(NF.filetype, 'spec')
def test_find_start_byte(self):
f = tempfile.NamedTemporaryFile(mode='wb',
suffix='.3ds',
dir=self.temp_dir.name,
delete=False)
f.write(b'header_entry\n:HEADER_END:\n')
f.close()
NF = nap.read.NanonisFile(f.name)
byte_offset = NF.start_byte()
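        # expected offset: len(b'header_entry\n') == 13, plus
        # len(b':HEADER_END:') == 12, plus the trailing newline -> 26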
self.assertEqual(byte_offset, 26)
def test_no_header_tag_found(self):
with self.assertRaises(nap.read.FileHeaderNotFoundError):
f = tempfile.NamedTemporaryFile(mode='wb',
suffix='.3ds',
dir=self.temp_dir.name,
delete=False)
f.close()
NF = nap.read.NanonisFile(f.name)
def test_header_raw_is_str(self):
f = tempfile.NamedTemporaryFile(mode='wb',
suffix='.3ds',
dir=self.temp_dir.name,
delete=False)
f.write(b'header_entry\n:HEADER_END:\n')
f.close()
NF = nap.read.NanonisFile(f.name)
self.assertIsInstance(NF.header_raw, str)
class TestGridFile(unittest.TestCase):
def setUp(self):
self.temp_dir = tempfile.TemporaryDirectory()
def tearDown(self):
self.temp_dir.cleanup()
def create_dummy_grid_data(self, suffix='3ds'):
"""
return tempfile file object with dummy header info
"""
f = tempfile.NamedTemporaryFile(mode='wb',
suffix=suffix,
dir=self.temp_dir.name,
delete=False)
f.write(b'Grid dim="230 x 230"\r\nGrid settings=4.026839E-8;-4.295725E-8;1.500000E-7;1.500000E-7;0.000000E+0\r\nSweep Signal="Bias (V)"\r\nFixed parameters="Sweep Start;Sweep End"\r\nExperiment parameters="X (m);Y (m);Z (m);Z offset (m);Settling time (s);Integration time (s);Z-Ctrl hold;Fina