Commit 40e3b999 authored by Unknown

Merge remote-tracking branch 'origin/cades_dev'

parents 757fdebe 5f9d8206
......@@ -49,15 +49,26 @@ Core development
1. Check if the same process has been performed with the same parameters. When initializing the process, throw an exception. This is better than checking in the notebook stage (see the sketch at the end of this list).
2. (Gracefully) Abort and resume processing.
* consolidate _get_component_slice used in Cluster with duplicate in svd_utils
* Legacy processes **MUST** extend Process:
* Image Windowing
* Image Cleaning
* sklearn wrapper classes:
* Cluster
* Decomposition
* The computation will continue to be performed by sklearn. No need to use parallel_compute() or resume computation.
* Own classes:
* Image Windowing
* Image Cleaning
* As time permits, ensure that these can resume processing
* All these MUST implement the check for previous computations at the very least
* As time permits, ensure that these can resume processing
* Absorb functionality from Process into Model
* Bayesian GIV should actually be an analysis <-- depends on above
* Reorganize processing and analysis - promote / demote classes etc.
* multi-node computing capability in parallel_compute
* Demystify analysis / optimize. Use parallel_compute instead of optimize and guess_methods and fit_methods
* Consistency in the naming and placement of attributes (chan or meas group) in all translators - some put attributes at the measurement level, some at the channel level! hyperspy appears to create datagroups solely for the purpose of organizing metadata in a tree structure!
* Consider developing a generic curve fitting class a la `hyperspy <http://nbviewer.jupyter.org/github/hyperspy/hyperspy-demos/blob/master/Fitting_tutorial.ipynb>`_
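A minimal sketch of the duplicate-parameter check described in item 1 above (the ``ExampleProcess`` class and its constructor are hypothetical; ``check_for_old`` is the existing helper in ``hdf_utils``)::

    import pycroscopy as px

    class ExampleProcess(object):
        def __init__(self, h5_main, **parms):
            # Look for prior results of this tool written with identical parameters
            prior_grp = px.hdf_utils.check_for_old(h5_main, 'Example', new_parms=parms)
            if prior_grp is not None:
                # Fail early, before any computation is queued from the notebook
                raise ValueError('{} already holds results computed with these parameters'.format(prior_grp.name))
            self.h5_main = h5_main
            self.parms = parms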
......
......@@ -181,8 +181,8 @@ px.plot_utils.plot_map_stack(abun_maps, num_comps=9, heading='SVD Abundance Maps
num_clusters = 4
estimators = px.Cluster(h5_main, 'KMeans', n_clusters=num_clusters)
h5_kmeans_grp = estimators.do_cluster(h5_main)
estimators = px.Cluster(h5_main, KMeans(n_clusters=num_clusters))
h5_kmeans_grp = estimators.compute(h5_main)
h5_kmeans_labels = h5_kmeans_grp['Labels']
h5_kmeans_mean_resp = h5_kmeans_grp['Mean_Response']
......
......@@ -28,10 +28,11 @@
import numpy as np
import h5py
from skimage import measure
from scipy.cluster.hierarchy import linkage, dendrogram
from scipy.spatial.distance import pdist
from sklearn.cluster import KMeans
# Visualization:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from mpl_toolkits.axes_grid1 import make_axes_locatable
......@@ -169,14 +170,11 @@
%% Cell type:code id: tags:
``` python
fig, axis = plt.subplots(figsize=(10,10))
img = axis.imshow(raw_image_mat,cmap=px.plot_utils.cmap_jet_white_center(), origin='lower');
divider = make_axes_locatable(axis)
cax = divider.append_axes("right", size="5%", pad=0.2)
plt.colorbar(img, cax=cax)
px.plot_utils.plot_map(axis, raw_image_mat, cmap=px.plot_utils.cmap_jet_white_center())
axis.set_title('Raw Image', fontsize=16);
```
%% Cell type:markdown id: tags:
......@@ -198,19 +196,17 @@
# win_size = 8
# plot a single window
row_offset = int(0.5*(num_x-win_size))
col_offset = int(0.5*(num_y-win_size))
plt.figure()
plt.imshow(raw_image_mat[row_offset:row_offset+win_size,
col_offset:col_offset+win_size],
cmap=px.plot_utils.cmap_jet_white_center(),
origin='lower');
fig, axis = plt.subplots(figsize=(5, 5))
px.plot_utils.plot_map(axis, raw_image_mat[row_offset:row_offset+win_size,
col_offset:col_offset+win_size],
cmap=px.plot_utils.cmap_jet_white_center())
# the result should be about the size of a unit cell
# if it is the wrong size, just choose one manually by setting the win_size
plt.show()
axis.set_title('Example window', fontsize=18);
```
%% Cell type:markdown id: tags:
## Now break the image into a sequence of small windows
......@@ -260,11 +256,11 @@
for rand_ind, rand_pos in enumerate(rand_positions):
example_wins[:, :, rand_ind] = np.reshape(h5_wins[rand_pos], (windowing_parms['win_x'], windowing_parms['win_y']))
px.plot_utils.plot_map_stack(example_wins, heading='Example Windows', cmap=px.plot_utils.cmap_jet_white_center(),
title=['Window # ' + str(win_pos) for win_pos in rand_positions]);
title=['Window # ' + str(win_pos) for win_pos in rand_positions], fig_title_yoffset=0.93);
```
%% Cell type:markdown id: tags:
## Performing Singular Value Decomposition (SVD) on the windowed data
......@@ -278,16 +274,18 @@
# check to make sure number of components is correct:
num_comp = 1024
num_comp = min(num_comp,
min(h5_wins.shape)*len(h5_wins.dtype))
h5_svd = px.hdf_utils.check_for_old(h5_wins, 'SVD', {'num_components':num_comp})
if h5_svd is None:
print('SVD was either not performed or was performed with different parameters')
h5_svd = px.processing.doSVD(h5_wins, num_comps=num_comp)
proc = px.processing.SVD(h5_wins, num_components=num_comp)
if proc.duplicate_h5_groups is None:
print('SVD not performed with these parameters')
h5_svd = proc.compute()
else:
print('Taking existing SVD results')
print('Taking existing results!')
h5_svd = proc.duplicate_h5_groups
h5_U = h5_svd['U']
h5_S = h5_svd['S']
h5_V = h5_svd['V']
......@@ -318,11 +316,11 @@
Note also that the plot below is a log-log plot. The importance of each subsequent component drops exponentially.
%% Cell type:code id: tags:
``` python
fig_S, ax_S = px.plot_utils.plotScree(h5_S[()]);
fig_S, ax_S = px.plot_utils.plot_scree(h5_S[()]);
```
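%% Cell type:markdown id: tags:
One common heuristic (not used in this notebook) for picking how many components to keep is to retain enough singular values to capture a target fraction of the total variance. The sketch below assumes `h5_S` has been loaded as above; the 95% threshold is purely illustrative.
%% Cell type:code id: tags:
``` python
s_vals = h5_S[()].ravel()
# cumulative fraction of the total variance captured by the first k components
energy = np.cumsum(s_vals**2) / np.sum(s_vals**2)
suggested_comps = int(np.searchsorted(energy, 0.95)) + 1
print('{} components capture ~95% of the variance'.format(suggested_comps))
```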
%% Cell type:markdown id: tags:
#### V (Eigenvectors or end-members)
......@@ -490,20 +488,15 @@
## Check the cleaned image now:
%% Cell type:code id: tags:
``` python
num_comps = 12
num_comps = 24
fig, axis = plt.subplots(figsize=(7, 7))
clean_image_mat = image_components[:, :, num_comps]
img_clean = axis.imshow(clean_image_mat, cmap=px.plot_utils.cmap_jet_white_center(), origin='lower')
mean_val = np.mean(clean_image_mat)
std_val = np.std(clean_image_mat)
img_clean.set_clim(vmin=mean_val-img_stdevs*std_val, vmax=mean_val+img_stdevs*std_val)
axis.get_yaxis().set_visible(False)
axis.get_xaxis().set_visible(False)
_ = px.plot_utils.plot_map(axis, clean_image_mat, cmap=px.plot_utils.cmap_jet_white_center())
axis.set_title('Cleaned Image', fontsize=16);
```
%% Cell type:markdown id: tags:
......@@ -516,61 +509,21 @@
We want a large enough number of clusters so that K-means identifies fine nuances in the data. At the same time, we want to minimize computational time by reducing the number of clusters. We recommend 32 - 64 clusters.
%% Cell type:code id: tags:
``` python
clean_components = 32
num_clusters = 32
num_clusters = 4
estimator = px.Cluster(h5_U, KMeans(n_clusters=num_clusters), num_comps=num_comps)
# Check for existing Clustering results
estimator = px.Cluster(h5_U, 'KMeans', num_comps=clean_components, n_clusters=num_clusters)
do_cluster = False
# See if there are existing cluster results
try:
h5_kmeans = h5_svd['U-Cluster_000']
print( 'Clustering results loaded. Will now check parameters')
except Exception:
print( 'Could not load Clustering results.')
do_cluster = True
# Check that the same components are used
if not do_cluster:
new_clean = estimator.data_slice[1]
if isinstance(new_clean, np.ndarray):
new_clean = new_clean.tolist()
else:
# print(new_clean)
if new_clean.step is None:
new_clean = range(new_clean.start, new_clean.stop)
else:
new_clean = range(new_clean.start, new_clean.stop, new_clean.step)
if np.array_equal(h5_kmeans.attrs['components_used'], new_clean):
print( 'Clustering results used the same components as those requested.')
else:
do_cluster = True
print( 'Clustering results used the different components from those requested.')
# Check that the same number of clusters were used
if not do_cluster:
old_clusters = len(np.unique(h5_kmeans['Cluster_Indices']))
if old_clusters==num_clusters:
print( 'Clustering results used the same number of clusters as requested.')
else:
do_cluster = True
print( 'Clustering results used a different number of clusters from those requested.')
# Perform k-means clustering on the U matrix now using the list of components only if needed:
if do_cluster:
if estimator.duplicate_h5_groups is None:
t0 = time()
h5_kmeans = estimator.do_cluster()
print( 'kMeans took {} seconds.'.format(round(time()-t0, 2)))
h5_kmeans = estimator.compute()
print('kMeans took {} seconds.'.format(round(time()-t0, 2)))
else:
print( 'Using existing results.')
h5_kmeans = estimator.duplicate_h5_groups[-1]
print( 'Using existing results.')
print( 'Clustering results in {}.'.format(h5_kmeans.name))
half_wind = int(win_size*0.5)
# generate a cropped image that was effectively the area that was used for pattern searching
# Need to get the math right on the counting
......
......@@ -25,10 +25,12 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from IPython.display import display, HTML
import ipywidgets as widgets
from sklearn.cluster import KMeans
import pycroscopy as px
# set up notebook to show plots within the notebook
% matplotlib notebook
......@@ -167,18 +169,19 @@
Here, SVD essentially compares every single ronchigram with every other ronchigram to find statistically significant trends in the dataset. Such correlation would be infeasible if the ronchigrams were averaged to bright-field and dark-field scalar values.
%% Cell type:code id: tags:
``` python
proc = px.SVD(h5_main, num_comps=256)
# First check if SVD was already computed on this dataset:
h5_svd_group = px.hdf_utils.findH5group(h5_main, 'SVD')
if len(h5_svd_group) == 0:
if proc.duplicate_h5_groups is None:
print('No prior SVD results found - doing SVD now')
h5_svd_group = px.doSVD(h5_main, num_comps=256)
h5_svd_group = proc.compute()
else:
print('Taking previous SVD results already present in file')
h5_svd_group = h5_svd_group[-1]
h5_svd_group = proc.duplicate_h5_groups[-1]
h5_u = h5_svd_group['U']
h5_v = h5_svd_group['V']
h5_s = h5_svd_group['S']
num_comps = 16
......@@ -241,20 +244,22 @@
%% Cell type:code id: tags:
``` python
# Attempt to find any previous computation
h5_kmeans_group = px.hdf_utils.findH5group(h5_u, 'Cluster')
if len(h5_kmeans_group) == 0:
num_clusters = 32
spectral_components = 128
estimator = KMeans(n_clusters=num_clusters)
proc = px.Cluster(h5_u, estimator, num_comps=spectral_components)
if proc.duplicate_h5_groups is None:
print('No k-Means computation found. Doing K-Means now')
num_clusters = 32
num_comps_for_clustering = 128
estimator = px.Cluster(h5_u, 'KMeans', num_comps=num_comps_for_clustering, n_clusters=num_clusters)
h5_kmeans_group = estimator.do_cluster()
h5_kmeans_group = proc.compute()
else:
print('Taking existing results of previous K-Means computation')
h5_kmeans_group = h5_kmeans_group[-1]
h5_kmeans_group = proc.duplicate_h5_groups[-1]
h5_labels = h5_kmeans_group['Labels']
h5_centroids = h5_kmeans_group['Mean_Response']
# In case we take existing results, we need to get these parameters
......@@ -281,11 +286,11 @@
The vertical length of the branches indicates the relative separation between neighboring clusters.
%% Cell type:code id: tags:
``` python
e_vals = np.reshape(h5_u[:, :num_comps_for_clustering],
e_vals = np.reshape(h5_u[:, :spectral_components],
(num_rows, num_cols, -1))
fig = px.plot_utils.plot_cluster_dendrogram(label_mat, e_vals,
num_comps_for_clustering,
num_clusters,
last=num_clusters);
......
......@@ -582,16 +582,10 @@
#fig.tight_layout()
"""fig.savefig(os.path.join(figure_folder, 'capacitance_map.png'), format='png', dpi=300)
fig.savefig(os.path.join(figure_folder, 'capacitance_map.pdf'), format='pdf',bbox_inches = 'tight', pad_inches = 2.0)"""
```
%% Cell type:code id: tags:
``` python
cap_vec
```
%% Cell type:markdown id: tags:
## Extracting Switching Current
* The forward switching current is calculated as the difference between the forward and reverse currents for positive biases.
* The reverse switching current is calculated as the difference between the reverse and forward currents for negative biases.
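%% Cell type:markdown id: tags:
For illustration only, a minimal sketch of these two differences. The array names (`current_fwd`, `current_rev`, `bias_vec`) and the dummy data are assumptions for this sketch, not variables defined elsewhere in this notebook.
%% Cell type:code id: tags:
``` python
import numpy as np
# Dummy stand-ins for the measured forward / reverse current maps (num_pixels x num_bias_steps)
num_pixels, num_steps = 4, 8
bias_vec = np.linspace(-5, 5, num_steps)
current_fwd = np.random.rand(num_pixels, num_steps)
current_rev = np.random.rand(num_pixels, num_steps)
# Forward switching current: forward minus reverse current at positive biases
i_switch_fwd = current_fwd[:, bias_vec > 0] - current_rev[:, bias_vec > 0]
# Reverse switching current: reverse minus forward current at negative biases
i_switch_rev = current_rev[:, bias_vec < 0] - current_fwd[:, bias_vec < 0]
```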
......@@ -952,17 +946,17 @@
P is half the full charge (-Ps to +Ps = 2Ps)
%% Cell type:code id: tags:
``` python
cap_diameter = 500E-9 #Diameter of cap
cap_diameter = 500E-9 # Diameter of cap in m
cap_area = np.pi*(cap_diameter/2)**2
P_forward = 0.5*(Q_integrated[:,:,0]/(cap_area)) # Polarization in C/m^2,
P_forward = P_forward*1E6*1E-4 #Polarization in uC (*1E6) /cm^2 (*1E-4)
P_forward = P_forward*1E6*1E-4 # Polarization in uC (*1E6) /cm^2 (*1E-4)
P_reverse = 0.5*(Q_integrated[:,:,1]/(cap_area)) #Polarization in C/m^2
P_reverse = P_reverse*1E6*1E-4 #Polarization in uC (*1E6) /cm^2 (*1E-4)
P_reverse = P_reverse*1E6*1E-4 # Polarization in uC (*1E6) /cm^2 (*1E-4)
```
%% Cell type:markdown id: tags:
### Now visualize
......@@ -1094,49 +1088,49 @@
Then fit to a Gaussian, with the constraint that the area equals that of the switched fraction
%% Cell type:code id: tags:
``` python
#Define some functions
# Define some functions
def gauss1(x,*p1):
A1,x1,sigma1= p1
return (A1*np.exp(-((x-x1)**2)/(2*sigma1*sigma1)))
# here you include the penalization factor
def residuals(p,x,y):
#Calculate the area given the whole xvector
# Calculate the area given the whole xvector
fitted_gaussian = gauss1(xvec, *p) #Gaussian, for whole x
integral = np.trapz(fitted_gaussian,xvec) #Area, for whole x
penalization = abs(integral - area_of_Rayleigh)*1E4 #c.f. with area of Rayleigh
return y - gauss1(x, *p) - penalization
coef_disorder_fit = np.zeros(shape=(P_switching_fwd.shape[0],3))
middle_ind_mat= np.zeros(shape=(P_switching_fwd.shape[0],1))
#Fitting guesses
# Fitting guesses
p0 = [1, 4E-4, 1E-4]
lb = [0,0,0]
ub=[1,1,1]
bounds = (lb,ub)
xvec = np.linspace(0,tstep*time_ratio_fwd, len(xvec_Pr_fwd[:])) #xvec
for pix_ind, yvec in enumerate(P_switching_fwd):
#If it's a switching event
if pix_ind in switching_points[0]:
#Calculate the area
#del area_of_Rayleigh
# Calculate the area
# del area_of_Rayleigh
global area_of_Rayleigh
area_of_Rayleigh = np.trapz(yvec, xvec)
cum_area = scipy.integrate.cumtrapz(yvec,xvec)
#Take the point where the area is half of the total as the cutoff point
# Take the point where the area is half of the total as the cutoff point
middle_index = np.argsort((cum_area - 0.5*area_of_Rayleigh)**2)[0]
#Smaller x,yvecs
# Smaller x,yvecs
new_x, new_y = xvec[middle_index:],yvec[middle_index:]
#Apply the area constraint
# Apply the area constraint
if len(new_x)>4:
popt2, pcov2 = scipy.optimize.leastsq(func=residuals, x0=p0, args=(new_x,new_y))
y_fit2 = gauss1(xvec, *popt2)
coef_disorder_fit[pix_ind,:] = popt2
middle_ind_mat[pix_ind,:] = middle_index
......@@ -1149,11 +1143,11 @@
At a specific location
%% Cell type:code id: tags:
``` python
#row, col = 115, 51
# row, col = 115, 51
row, col = 110, 134
pix_ind = row * num_cols + col
xvec = np.linspace(0,tstep*time_ratio_fwd, len(xvec_Pr_fwd[:]))
yvec = P_switching_fwd[pix_ind,:] #yvec
......@@ -1336,19 +1330,20 @@
%% Cell type:code id: tags:
``` python
num_clusts = 6
estimator = KMeans(n_clusters=num_clusts)
proc = px.Cluster(h5_siv_raw, estimator)
h5_siv_kmeans_grp = px.hdf_utils.check_for_old(h5_siv_raw, 'Cluster',
{'n_clusters':num_clusts, 'n_jobs':1})
if h5_siv_kmeans_grp is None:
if proc.duplicate_h5_groups is None:
print('Performing K-means now')
clusterer = px.processing.Cluster(h5_siv_raw, 'KMeans', n_clusters=num_clusts)
h5_siv_kmeans_grp = clusterer.do_cluster()
h5_siv_kmeans_grp = proc.compute()
else:
print('Taking previous results')
h5_siv_kmeans_grp = proc.duplicate_h5_groups[-1]
# fig_km, axes_km = px.plot_utils.plot_cluster_h5_group(h5_siv_kmeans_grp, siv_y_label)
fig, axes = plt.subplots(ncols=2, figsize=(11.5,5))
_ = px.plot_utils.plot_map(axes[0], np.reshape(h5_siv_kmeans_grp['Labels'][()],(siv_num_rows, siv_num_cols)),
......
......@@ -8,6 +8,7 @@ Created on Tue Nov 3 21:14:25 2015
from __future__ import division, print_function, absolute_import, unicode_literals
import sys
import h5py
import collections
from warnings import warn
import numpy as np
from .microdata import MicroDataset
......@@ -180,9 +181,10 @@ def get_attr(h5_object, attr_name):
att_val = att_val.decode('utf-8')
elif type(att_val) == np.ndarray:
if att_val.dtype.type in [np.bytes_, np.object_]:
att_val = np.array([str(x, 'utf-8') for x in att_val])
if sys.version_info.major == 3:
if att_val.dtype.type in [np.bytes_, np.object_]:
att_val = np.array([str(x, 'utf-8') for x in att_val])
return att_val
......@@ -1509,39 +1511,79 @@ def copy_main_attributes(h5_main, h5_new):
val = h5_main.attrs[att_name]
h5_new.attrs[att_name] = val
return
def check_for_old(h5_base, tool_name, new_parms=dict()):
def check_for_old(h5_base, tool_name, new_parms=dict(), verbose=False):
"""
Check to see if the results of a tool already exist and if they
were performed with the same parameters.
Parameters
----------
h5_base
tool_name
new_parms
h5_base : h5py.Dataset object
Dataset on which the tool is being applied to
tool_name : str
process or analysis name
new_parms : dict, optional
Parameters with which this tool will be performed.
verbose : bool, optional, default = False
Whether or not to print debugging statements
Returns
-------
group : h5py.Group or None
Group with parameters matching those in `new_parms`
Group with parameters matching those in `new_parms`
"""
groups = findH5group(h5_base, tool_name)
for group in groups:
if verbose:
print('Looking at group - {}'.format(group.name.split('/')[-1]))
tests = []
for key in new_parms.keys():
old_parm = get_attr(group, key)
if isinstance(old_parm, np.ndarray):
# HDF5 cannot store None as an attribute anyway. ignore
if new_parms[key] is None:
continue
try:
old_value = get_attr(group, key)
except KeyError:
# if parameter was not found assume that something has changed
if verbose:
print('New parm: {} \t- new parm not in group *****'.format(key))
tests.append(False)
break
if isinstance(old_value, np.ndarray):
if not isinstance(new_parms[key], collections.Iterable):
if verbose:
print('New parm: {} \t- new parm not iterable unlike old parm *****'.format(key))
tests.append(False)
break
new_array = np.array(new_parms[key])
if old_parm.size == np.array():
tests.append(np.all(np.isclose(old_parm, new_array)))
if old_value.size != new_array.size:
if verbose:
print('New parm: {} \t- are of different sizes ****'.format(key))
tests.append(False)
else:
answer = np.all(np.isclose(old_value, new_array))
if verbose:
print('New parm: {} \t- match: {}'.format(key, answer))
tests.append(answer)
else:
tests.append(new_parms[key] == old_parm)
"""if isinstance(new_parms[key], collections.Iterable):
if verbose:
print('New parm: {} \t- new parm is iterable unlike old parm *****'.format(key))
tests.append(False)
break"""
answer = np.all(new_parms[key] == old_value)
if verbose:
print('New parm: {} \t- match: {}'.format(key, answer))
tests.append(answer)
if verbose:
print('')
if all(tests):
return group
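# Illustrative usage (not part of this module; the dataset and parameter names are
# hypothetical) mirroring how the notebooks above call this helper:
#
#     h5_prior = check_for_old(h5_wins, 'SVD', new_parms={'num_components': 256})
#     if h5_prior is None:
#         pass  # no matching results found - the tool should be (re)computed
#     else:
#         pass  # reuse the existing group instead of recomputing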
......
......@@ -13,6 +13,7 @@ from psutil import virtual_memory as vm
from warnings import warn
import h5py
import numpy as np
import itertools
__all__ = ['getAvailableMem', 'getTimeStamp', 'transformToTargetType', 'transformToReal',
'complex_to_float', 'compound_to_scalar', 'realToComplex', 'realToCompound', 'check_dtype',
......@@ -349,3 +350,28 @@ def transformToReal(ds_main):
return compound_to_scalar(ds_main)
else:
return ds_main
def to_ranges(iterable):
"""
Converts a sequence of iterables to range tuples
From https://stackoverflow.com/questions/4628333/converting-a-list-of-integers-into-range-in-python
Credits: @juanchopanza and @luca
Parameters