Using the new formulation of the Cluster and SVD classes

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from IPython.display import display, HTML
import ipywidgets as widgets
from sklearn.cluster import KMeans
import pycroscopy as px
# set up notebook to show plots within the notebook
% matplotlib notebook
Here, SVD essentially compares every single ronchigram with every other ronchigram to find statistically significant trends in the dataset. Such correlation would be infeasible if the ronchigrams were averaged to bright-field and dark-field scalar values.
``` python
# Set up the SVD process on the main dataset, keeping 256 components.
proc = px.SVD(h5_main, num_comps=256)

# Reuse earlier results when the process object reports any; otherwise compute.
# NOTE(review): presumably `duplicate_h5_groups` holds result groups from prior
# runs with matching parameters - confirm against the pycroscopy Process docs.
# This replaces the legacy findH5group / px.doSVD lookup, whose result was
# immediately overwritten by the process-based calls.
if proc.duplicate_h5_groups is None:
    print('No prior SVD results found - doing SVD now')
    h5_svd_group = proc.compute()
else:
    print('Taking previous SVD results already present in file')
    # Take the last entry among the reported duplicate groups.
    h5_svd_group = proc.duplicate_h5_groups[-1]

# Datasets read from the SVD results group.
h5_u = h5_svd_group['U']
h5_v = h5_svd_group['V']
h5_s = h5_svd_group['S']

num_comps = 16
``` python
# Clustering parameters: number of clusters for k-Means and the number of
# components of h5_u handed to px.Cluster.
num_clusters = 32
spectral_components = 128

# Build the scikit-learn estimator and wrap it in the pycroscopy Cluster process.
estimator = KMeans(n_clusters=num_clusters)
proc = px.Cluster(h5_u, estimator, num_comps=spectral_components)

# Attempt to find any previous computation; only cluster when none is reported.
# NOTE(review): presumably `duplicate_h5_groups` holds result groups from prior
# runs with matching parameters - confirm against the pycroscopy Process docs.
# This replaces the legacy findH5group / string-based px.Cluster / do_cluster()
# path, whose results were immediately overwritten by the process-based calls.
if proc.duplicate_h5_groups is None:
    print('No k-Means computation found. Doing K-Means now')
    h5_kmeans_group = proc.compute()
else:
    print('Taking existing results of previous K-Means computation')
    # Take the last entry among the reported duplicate groups.
    h5_kmeans_group = proc.duplicate_h5_groups[-1]

# Datasets read from the clustering results group.
h5_labels = h5_kmeans_group['Labels']
h5_centroids = h5_kmeans_group['Mean_Response']
# In case we take existing results, we need to get these parameters
The vertical length of the branches indicates the relative separation between neighboring clusters.
``` python
# Take the first `spectral_components` columns of h5_u and fold the leading
# (position) axis into a (num_rows, num_cols, n_components) array.
# `spectral_components` is the same name passed to px.Cluster as num_comps.
e_vals = np.reshape(h5_u[:, :spectral_components],
                    (num_rows, num_cols, -1))
fig = px.plot_utils.plot_cluster_dendrogram(label_mat, e_vals,
