Commit 5f9d8206 authored by Somnath, Suhas's avatar Somnath, Suhas
Browse files

Fixed bugs related to plot_utils, SVD, and kMeans changes

parent 20a25952
......@@ -170,14 +170,11 @@
%% Cell type:code id: tags:
``` python
# Display raw_image_mat with the white-centered jet colormap and title the axis.
# NOTE(review): this span comes from a diff view, so the imshow/colorbar lines
# and the plot_map call are presumably the before/after versions of the same
# step rather than code meant to run together - confirm against the notebook.
fig, axis = plt.subplots(figsize=(10,10))
img = axis.imshow(raw_image_mat,cmap=px.plot_utils.cmap_jet_white_center(), origin='lower');
# Append a narrow axis (5% of the width) on the right of the image for the colorbar.
divider = make_axes_locatable(axis)
cax = divider.append_axes("right", size="5%", pad=0.2)
plt.colorbar(img, cax=cax)
px.plot_utils.plot_map(axis, raw_image_mat, cmap=px.plot_utils.cmap_jet_white_center())
axis.set_title('Raw Image', fontsize=16);
```
%% Cell type:markdown id: tags:
......@@ -199,19 +196,17 @@
# win_size = 8
# plot a single window
# NOTE(review): this span comes from a diff view; the plt.figure()/plt.imshow
# lines and the plt.subplots()/plot_map lines are presumably the before/after
# versions of the same plot - confirm which set the notebook actually keeps.
# Offsets of a win_size-wide slice, half of the leftover (num_x/num_y minus
# win_size) from the origin; presumably num_x and num_y are the image
# dimensions, which would centre the window - TODO confirm.
row_offset = int(0.5*(num_x-win_size))
col_offset = int(0.5*(num_y-win_size))
plt.figure()
plt.imshow(raw_image_mat[row_offset:row_offset+win_size,
col_offset:col_offset+win_size],
cmap=px.plot_utils.cmap_jet_white_center(),
origin='lower');
fig, axis = plt.subplots(figsize=(5, 5))
px.plot_utils.plot_map(axis, raw_image_mat[row_offset:row_offset+win_size,
col_offset:col_offset+win_size],
cmap=px.plot_utils.cmap_jet_white_center())
# the result should be about the size of a unit cell
# if it is the wrong size, just choose one manually by setting the win_size
plt.show()
axis.set_title('Example window', fontsize=18);
```
%% Cell type:markdown id: tags:
## Now break the image into a sequence of small windows
......@@ -261,11 +256,11 @@
# Copy each randomly chosen window out of h5_wins, reshaped to
# (win_x, win_y) as given by windowing_parms, into one slice of example_wins.
# NOTE(review): the loop body's indentation was lost in this diff view.
for rand_ind, rand_pos in enumerate(rand_positions):
example_wins[:, :, rand_ind] = np.reshape(h5_wins[rand_pos], (windowing_parms['win_x'], windowing_parms['win_y']))
# Plot the stack of example windows, titling each panel with its window position.
# NOTE(review): the two trailing "title=..." lines are presumably the
# before/after endings of this one call (the second adds fig_title_yoffset);
# only one of them can close the call - confirm against the notebook.
px.plot_utils.plot_map_stack(example_wins, heading='Example Windows', cmap=px.plot_utils.cmap_jet_white_center(),
title=['Window # ' + str(win_pos) for win_pos in rand_positions]);
title=['Window # ' + str(win_pos) for win_pos in rand_positions], fig_title_yoffset=0.93);
```
%% Cell type:markdown id: tags:
## Performing Singular Value Decomposition (SVD) on the windowed data
......@@ -279,19 +274,18 @@
# check to make sure number of components is correct:
# The requested count (1024) is capped at the smallest dimension of h5_wins
# multiplied by len(h5_wins.dtype) - presumably the number of fields in a
# compound dtype; TODO confirm.
num_comp = 1024
num_comp = min(num_comp,
min(h5_wins.shape)*len(h5_wins.dtype))
# NOTE(review): this span comes from a diff view. The two SVD constructor
# calls (px.SVD on h5_main with num_comps vs px.processing.SVD on h5_wins with
# num_components), the paired print messages, and the two h5_svd assignments
# in the else branch are presumably before/after versions - confirm which the
# notebook keeps. Branch indentation was also lost in this view.
proc = px.SVD(h5_main, num_comps=num_comp)
proc = px.processing.SVD(h5_wins, num_components=num_comp)
# First check if SVD was already computed on this dataset:
if proc.duplicate_h5_groups is None:
print('SVD was either not performed or was performed with different parameters')
print('SVD not performed with these parameters')
h5_svd = proc.compute()
else:
print('Taking previous SVD results already present in file')
h5_svd = proc.duplicate_h5_groups[-1]
print('Taking existing results!')
h5_svd = proc.duplicate_h5_groups
# Look up the three result datasets in the SVD results by name.
h5_U = h5_svd['U']
h5_S = h5_svd['S']
h5_V = h5_svd['V']
......@@ -322,11 +316,11 @@
Note also that the plot below is a log-log plot. The importance of each subsequent component drops exponentially.
%% Cell type:code id: tags:
``` python
# Plot the scree curve from the full contents of h5_S.
# NOTE(review): these two lines are presumably the before/after spelling of
# the same call (plotScree vs plot_scree) from the diff - confirm which name
# the installed plot_utils provides.
fig_S, ax_S = px.plot_utils.plotScree(h5_S[()]);
fig_S, ax_S = px.plot_utils.plot_scree(h5_S[()]);
```
%% Cell type:markdown id: tags:
#### V (Eigenvectors or end-members)
......@@ -494,20 +488,15 @@
## Check the cleaned image now:
%% Cell type:code id: tags:
``` python
# Select one slice of image_components and display it as the cleaned image.
# NOTE(review): this span comes from a diff view; the two num_comps
# assignments, and the imshow/set_clim/axis-hiding lines versus the single
# plot_map call, are presumably before/after versions - confirm against the
# notebook.
num_comps = 12
num_comps = 24
fig, axis = plt.subplots(figsize=(7, 7))
# num_comps is used as an index along the last axis of image_components.
clean_image_mat = image_components[:, :, num_comps]
img_clean = axis.imshow(clean_image_mat, cmap=px.plot_utils.cmap_jet_white_center(), origin='lower')
# Limit the colour scale to mean +/- img_stdevs standard deviations of the image.
mean_val = np.mean(clean_image_mat)
std_val = np.std(clean_image_mat)
img_clean.set_clim(vmin=mean_val-img_stdevs*std_val, vmax=mean_val+img_stdevs*std_val)
# Hide the x and y axes of the plot.
axis.get_yaxis().set_visible(False)
axis.get_xaxis().set_visible(False)
_ = px.plot_utils.plot_map(axis, clean_image_mat, cmap=px.plot_utils.cmap_jet_white_center())
axis.set_title('Cleaned Image', fontsize=16);
```
%% Cell type:markdown id: tags:
......@@ -520,25 +509,21 @@
We want a large enough number of clusters so that K-means identifies fine nuances in the data. At the same time, we want to minimize computational time by reducing the number of clusters. We recommend 32 - 64 clusters.
%% Cell type:code id: tags:
``` python
# Run KMeans clustering on h5_U through px.Cluster, reusing results of an
# earlier identical computation when the Cluster object reports any.
# NOTE(review): this span comes from a diff view. The duplicated num_clusters
# values, the two px.Cluster constructions (estimator vs proc, num_comps vs
# clean_components), and the paired if/compute/lookup lines are presumably
# before/after versions - confirm which names the notebook keeps, since mixing
# proc and estimator would be inconsistent. Branch indentation was also lost.
clean_components = 32
num_clusters = 32
num_clusters = 4
estimator = px.Cluster(h5_U, KMeans(n_clusters=num_clusters), num_comps=num_comps)
proc = px.Cluster(h5_U, KMeans(n_clusters=num_clusters), num_comps=clean_components)
# Check for existing Clustering results
if proc.duplicate_h5_groups is None:
print('No k-Means computation found. Doing K-Means now')
if estimator.duplicate_h5_groups is None:
# Time the clustering so the elapsed seconds can be reported.
t0 = time()
h5_kmeans = proc.compute()
h5_kmeans = estimator.compute()
print('kMeans took {} seconds.'.format(round(time()-t0, 2)))
else:
print('Taking existing results of previous K-Means computation')
# Take the last entry of the duplicate groups.
h5_kmeans = proc.duplicate_h5_groups[-1]
h5_kmeans = estimator.duplicate_h5_groups[-1]
print( 'Using existing results.')
print( 'Clustering results in {}.'.format(h5_kmeans.name))
# Half of the window size, truncated to an integer.
half_wind = int(win_size*0.5)
# generate a cropped image that was effectively the area that was used for pattern searching
# Need to get the math right on the counting
......@@ -564,24 +549,24 @@
%% Cell type:code id: tags:
``` python
# Plot dendrogram here
# NOTE(review): this span comes from a diff view; the duplicated comment lines
# below differ only in the space after '#' and are presumably before/after
# versions of the same comment.
# Get the distance between cluster means
#Get the distance between cluster means
distance_mat = pdist(h5_kmeans['Mean_Response'][()])
# get hierarchical pairings of clusters
#get hierarchical pairings of clusters
linkage_pairing = linkage(distance_mat,'weighted')
# Normalize the pairwise distance with the maximum distance
# (column 2 of linkage_pairing is divided by its own maximum, so it peaks at 1)
linkage_pairing[:,2] = linkage_pairing[:,2]/max(linkage_pairing[:,2])
# Visualize dendrogram
fig = plt.figure(figsize=(10,3))
retval = dendrogram(linkage_pairing, count_sort=True,
distance_sort=True, leaf_rotation=90)
# fig.axes[0].set_title('Dendrogram')
#fig.axes[0].set_title('Dendrogram')
# Label the axes of the dendrogram and set the tick font size.
fig.axes[0].set_xlabel('Cluster number', fontsize=20)
fig.axes[0].set_ylabel('Cluster separation', fontsize=20)
px.plot_utils.set_tick_font_size(fig.axes[0], 12)
```
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment