Commit 40e3b999 authored by Unknown

Merge remote-tracking branch 'origin/cades_dev'

parents 757fdebe 5f9d8206
......@@ -49,15 +49,26 @@ Core development
1. Check if the same process has already been performed with the same parameters. When initializing the process, throw an exception. This is better than checking at the notebook stage (see the sketch after this list).
2. (Gracefully) Abort and resume processing.
* consolidate _get_component_slice used in Cluster with duplicate in svd_utils
* Legacy processes **MUST** extend Process:
* sklearn wrapper classes:
* Cluster
* Decomposition
* The computation will continue to be performed by sklearn. No need to use parallel_compute() or resume computation.
* Own classes:
* Image Windowing
* Image Cleaning
* All these **MUST** implement the check for previous computations at the very least
* As time permits, ensure that these can resume processing
* Absorb functionality from Process into Model
* Bayesian GIV should actually be an analysis <-- depends on above
* Reorganize processing and analysis - promote / demote classes etc.
* multi-node computing capability in parallel_compute
* Demystify analysis / optimize. Use parallel_compute() instead of optimize, guess_methods, and fit_methods
* Consistency in the naming and placement of attributes (channel or measurement group) in all translators - some put attributes at the measurement level, some at the channel level! hyperspy appears to create datagroups solely for the purpose of organizing metadata in a tree structure!
* Consider developing a generic curve fitting class a la `hyperspy <http://nbviewer.jupyter.org/github/hyperspy/hyperspy-demos/blob/master/Fitting_tutorial.ipynb>`_
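A minimal sketch of the duplicate-computation check from item 1 above, assuming a hypothetical legacy process rewritten to extend Process. It reuses the existing ``px.hdf_utils.check_for_old()`` helper that appears elsewhere in this commit; the class name, tool name, and exception type are illustrative only::

    import pycroscopy as px

    class ImageWindowing(px.Process):
        # Hypothetical legacy process rewritten to extend Process
        def __init__(self, h5_main, **parms):
            super(ImageWindowing, self).__init__(h5_main)
            self.parms_dict = parms
            # Fail fast at initialization rather than at the notebook stage
            prior = px.hdf_utils.check_for_old(h5_main, 'Windowing', self.parms_dict)
            if prior is not None:
                raise RuntimeError('Windowing already performed with '
                                   'parameters: {}'.format(self.parms_dict))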
......
......@@ -181,8 +181,8 @@ px.plot_utils.plot_map_stack(abun_maps, num_comps=9, heading='SVD Abundance Maps
num_clusters = 4
estimators = px.Cluster(h5_main, 'KMeans', n_clusters=num_clusters)
h5_kmeans_grp = estimators.do_cluster(h5_main)
estimators = px.Cluster(h5_main, KMeans(n_clusters=num_clusters))
h5_kmeans_grp = estimators.compute(h5_main)
h5_kmeans_labels = h5_kmeans_grp['Labels']
h5_kmeans_mean_resp = h5_kmeans_grp['Mean_Response']
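The first pair of lines above shows the old string-based API; the second pair shows the new one, in which the estimator is an instantiated sklearn object rather than a string and ``compute()`` replaces ``do_cluster()``. A self-contained sketch of the new call pattern, assuming ``h5_main`` is an open pycroscopy main dataset::

    from sklearn.cluster import KMeans
    import pycroscopy as px

    num_clusters = 4
    estimators = px.Cluster(h5_main, KMeans(n_clusters=num_clusters))
    h5_kmeans_grp = estimators.compute(h5_main)
    h5_kmeans_labels = h5_kmeans_grp['Labels']            # one label per position
    h5_kmeans_mean_resp = h5_kmeans_grp['Mean_Response']  # cluster centroids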
......
......@@ -21,7 +21,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!pip install -U numpy scipy skimage h5py matplotlib Ipython ipywidgets pycroscopy\n",
......@@ -41,6 +43,7 @@
"from skimage import measure\n",
"from scipy.cluster.hierarchy import linkage, dendrogram\n",
"from scipy.spatial.distance import pdist \n",
"from sklearn.cluster import KMeans\n",
"\n",
"# Visualization:\n",
"import matplotlib.pyplot as plt\n",
......@@ -74,7 +77,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"image_path = px.io.uiGetFile('*.png *PNG *TIFF * TIF *tif *tiff *BMP *bmp','Images')\n",
......@@ -102,7 +107,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Check if an HDF5 file with the chosen image already exists.\n",
......@@ -150,7 +157,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print('Datasets and datagroups within the file:')\n",
......@@ -180,7 +189,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Initialize the windowing class\n",
......@@ -208,14 +219,13 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"fig, axis = plt.subplots(figsize=(10,10))\n",
"img = axis.imshow(raw_image_mat,cmap=px.plot_utils.cmap_jet_white_center(), origin='lower');\n",
"divider = make_axes_locatable(axis)\n",
"cax = divider.append_axes(\"right\", size=\"5%\", pad=0.2)\n",
"plt.colorbar(img, cax=cax)\n",
"px.plot_utils.plot_map(axis, raw_image_mat, cmap=px.plot_utils.cmap_jet_white_center())\n",
"axis.set_title('Raw Image', fontsize=16);"
]
},
......@@ -229,7 +239,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"num_peaks = 2\n",
......@@ -241,7 +253,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Uncomment this line if you need to manually specify a window size\n",
......@@ -250,15 +264,13 @@
"# plot a single window\n",
"row_offset = int(0.5*(num_x-win_size))\n",
"col_offset = int(0.5*(num_y-win_size))\n",
"plt.figure()\n",
"plt.imshow(raw_image_mat[row_offset:row_offset+win_size,\n",
" col_offset:col_offset+win_size], \n",
" cmap=px.plot_utils.cmap_jet_white_center(),\n",
" origin='lower');\n",
"\n",
"fig, axis = plt.subplots(figsize=(5, 5))\n",
"px.plot_utils.plot_map(axis, raw_image_mat[row_offset:row_offset+win_size,\n",
" col_offset:col_offset+win_size], \n",
" cmap=px.plot_utils.cmap_jet_white_center())\n",
"# the result should be about the size of a unit cell\n",
"# if it is the wrong size, just choose on manually by setting the win_size\n",
"plt.show()"
"axis.set_title('Example window', fontsize=18);"
]
},
{
......@@ -272,7 +284,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"windowing_parms = {\n",
......@@ -309,7 +323,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Peek at a few random windows\n",
......@@ -321,7 +337,7 @@
" example_wins[:, :, rand_ind] = np.reshape(h5_wins[rand_pos], (windowing_parms['win_x'], windowing_parms['win_y']))\n",
" \n",
"px.plot_utils.plot_map_stack(example_wins, heading='Example Windows', cmap=px.plot_utils.cmap_jet_white_center(),\n",
" title=['Window # ' + str(win_pos) for win_pos in rand_positions]);"
" title=['Window # ' + str(win_pos) for win_pos in rand_positions], fig_title_yoffset=0.93);"
]
},
{
......@@ -337,7 +353,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# check to make sure number of components is correct:\n",
......@@ -345,12 +363,14 @@
"num_comp = min(num_comp, \n",
" min(h5_wins.shape)*len(h5_wins.dtype))\n",
"\n",
"h5_svd = px.hdf_utils.check_for_old(h5_wins, 'SVD', {'num_components':num_comp})\n",
"if h5_svd is None:\n",
" print('SVD was either not performed or was performed with different parameters')\n",
" h5_svd = px.processing.doSVD(h5_wins, num_comps=num_comp)\n",
"proc = px.processing.SVD(h5_wins, num_components=num_comp)\n",
"\n",
"if proc.duplicate_h5_groups is None:\n",
" print('SVD not performed with these parameters')\n",
" h5_svd = proc.compute()\n",
"else:\n",
" print('Taking existing SVD results')\n",
" print('Taking existing results!')\n",
" h5_svd = proc.duplicate_h5_groups \n",
" \n",
"h5_U = h5_svd['U']\n",
"h5_S = h5_svd['S']\n",
......@@ -388,10 +408,12 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"fig_S, ax_S = px.plot_utils.plotScree(h5_S[()]);"
"fig_S, ax_S = px.plot_utils.plot_scree(h5_S[()]);"
]
},
{
......@@ -406,7 +428,8 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
"collapsed": false,
"scrolled": false
},
"outputs": [],
"source": [
......@@ -427,7 +450,8 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
"collapsed": false,
"scrolled": false
},
"outputs": [],
"source": [
......@@ -452,7 +476,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"clean_components = range(36) # np.append(range(5,9),(17,18))\n",
......@@ -482,7 +508,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Building a stack of images from here:\n",
......@@ -518,7 +546,8 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
"collapsed": false,
"scrolled": false
},
"outputs": [],
"source": [
......@@ -540,6 +569,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [],
......@@ -604,19 +634,16 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"num_comps = 12\n",
"num_comps = 24\n",
"\n",
"fig, axis = plt.subplots(figsize=(7, 7))\n",
"clean_image_mat = image_components[:, :, num_comps]\n",
"img_clean = axis.imshow(clean_image_mat, cmap=px.plot_utils.cmap_jet_white_center(), origin='lower')\n",
"mean_val = np.mean(clean_image_mat)\n",
"std_val = np.std(clean_image_mat)\n",
"img_clean.set_clim(vmin=mean_val-img_stdevs*std_val, vmax=mean_val+img_stdevs*std_val)\n",
"axis.get_yaxis().set_visible(False)\n",
"axis.get_xaxis().set_visible(False)\n",
"_ = px.plot_utils.plot_map(axis, clean_image_mat, cmap=px.plot_utils.cmap_jet_white_center())\n",
"axis.set_title('Cleaned Image', fontsize=16);"
]
},
......@@ -637,61 +664,22 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [],
"source": [
"clean_components = 32\n",
"num_clusters = 32\n",
"\n",
"# Check for existing Clustering results\n",
"estimator = px.Cluster(h5_U, 'KMeans', num_comps=clean_components, n_clusters=num_clusters)\n",
"do_cluster = False\n",
"\n",
"# See if there are existing cluster results\n",
"try:\n",
" h5_kmeans = h5_svd['U-Cluster_000']\n",
" print( 'Clustering results loaded. Will now check parameters')\n",
"except Exception:\n",
" print( 'Could not load Clustering results.')\n",
" do_cluster = True\n",
"\n",
"# Check that the same components are used\n",
"if not do_cluster:\n",
" new_clean = estimator.data_slice[1]\n",
" if isinstance(new_clean, np.ndarray):\n",
" new_clean = new_clean.tolist()\n",
" else:\n",
" # print(new_clean)\n",
" if new_clean.step is None:\n",
" new_clean = range(new_clean.start, new_clean.stop)\n",
" else:\n",
" new_clean = range(new_clean.start, new_clean.stop, new_clean.step)\n",
" \n",
" if np.array_equal(h5_kmeans.attrs['components_used'], new_clean):\n",
" print( 'Clustering results used the same components as those requested.')\n",
" else:\n",
" do_cluster = True\n",
" print( 'Clustering results used the different components from those requested.')\n",
"\n",
"# Check that the same number of clusters were used\n",
"if not do_cluster:\n",
" old_clusters = len(np.unique(h5_kmeans['Cluster_Indices']))\n",
" \n",
" if old_clusters==num_clusters:\n",
" print( 'Clustering results used the same number of clusters as requested.')\n",
" else:\n",
" do_cluster = True\n",
" print( 'Clustering results used a different number of clusters from those requested.')\n",
"\n",
"# Perform k-means clustering on the U matrix now using the list of components only if needed:\n",
"if do_cluster:\n",
"num_clusters = 4\n",
"estimator = px.Cluster(h5_U, KMeans(n_clusters=num_clusters), num_comps=num_comps)\n",
"\n",
"if estimator.duplicate_h5_groups is None:\n",
" t0 = time()\n",
" h5_kmeans = estimator.do_cluster()\n",
" print( 'kMeans took {} seconds.'.format(round(time()-t0, 2)))\n",
" h5_kmeans = estimator.compute()\n",
" print('kMeans took {} seconds.'.format(round(time()-t0, 2)))\n",
"else:\n",
" print( 'Using existing results.')\n",
"\n",
" h5_kmeans = estimator.duplicate_h5_groups[-1]\n",
" print( 'Using existing results.') \n",
" \n",
"print( 'Clustering results in {}.'.format(h5_kmeans.name))\n",
"\n",
"half_wind = int(win_size*0.5)\n",
......@@ -723,7 +711,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Plot dendrogram here\n",
......@@ -760,6 +750,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": false
},
"outputs": [],
......@@ -859,6 +850,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": false
},
"outputs": [],
......@@ -914,7 +906,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"motif_match_coeffs = list()\n",
......@@ -947,7 +941,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"show_legend = True\n",
......@@ -994,6 +990,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": false
},
"outputs": [],
......@@ -1068,7 +1065,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"print(thresholds)\n",
......@@ -1093,7 +1092,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# overlay atom positions on original image\n",
......@@ -1133,7 +1134,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"h5_file.close()"
......@@ -1142,7 +1145,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
......@@ -1151,7 +1156,7 @@
"anaconda-cloud": {},
"celltoolbar": "Raw Cell Format",
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
......@@ -1165,61 +1170,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
},
"widgets": {
"state": {
"29604cba705348bebab931e8c5a8f33b": {
"views": [
{
"cell_index": 41
}
]
},
"3148e6a4894e42d2856f6c31aa70805a": {
"views": [
{
"cell_index": 33
}
]
},
"4c6e4e0c659542ca937b2ca85f7f33cf": {
"views": [
{
"cell_index": 49
}
]
},
"9236753466b34d9eb471907b4a33fd73": {
"views": [
{
"cell_index": 38
}
]
},
"ba247d838fef4411b7c74d7055033284": {
"views": [
{
"cell_index": 38
}
]
},
"bafd596f9f254b4a8652bbf8e7bdc096": {
"views": [
{
"cell_index": 41
}
]
},
"e687973fb1ff41949de8de5587ad6461": {
"views": [
{
"cell_index": 38
}
]
}
},
"version": "1.2.0"
"version": "3.5.2"
}
},
"nbformat": 4,
......
......@@ -38,6 +38,8 @@
"import matplotlib.patches as patches\n",
"from IPython.display import display, HTML\n",
"import ipywidgets as widgets\n",
"from sklearn.cluster import KMeans\n",
" \n",
"import pycroscopy as px\n",
"\n",
"# set up notebook to show plots within the notebook\n",
......@@ -210,14 +212,15 @@
"metadata": {},
"outputs": [],
"source": [
"proc = px.SVD(h5_main, num_comps=256)\n",
"\n",
"# First check if SVD was already computed on this dataset:\n",
"h5_svd_group = px.hdf_utils.findH5group(h5_main, 'SVD')\n",
"if len(h5_svd_group) == 0:\n",
"if proc.duplicate_h5_groups is None:\n",
" print('No prior SVD results found - doing SVD now')\n",
" h5_svd_group = px.doSVD(h5_main, num_comps=256)\n",
" h5_svd_group = proc.compute()\n",
"else:\n",
" print('Taking previous SVD results already present in file')\n",
" h5_svd_group = h5_svd_group[-1]\n",
" h5_svd_group = proc.duplicate_h5_groups[-1]\n",
" \n",
"h5_u = h5_svd_group['U']\n",
"h5_v = h5_svd_group['V']\n",
......@@ -312,16 +315,18 @@
"outputs": [],
"source": [
"# Attempt to find any previous computation\n",
"h5_kmeans_group = px.hdf_utils.findH5group(h5_u, 'Cluster')\n",
"if len(h5_kmeans_group) == 0:\n",
"num_clusters = 32\n",
"spectral_components = 128\n",
"estimator = KMeans(n_clusters=num_clusters)\n",
"\n",
"proc = px.Cluster(h5_u, estimator, num_comps=spectral_components)\n",
"\n",
"if proc.duplicate_h5_groups is None:\n",
" print('No k-Means computation found. Doing K-Means now')\n",
" num_clusters = 32\n",
" num_comps_for_clustering = 128\n",
" estimator = px.Cluster(h5_u, 'KMeans', num_comps=num_comps_for_clustering, n_clusters=num_clusters)\n",
" h5_kmeans_group = estimator.do_cluster()\n",
" h5_kmeans_group = proc.compute()\n",
"else:\n",
" print('Taking existing results of previous K-Means computation')\n",
" h5_kmeans_group = h5_kmeans_group[-1]\n",
" h5_kmeans_group = proc.duplicate_h5_groups[-1]\n",
" \n",
"h5_labels = h5_kmeans_group['Labels']\n",
"h5_centroids = h5_kmeans_group['Mean_Response']\n",
......@@ -364,7 +369,7 @@
"metadata": {},
"outputs": [],
"source": [
"e_vals = np.reshape(h5_u[:, :num_comps_for_clustering], \n",
"e_vals = np.reshape(h5_u[:, :spectral_components], \n",
" (num_rows, num_cols, -1))\n",
"fig = px.plot_utils.plot_cluster_dendrogram(label_mat, e_vals, \n",
" num_comps_for_clustering, \n",
......@@ -423,7 +428,7 @@
"74a037e0ed7f4854a0c8e337ac2d6798": {
"views": [
{
"cell_index": 10
"cell_index": 10.0
}
]
}
......
......@@ -758,17 +758,6 @@
"fig.savefig(os.path.join(figure_folder, 'capacitance_map.pdf'), format='pdf',bbox_inches = 'tight', pad_inches = 2.0)\"\"\""