Commit 40e3b999 authored by Unknown

Merge remote-tracking branch 'origin/cades_dev'

parents 757fdebe 5f9d8206
@@ -49,15 +49,26 @@ Core development
 1. Check if the same process has been performed with the same parameters. When initializing the process, throw an exception. This is better than checking in the notebook stage.
 2. (Gracefully) Abort and resume processing.
+* Consolidate _get_component_slice (used in Cluster) with its duplicate in svd_utils
 * Legacy processes **MUST** extend Process:
-  * Image Windowing
-  * Image Cleaning
-  * Cluster
-  * Decomposition
+  * sklearn wrapper classes:
+    * Cluster
+    * Decomposition
+    * The computation will continue to be performed by sklearn. No need to use parallel_compute() or to resume computation.
+  * Own classes:
+    * Image Windowing
+    * Image Cleaning
+    * As time permits, ensure that these can resume processing
   * All of these MUST implement the check for previous computations at the very least
   * As time permits, ensure that these can resume processing
 * Absorb functionality from Process into Model
+* Bayesian GIV should actually be an analysis <-- depends on the above
+* Reorganize processing and analysis - promote / demote classes etc.
+* Multi-node computing capability in parallel_compute
 * Demystify analysis / optimize. Use parallel_compute instead of optimize and guess_methods and fit_methods
 * Consistency in the naming and placement of attributes (channel or measurement group) in all translators - some put attributes at the measurement level, some at the channel level! hyperspy appears to create datagroups solely for the purpose of organizing metadata in a tree structure!
 * Consider developing a generic curve fitting class a la `hyperspy <http://nbviewer.jupyter.org/github/hyperspy/hyperspy-demos/blob/master/Fitting_tutorial.ipynb>`_
...
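The duplicate-parameter check in item 1 above is the same pattern the notebook changes below adopt via duplicate_h5_groups. A minimal sketch of the init-time check, assuming the Process base class is exposed as px.Process and populates duplicate_h5_groups itself; the subclass name and exception are illustrative, not part of this commit:

    import pycroscopy as px

    class ImageWindowing(px.Process):  # hypothetical subclass, for illustration only
        def __init__(self, h5_main, **parms):
            super(ImageWindowing, self).__init__(h5_main, **parms)
            # duplicate_h5_groups is assumed to list existing results groups
            # written by this process with identical parameters
            if self.duplicate_h5_groups is not None:
                raise ValueError('This process was already performed with the same '
                                 'parameters: {}'.format(self.duplicate_h5_groups))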
@@ -181,8 +181,8 @@ px.plot_utils.plot_map_stack(abun_maps, num_comps=9, heading='SVD Abundance Maps
 num_clusters = 4
-estimators = px.Cluster(h5_main, 'KMeans', n_clusters=num_clusters)
-h5_kmeans_grp = estimators.do_cluster(h5_main)
+estimators = px.Cluster(h5_main, KMeans(n_clusters=num_clusters))
+h5_kmeans_grp = estimators.compute(h5_main)
 h5_kmeans_labels = h5_kmeans_grp['Labels']
 h5_kmeans_mean_resp = h5_kmeans_grp['Mean_Response']
...
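Taken together, the new API in this hunk reads as below. KMeans must now be imported from sklearn, since Cluster takes a configured estimator object rather than a string name (h5_main is the main dataset opened earlier in that document):

    from sklearn.cluster import KMeans

    num_clusters = 4
    # Cluster wraps a ready-made sklearn estimator; compute() replaces do_cluster()
    estimators = px.Cluster(h5_main, KMeans(n_clusters=num_clusters))
    h5_kmeans_grp = estimators.compute(h5_main)
    h5_kmeans_labels = h5_kmeans_grp['Labels']
    h5_kmeans_mean_resp = h5_kmeans_grp['Mean_Response']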
@@ -21,7 +21,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": false
+  },
   "outputs": [],
   "source": [
    "!pip install -U numpy scipy scikit-image h5py matplotlib ipython ipywidgets pycroscopy\n",
@@ -41,6 +43,7 @@
    "from skimage import measure\n",
    "from scipy.cluster.hierarchy import linkage, dendrogram\n",
    "from scipy.spatial.distance import pdist \n",
+   "from sklearn.cluster import KMeans\n",
    "\n",
    "# Visualization:\n",
    "import matplotlib.pyplot as plt\n",
@@ -74,7 +77,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": false
+  },
   "outputs": [],
   "source": [
    "image_path = px.io.uiGetFile('*.png *PNG *TIFF * TIF *tif *tiff *BMP *bmp','Images')\n",
@@ -102,7 +107,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": false
+  },
   "outputs": [],
   "source": [
    "# Check if an HDF5 file with the chosen image already exists.\n",
@@ -150,7 +157,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": false
+  },
   "outputs": [],
   "source": [
    "print('Datasets and datagroups within the file:')\n",
@@ -180,7 +189,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": true
+  },
   "outputs": [],
   "source": [
    "# Initialize the windowing class\n",
@@ -208,14 +219,13 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": false
+  },
   "outputs": [],
   "source": [
    "fig, axis = plt.subplots(figsize=(10,10))\n",
-   "img = axis.imshow(raw_image_mat,cmap=px.plot_utils.cmap_jet_white_center(), origin='lower');\n",
-   "divider = make_axes_locatable(axis)\n",
-   "cax = divider.append_axes(\"right\", size=\"5%\", pad=0.2)\n",
-   "plt.colorbar(img, cax=cax)\n",
+   "px.plot_utils.plot_map(axis, raw_image_mat, cmap=px.plot_utils.cmap_jet_white_center())\n",
    "axis.set_title('Raw Image', fontsize=16);"
   ]
  },
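plot_map here folds the old imshow + make_axes_locatable + colorbar boilerplate into a single helper. A minimal standalone sketch, with a random array standing in for the image loaded in the notebook:

    import numpy as np
    import matplotlib.pyplot as plt
    import pycroscopy as px

    raw_image_mat = np.random.rand(256, 256)  # stand-in for the notebook's image
    fig, axis = plt.subplots(figsize=(10, 10))
    # one call draws the image and attaches the colorbar
    px.plot_utils.plot_map(axis, raw_image_mat, cmap=px.plot_utils.cmap_jet_white_center())
    axis.set_title('Raw Image', fontsize=16)
    plt.show()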
@@ -229,7 +239,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": false
+  },
   "outputs": [],
   "source": [
    "num_peaks = 2\n",
@@ -241,7 +253,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": false
+  },
   "outputs": [],
   "source": [
    "# Uncomment this line if you need to manually specify a window size\n",
@@ -250,15 +264,13 @@
    "# plot a single window\n",
    "row_offset = int(0.5*(num_x-win_size))\n",
    "col_offset = int(0.5*(num_y-win_size))\n",
-   "plt.figure()\n",
-   "plt.imshow(raw_image_mat[row_offset:row_offset+win_size,\n",
-   "                         col_offset:col_offset+win_size], \n",
-   "           cmap=px.plot_utils.cmap_jet_white_center(),\n",
-   "           origin='lower');\n",
-   "\n",
+   "fig, axis = plt.subplots(figsize=(5, 5))\n",
+   "px.plot_utils.plot_map(axis, raw_image_mat[row_offset:row_offset+win_size,\n",
+   "                                           col_offset:col_offset+win_size], \n",
+   "                       cmap=px.plot_utils.cmap_jet_white_center())\n",
    "# the result should be about the size of a unit cell\n",
    "# if it is the wrong size, just choose one manually by setting the win_size\n",
-   "plt.show()"
+   "axis.set_title('Example window', fontsize=18);"
   ]
  },
  {
@@ -272,7 +284,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": false
+  },
   "outputs": [],
   "source": [
    "windowing_parms = {\n",
@@ -309,7 +323,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": false
+  },
   "outputs": [],
   "source": [
    "# Peek at a few random windows\n",
@@ -321,7 +337,7 @@
    "    example_wins[:, :, rand_ind] = np.reshape(h5_wins[rand_pos], (windowing_parms['win_x'], windowing_parms['win_y']))\n",
    "    \n",
    "px.plot_utils.plot_map_stack(example_wins, heading='Example Windows', cmap=px.plot_utils.cmap_jet_white_center(),\n",
-   "                             title=['Window # ' + str(win_pos) for win_pos in rand_positions]);"
+   "                             title=['Window # ' + str(win_pos) for win_pos in rand_positions], fig_title_yoffset=0.93);"
   ]
  },
  {
@@ -337,7 +353,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": false
+  },
   "outputs": [],
   "source": [
    "# check to make sure number of components is correct:\n",
@@ -345,12 +363,14 @@
    "num_comp = min(num_comp, \n",
    "               min(h5_wins.shape)*len(h5_wins.dtype))\n",
    "\n",
-   "h5_svd = px.hdf_utils.check_for_old(h5_wins, 'SVD', {'num_components':num_comp})\n",
-   "if h5_svd is None:\n",
-   "    print('SVD was either not performed or was performed with different parameters')\n",
-   "    h5_svd = px.processing.doSVD(h5_wins, num_comps=num_comp)\n",
+   "proc = px.processing.SVD(h5_wins, num_components=num_comp)\n",
+   "\n",
+   "if proc.duplicate_h5_groups is None:\n",
+   "    print('SVD not performed with these parameters')\n",
+   "    h5_svd = proc.compute()\n",
    "else:\n",
-   "    print('Taking existing SVD results')\n",
+   "    print('Taking existing results!')\n",
+   "    h5_svd = proc.duplicate_h5_groups\n",
    "    \n",
    "h5_U = h5_svd['U']\n",
    "h5_S = h5_svd['S']\n",
@@ -388,10 +408,12 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": false
+  },
   "outputs": [],
   "source": [
-   "fig_S, ax_S = px.plot_utils.plotScree(h5_S[()]);"
+   "fig_S, ax_S = px.plot_utils.plot_scree(h5_S[()]);"
   ]
  },
  {
@@ -406,7 +428,8 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-   "scrolled": true
+   "collapsed": false,
+   "scrolled": false
   },
   "outputs": [],
   "source": [
@@ -427,7 +450,8 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-   "scrolled": true
+   "collapsed": false,
+   "scrolled": false
   },
   "outputs": [],
   "source": [
@@ -452,7 +476,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": false
+  },
   "outputs": [],
   "source": [
    "clean_components = range(36) # np.append(range(5,9),(17,18))\n",
@@ -482,7 +508,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": true
+  },
   "outputs": [],
   "source": [
    "# Building a stack of images from here:\n",
@@ -518,7 +546,8 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
-   "scrolled": true
+   "collapsed": false,
+   "scrolled": false
   },
   "outputs": [],
   "source": [
@@ -540,6 +569,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
+   "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
@@ -604,19 +634,16 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": false
+  },
   "outputs": [],
   "source": [
-   "num_comps = 12\n",
+   "num_comps = 24\n",
    "\n",
    "fig, axis = plt.subplots(figsize=(7, 7))\n",
    "clean_image_mat = image_components[:, :, num_comps]\n",
-   "img_clean = axis.imshow(clean_image_mat, cmap=px.plot_utils.cmap_jet_white_center(), origin='lower')\n",
-   "mean_val = np.mean(clean_image_mat)\n",
-   "std_val = np.std(clean_image_mat)\n",
-   "img_clean.set_clim(vmin=mean_val-img_stdevs*std_val, vmax=mean_val+img_stdevs*std_val)\n",
-   "axis.get_yaxis().set_visible(False)\n",
-   "axis.get_xaxis().set_visible(False)\n",
+   "_ = px.plot_utils.plot_map(axis, clean_image_mat, cmap=px.plot_utils.cmap_jet_white_center())\n",
    "axis.set_title('Cleaned Image', fontsize=16);"
   ]
  },
@@ -637,61 +664,22 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
+   "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
-   "clean_components = 32\n",
-   "num_clusters = 32\n",
+   "num_clusters = 4\n",
+   "estimator = px.Cluster(h5_U, KMeans(n_clusters=num_clusters), num_comps=num_comps)\n",
    "\n",
-   "# Check for existing Clustering results\n",
-   "estimator = px.Cluster(h5_U, 'KMeans', num_comps=clean_components, n_clusters=num_clusters)\n",
-   "do_cluster = False\n",
-   "\n",
-   "# See if there are existing cluster results\n",
-   "try:\n",
-   "    h5_kmeans = h5_svd['U-Cluster_000']\n",
-   "    print('Clustering results loaded. Will now check parameters')\n",
-   "except Exception:\n",
-   "    print('Could not load Clustering results.')\n",
-   "    do_cluster = True\n",
-   "\n",
-   "# Check that the same components are used\n",
-   "if not do_cluster:\n",
-   "    new_clean = estimator.data_slice[1]\n",
-   "    if isinstance(new_clean, np.ndarray):\n",
-   "        new_clean = new_clean.tolist()\n",
-   "    else:\n",
-   "        # print(new_clean)\n",
-   "        if new_clean.step is None:\n",
-   "            new_clean = range(new_clean.start, new_clean.stop)\n",
-   "        else:\n",
-   "            new_clean = range(new_clean.start, new_clean.stop, new_clean.step)\n",
-   "    \n",
-   "    if np.array_equal(h5_kmeans.attrs['components_used'], new_clean):\n",
-   "        print('Clustering results used the same components as those requested.')\n",
-   "    else:\n",
-   "        do_cluster = True\n",
-   "        print('Clustering results used different components from those requested.')\n",
-   "\n",
-   "# Check that the same number of clusters was used\n",
-   "if not do_cluster:\n",
-   "    old_clusters = len(np.unique(h5_kmeans['Cluster_Indices']))\n",
-   "    \n",
-   "    if old_clusters==num_clusters:\n",
-   "        print('Clustering results used the same number of clusters as requested.')\n",
-   "    else:\n",
-   "        do_cluster = True\n",
-   "        print('Clustering results used a different number of clusters from those requested.')\n",
-   "\n",
-   "# Perform k-means clustering on the U matrix now, using the list of components, only if needed:\n",
-   "if do_cluster:\n",
+   "if estimator.duplicate_h5_groups is None:\n",
    "    t0 = time()\n",
-   "    h5_kmeans = estimator.do_cluster()\n",
+   "    h5_kmeans = estimator.compute()\n",
    "    print('kMeans took {} seconds.'.format(round(time()-t0, 2)))\n",
    "else:\n",
-   "    print('Using existing results.')\n",
-   "\n",
+   "    h5_kmeans = estimator.duplicate_h5_groups[-1]\n",
+   "    print('Using existing results.')\n",
+   "    \n",
    "print('Clustering results in {}.'.format(h5_kmeans.name))\n",
    "\n",
    "half_wind = int(win_size*0.5)\n",
@@ -723,7 +711,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": true
+  },
   "outputs": [],
   "source": [
    "# Plot dendrogram here\n",
@@ -760,6 +750,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
+   "collapsed": true,
    "scrolled": false
   },
   "outputs": [],
@@ -859,6 +850,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
+   "collapsed": true,
    "scrolled": false
   },
   "outputs": [],
@@ -914,7 +906,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": true
+  },
   "outputs": [],
   "source": [
    "motif_match_coeffs = list()\n",
@@ -947,7 +941,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": true
+  },
   "outputs": [],
   "source": [
    "show_legend = True\n",
@@ -994,6 +990,7 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
+   "collapsed": true,
    "scrolled": false
   },
   "outputs": [],
@@ -1068,7 +1065,9 @@
  {
   "cell_type": "code",
   "execution_count": null,
-  "metadata": {},
+  "metadata": {
+   "collapsed": true
+  },
   "outputs": [],