Commit abd4ce52 authored by David Pugmire

Add parallel (using threads) histogram compute to speed loading.

parent 136b21bf
Merge request !13: Resolve "Improve computation of histogram on large data"
Pipeline #669537 failed
@@ -7,6 +7,10 @@ from pathlib import Path
from time import time
from typing import Optional
import numpy as np
import threading
from concurrent.futures import ThreadPoolExecutor
import vtkmodules.vtkRenderingVolumeOpenGL2 # noqa
from natsort import natsorted
@@ -22,11 +26,44 @@ from vtkmodules.vtkRenderingCore import (
vtkVolumeProperty,
)
from vtkmodules.vtkRenderingVolume import vtkGPUVolumeRayCastMapper
from vtkmodules.util import numpy_support
from ctscan_viz.models.config import SharedConfig
PRESETS = {item.get("Name"): item for item in json.loads(Path(__file__).with_name("preset.json").read_text())}
class IncrementalHistogram:
    def __init__(self, bin_edges):
        self.bin_edges = bin_edges
        self.histogram = np.zeros(len(bin_edges) - 1, dtype=np.int64)
        self.lock = threading.Lock()  # Ensure thread-safe updates

    def compute_histogram(self, sub_array):
        # Compute the histogram for a single sub-array, then merge it into the
        # running total under the lock.
        hist, _ = np.histogram(sub_array, bins=self.bin_edges)
        with self.lock:  # Ensure safe updates across threads
            self.histogram += hist

    def add_images(self, arr, num_threads=4):
        # Split the array into num_threads views (no copy) and process them in parallel.
        # Convert the VTK array to a NumPy array first.
        arr_np = numpy_support.vtk_to_numpy(arr)

        # Create non-copying views of arr_np for parallel processing.
        chunk_size = len(arr_np) // num_threads
        sub_arrays = [arr_np[i * chunk_size:(i + 1) * chunk_size] for i in range(num_threads)]
        # The last chunk takes the remaining elements.
        if len(arr_np) % num_threads:
            sub_arrays.append(arr_np[num_threads * chunk_size:])

        # Run the histogram computation in parallel; list() consumes the map iterator
        # so any worker exception is raised here instead of being silently dropped.
        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            list(executor.map(self.compute_histogram, sub_arrays))

    def get_histogram(self):
        return self.histogram, self.bin_edges
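
# Usage sketch (illustrative only, not part of this commit): exercises the class
# above on synthetic data and checks that the merged per-chunk counts cover
# every input value.
def _example_incremental_histogram():
    data = np.random.default_rng(0).normal(size=1_000_000).astype(np.float32)
    bin_edges = np.linspace(data.min(), data.max(), 101)  # 101 edges -> 100 bins
    inc = IncrementalHistogram(bin_edges)
    inc.add_images(numpy_support.numpy_to_vtk(data), num_threads=8)
    counts, _ = inc.get_histogram()
    assert counts.sum() == data.size
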
class MainModel:
    """Main model."""
@@ -155,6 +192,7 @@ class MainModel:
end = time()
print(f" => {end - start:.2f}s")
start = time()
# Extract data stats
num_histogram_bins = 100
print(f"Dataset information:\n - Shape: {dataset.extent}\n - Memory size: {dataset.GetActualMemorySize()}")
@@ -162,12 +200,30 @@
name = scalars.name
min, max = map(partial(round, ndigits=5), scalars.GetRange())
print(f" - {name}: [{min}, {max}]")
bins, bin_edges = np.histogram(scalars, bins=num_histogram_bins, range=(min, max))
bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2
# bins[0] = 0 # Remove first bin (usually 0 or empty data)
end = time()
print(f" =LOAD=> {end - start:.2f}s")
start = time()  # Restart the timer so the histogram phase is timed on its own
use_new_histogram = True
if use_new_histogram:
    print(" ********** using parallel histogram")
    # Compute the histogram with a pool of threads.
    # num_histogram_bins bins require num_histogram_bins + 1 edges,
    # matching np.histogram(..., bins=num_histogram_bins) in the serial path.
    bin_edges = np.linspace(min, max, num_histogram_bins + 1)
    histogramer = IncrementalHistogram(bin_edges)
    histogramer.add_images(scalars, num_threads=16)
    bins, bin_edges = histogramer.get_histogram()
else:
    print(" ********** using serial histogram")
    bins, bin_edges = np.histogram(scalars, bins=num_histogram_bins, range=(min, max))
    bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2
    # bins[0] = 0  # Remove first bin (usually 0 or empty data)
end = time()
print(f" =HIST=> {end - start:.2f}s")
max_count = bins.max()
# bins = bins * 100 / max_count
print(f" - Bins={bins.tolist()}")
#print(f" - Bins={bins.tolist()}")
bin_centers = (bin_edges[1:] + bin_edges[:-1]) / 2
# Find mins and maxes
min_counts = [0]
......@@ -204,7 +260,7 @@ class MainModel:
for i in range(len(max_counts) - 1, -1, -1):
left_size = max_counts[i] - min_counts[i]
right_size = max_counts[i] - min_counts[i + 1]
print(left_size, right_size)
#print(left_size, right_size)
if (left_size < diff_threshold) or (right_size < diff_threshold):
del max_counts[i]
del max_values[i]