Commit 8e326fbb authored by Moreland, Ken's avatar Moreland, Ken
Browse files

Make bubble finding a bit better and faster

Use objects to manage holding bubbles and atoms.
parent de7a7f08
Loading
Loading
Loading
Loading
+193 −195
Original line number Diff line number Diff line
%% Cell type:code id:0e02bb89-8003-47a2-839f-58536b067490 tags:

``` python
# Some parameters for the behavior of the extraction

# Specifies the spacing of the sampling grid.
lattice_parameter = 3.177
# The radius to use for a He splat
He_radius = 0.01
# Number of He atoms that have to escape for a bubble to be
# considered burst
burst_size = 100
# Directory containing VTK files holding atoms
# Minimum number of He atoms that have to escape for a bubble to be
# considered burst.
burst_size = 25
# Minimum fraction of He atoms in a bubble that have to escape for
# the buble to be considered burst.
burst_fraction = 0.75
# Number of time steps to pass to check for escaped atoms.
time_to_escape = 10
# Directory containing VTK files holding atoms.
#data_path = '/Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm/vtk-files'
data_path = '/Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files'
```

%% Cell type:code id:21fe4234-a349-402d-afdf-c9cb9a04f36a tags:

``` python
import sys
sys.path.append('/Users/4d5/builds/VTK/Debug/lib/python3.11/site-packages')
sys.path.append('/Users/4d5/builds/VTK/Release/lib/python3.11/site-packages')
import vtk

import numpy as np
from vtk.util.numpy_support import vtk_to_numpy
```

%% Cell type:code id:9eaffe57-7c2c-401b-a253-7ee2628e1160 tags:
%% Cell type:code id:ed2a82cd-2c92-4e73-a56b-06fd2ffd931b tags:

``` python
def read_he_atoms(filename):
    '''Given a vtk file containing points from a LAMMPS atom file, creates a
point cloud containing a point and vertex cell for each He atom. All W
atoms are removed. This is returned as a `vtkUnstructuredGrid`.'''
    reader = vtk.vtkDataSetReader()
    reader.SetFileName(filename)

    tocloud = vtk.vtkConvertToPointCloud()
    tocloud.SetInputConnection(reader.GetOutputPort())
    tocloud.SetCellGenerationMode(tocloud.VERTEX_CELLS)

    extract_he = vtk.vtkThreshold()
    extract_he.SetInputConnection(tocloud.GetOutputPort())
    extract_he.SetInputArrayToProcess(
        0, 0, 0, 'vtkDataObject::FIELD_ASSOCIATION_POINTS', 'type')
    extract_he.SetLowerThreshold(1.9)
    extract_he.SetUpperThreshold(2.1)

    extract_he.Update()
    return extract_he.GetOutput()
```

%% Cell type:code id:07203a73-966e-4e20-a766-b5cf27f43f59 tags:

``` python
def find_bubbles(atoms):
    '''Given a VTK data object containing a point cloud of atoms, identify
which atoms together form a bubble by clustering nearby atoms. Each
bubble is given a unique identifier. This function returns a VTK data
object of the same structure as the input with a `RegionId` field added
that identifies which bubble each atom belongs to.'''
    # If there are no atoms, then there is nothing to find.
    if atoms.GetNumberOfPoints() < 1:
        return atoms

    # Create a density field of atoms by splatting the atoms into a grid.
    # Here we use `vtkGaussianSplatter`. If this is taking too long, it
    # is possible to speed things up using a `vtkFastSplatter`.
    bounds = atoms.GetBounds()
    sampling_resolution = (
        int((bounds[1] - bounds[0]) / (lattice_parameter / 2)),
        int((bounds[3] - bounds[2]) / (lattice_parameter / 2)),
        int((bounds[5] - bounds[4]) / (lattice_parameter / 2)))

    splatter = vtk.vtkGaussianSplatter()
    splatter.SetInputDataObject(atoms)
    splatter.SetSampleDimensions(sampling_resolution)
    splatter.SetRadius(He_radius)
    splatter.SetNormalWarping(0)
    splatter.SetAccumulationModeToSum()

    # Use an image connectivity filter to find regions with nearby atoms.
    # The filter will assign each connected region a unique ID and write
    # that to point data.
    connectivity = vtk.vtkImageConnectivityFilter()
    connectivity.SetInputConnection(splatter.GetOutputPort())
    connectivity.SetLabelScalarTypeToInt()
    #Lowering threshold to include regions to prevent missing atoms at edge
    # of bubble.
    connectivity.SetScalarRange(0.1, 100000000000)

    # Frustratingly, there appears to be a bug in `vtkImageConnectivityFilter`
    # that does not properly pass the origin and spacing of the image, which
    # means that it will no longer align with the atom data. Restore it to
    # the proper values.
    splatter.Update()
    splatter_output = splatter.GetOutput()
    reset_position = vtk.vtkImageChangeInformation()
    reset_position.SetInputConnection(connectivity.GetOutputPort())
    reset_position.SetOutputOrigin(splatter_output.GetOrigin())
    reset_position.SetOutputSpacing(splatter_output.GetSpacing())

    # Sample the connectivity field back onto the atoms that generated them.
    # This will add a `RegionId` field to the atoms that identifes which
    # cluster each one belongs to.
    probe = vtk.vtkProbeFilter()
    probe.SetInputDataObject(0, atoms)
    probe.SetInputConnection(1, reset_position.GetOutputPort())
    probe.CategoricalDataOn()
    probe.PassPointArraysOn()

    probe.Update()
    return probe.GetOutput()
```

%% Cell type:code id:0c64f950-83bb-4c26-9f13-87aa550ffc3f tags:

``` python
def make_bubble_dict(atoms):
    '''Given a VTK data object containing a point cloud of atoms and a
`RegionId` field identifying a containing bubble for each atom (e.g.
atoms read with `read_he_atoms` and then processed with `find_bubbles`),
returns a dictionary with a key for each bubble id. Each value is a set
of the ids of atoms in that bubble.'''
    if atoms.GetNumberOfPoints() < 1:
        return {}
    bubble_ids = atoms.GetPointData().GetArray('RegionId')
    atom_ids = atoms.GetPointData().GetArray('id')
    bubbles = {}
    for bubble_id in range(1, bubble_ids.GetValueRange()[1] + 1):
        bubbles[bubble_id] = set()
    for index in range(bubble_ids.GetNumberOfValues()):
        bubble_id = bubble_ids.GetValue(index)
        # Note: if bubble_id == 0, then the atom is not part of a bubble
        if bubble_id > 0:
            atom_id = int(atom_ids.GetTuple1(index))
            bubbles[bubble_id].add(atom_id)
    return bubbles
```

%% Cell type:code id:e11aa19a-4946-4468-80dd-13546d7a2b68 tags:

``` python
def find_burst_bubbles(bubbles, next_atoms):
    '''Given a dictionary of bubbles (created by `make_bubble_dict` and a VTK
data object containing the atoms for the next timestep, find bubbles that
appear to have burst. When a bubble bursts, the atoms become evacuated
from their cavity into a vacuum and disappear from the simulation. When
a large number of atoms in a bubble are not in the next timestep, that is
class Bubble:
    '''Holds data for a bubble of atoms.

Bubble is constructed with a vtkDataSet that contains the atoms
of the bubble. Alternately, it can be constructed with a set of
classified atoms and the identifier of the atom in the `RegionId`
field (as comes from find_bubbles).'''
    def __init__(self, data, bubble_id = 0):
        if (bubble_id < 1):
            self.data = data
        else:
            threshold = vtk.vtkThreshold()
            threshold.SetInputData(data)
            threshold.SetInputArrayToProcess(
                0, 0, 0,
                'vtkDataObject::FIELD_ASSOCIATION_POINTS', 'RegionId')
            threshold.SetLowerThreshold(bubble_id - 0.1)
            threshold.SetUpperThreshold(bubble_id + 0.1)
            threshold.Update()
            self.data = threshold.GetOutput()
        self._center = None

    def get_number_of_atoms(self):
        return self.data.GetNumberOfPoints()

    def get_center(self):
        if not self._center:
            compute_center = vtk.vtkCenterOfMass()
            compute_center.SetInputData(self.data)
            compute_center.Update()
            self._center = compute_center.GetCenter()
        return self._center
```

%% Cell type:code id:122aa890-fdfe-4f24-9726-ec4fa0f9e8f6 tags:

``` python
class HeAtoms:
    '''Holds information about one timestep of He atoms.

This class is initialized by providing a VTK dataset representing
a LAMMPS atom file containing an `id` field giving unique identifiers
to atoms and a `type` field identifying the type of each atom
(`type` == 2 is He atoms). The data needs no cell data. The constructor
must also be given a timestep.

Use the `ReadVTKFile()` method to load the data from a VTK data set.

The `bubbles` field contains a list of `Bubble` objects holding the
extracted bubbles. The `timestep` field contains the time of the
atoms. The `filename` field provides the file the atoms came from,
when available.'''
    def ReadVTKFile(filename):
        import re
        timestep_match = re.search(r'([0-9]+)\.vtk', filename)
        timestep = int(timestep_match.group(1))

        reader = vtk.vtkDataSetReader()
        reader.SetFileName(filename)
        reader.Update()

        return HeAtoms(reader.GetOutput(), timestep, filename)

    def __init__(self, data, timestep, filename=None):
        self.timestep = timestep
        self.filename = filename

        # Many filters expect the data to have cells. Make one cell per point.
        tocloud = vtk.vtkConvertToPointCloud()
        tocloud.SetInputData(data)
        tocloud.SetCellGenerationMode(tocloud.VERTEX_CELLS)

        # Extract the He atoms.
        extract_he = vtk.vtkThreshold()
        extract_he.SetInputConnection(tocloud.GetOutputPort())
        extract_he.SetInputArrayToProcess(
            0, 0, 0, 'vtkDataObject::FIELD_ASSOCIATION_POINTS', 'type')
        extract_he.SetLowerThreshold(1.9)
        extract_he.SetUpperThreshold(2.1)

        # Create a density field of atoms by splatting the atoms into a grid.
        # Here we use `vtkGaussianSplatter`. If this is taking too long, it
        # is possible to speed things up using a `vtkFastSplatter`.
        bounds = data.GetBounds()
        sampling_resolution = (
            int((bounds[1] - bounds[0]) / (lattice_parameter / 2)),
            int((bounds[3] - bounds[2]) / (lattice_parameter / 2)),
            int((bounds[5] - bounds[4]) / (lattice_parameter / 2)))

        splatter = vtk.vtkGaussianSplatter()
        splatter.SetInputConnection(extract_he.GetOutputPort())
        splatter.SetSampleDimensions(sampling_resolution)
        splatter.SetRadius(He_radius)
        splatter.SetNormalWarping(0)
        splatter.SetAccumulationModeToSum()

        # Use an image connectivity filter to find regions with nearby atoms.
        # The filter will assign each connected region a unique ID and write
        # that to point data.
        connectivity = vtk.vtkImageConnectivityFilter()
        connectivity.SetInputConnection(splatter.GetOutputPort())
        connectivity.SetLabelScalarTypeToInt()
        connectivity.SetLabelModeToSizeRank()
        # Lowering threshold to include regions to prevent missing atoms at edge
        # of bubble.
        connectivity.SetScalarRange(0.1, 100000000000)

        # Frustratingly, there appears to be a bug in `vtkImageConnectivityFilter`
        # that does not properly pass the origin and spacing of the image, which
        # means that it will no longer align with the atom data. Restore it to
        # the proper values.
        splatter.Update()
        splatter_output = splatter.GetOutput()
        reset_position = vtk.vtkImageChangeInformation()
        reset_position.SetInputConnection(connectivity.GetOutputPort())
        reset_position.SetOutputOrigin(splatter_output.GetOrigin())
        reset_position.SetOutputSpacing(splatter_output.GetSpacing())

        # Sample the connectivity field back onto the atoms that generated them.
        # This will add a `RegionId` field to the atoms that identifes which
        # cluster each one belongs to.
        probe = vtk.vtkProbeFilter()
        probe.SetInputConnection(0, extract_he.GetOutputPort())
        probe.SetInputConnection(1, reset_position.GetOutputPort())
        probe.CategoricalDataOn()
        probe.PassPointArraysOn()

        connectivity.Update()
        bubble_sizes = connectivity.GetExtractedRegionSizes()
        probe.Update()
        self.atoms = probe.GetOutput()

        self.bubbles = []
        for bubble_index in range(1, bubble_sizes.GetNumberOfValues()):
            if bubble_sizes.GetValue(bubble_index) < burst_size:
                break
            self.bubbles.append(Bubble(self.atoms, bubble_index))
```

%% Cell type:code id:034539e1-5b21-4952-ae11-eca00cd35283 tags:

``` python
def find_burst_bubbles(reference_atoms, future_atoms):
    '''Given an `HeAtoms` object at a reference time and another such object
at a future timestep, determine which bubbles in the reference time
have burst. When a bubble bursts, the atoms become evacuated from their
cavity into a vacuum and disappear from the simulation. When a large
number of atoms in a bubble are not in the next timestep, that is
indicitive of the bubble bursting.

The function returns a dictionary with the keys being ids of bubbles that
have burst and the values being the number of atoms that went missing.'''
    # Create a set of all the atom ids in the next timestep for easy finding.
    atom_ids = next_atoms.GetPointData().GetArray('id')
    next_atom_set = set()
    for index in range(atom_ids.GetNumberOfValues()):
        next_atom_set.add(int(atom_ids.GetTuple1(index)))

    # Iterate over all bubbles from the previous timestep and count how many
    # have suddenly disappeared. If the count is above the `burst_size`
    # threshold, record it.
    burst_bubbles = {}
    for bubble_id, bubble_set in bubbles.items():
        if len(bubble_set) < burst_size:
            continue
        num_gone = 0
        for atom_id in bubble_set:
            if not atom_id in next_atom_set:
                num_gone += 1
        if num_gone >= burst_size:
            burst_bubbles[bubble_id] = num_gone

This function returns a list of `Bubble` objects that have been detected
as burst.'''
    burst_bubbles = []
    future_ids = \
        vtk_to_numpy(future_atoms.atoms.GetPointData().GetArray('id'))
    for bubble in reference_atoms.bubbles:
        reference_ids = \
            vtk_to_numpy(bubble.data.GetPointData().GetArray('id'))
        num_gone = np.isin(reference_ids, future_ids,
                           assume_unique=True, invert=True).sum()
        fraction = float(num_gone)/len(reference_ids)
        if (num_gone >= burst_size) and (fraction >= burst_fraction):
            print(num_gone, fraction)
            burst_bubbles.append(bubble)
    return burst_bubbles
```

%% Cell type:code id:0f551511-82e4-4e98-8557-6315b7beacb3 tags:

``` python
# Iterate over all the files in the data path and find potential bubble bursts.
bubble_dict = None
max_loss = 0
max_loss_ratio = 0.0
import queue
bubble_queue = queue.Queue(time_to_escape - 1)

import glob
for filename in sorted(glob.glob(data_path + '/*.vtk')):
    he_atoms = read_he_atoms(filename)
    if bubble_dict:
        burst_bubbles = find_burst_bubbles(bubble_dict, he_atoms)
        if len(burst_bubbles) > 0:
            print(filename)
            print(burst_bubbles)
        for bubble_id, loss in burst_bubbles.items():
            max_loss = max(max_loss, loss)
            loss_ratio = float(loss)/float(len(bubble_dict[bubble_id]))
            max_loss_ratio = max(max_loss_ratio, loss_ratio)
    bubbles = find_bubbles(he_atoms)
    bubble_dict = make_bubble_dict(bubbles)

print(max_loss)
print(max_loss_ratio)
for filename in sorted(glob.glob(data_path + '/*-0890*.vtk')):
    next_atoms = HeAtoms.ReadVTKFile(filename)
    if bubble_queue.full():
        reference_atoms = bubble_queue.get_nowait()
        burst_bubbles = find_burst_bubbles(reference_atoms, next_atoms)
        for bubble in burst_bubbles:
            print(reference_atoms.timestep, bubble.get_center(), filename)
    bubble_queue.put_nowait(next_atoms)
```

%% Output

    /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-084475.vtk
    {100: 167}
    /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-084476.vtk
    {21: 151}
    /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-091829.vtk
    {64: 164}
    /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-091830.vtk
    {26: 359}
    /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-091831.vtk
    {20: 185}
    /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-091832.vtk
    {13: 113}
    /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-094432.vtk
    {52: 299}
    /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-094433.vtk
    {22: 205}
    /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-094434.vtk
    {61: 110}
    /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-095042.vtk
    {58: 102}
    /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-098328.vtk
    {126: 351}
    /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-098329.vtk
    {33: 380}
    /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-098330.vtk
    {32: 160}
    380
    0.8429752066115702
    129 0.8164556962025317
    89018 (219.2584626403036, 128.282757288293, 4.523309201847974) /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-089027.vtk
    133 0.8417721518987342
    89019 (219.4156682461123, 128.26865285559546, 4.131269966311092) /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-089028.vtk
    134 0.8375
    89020 (219.19135427474976, 128.24084725379944, 3.909063812252134) /Users/4d5/data/PSI/karl-hammond-2023/110-25x25nm_detailed/vtk-files/110-25x25nm-detailed-089029.vtk

%% Cell type:code id:8c9e9b81-4315-49d6-b409-a9e45c8ffbc6 tags:
%% Cell type:code id:9dc7cc17-3cf3-4afd-a024-6ac6f7da8fbb tags:

``` python
```