Commit cd935813 authored by Salko Jr, Robert's avatar Salko Jr, Robert
Browse files

Performance improvements

The DNB summary script was very slow for large files because of poor
h5py data access times.  Get more creative about storing data in memory
to reduce this time.  This resulted in a 5X speedup for the quarter symmetry
pin-resolved test case I tested.
parent 553f36f0
Loading
Loading
Loading
Loading
+82 −61
Original line number Diff line number Diff line
import h5py
import numpy as np


class Hdf5Interface(object):
    """ An interface to the SubKit formatted HDF5 file

@@ -15,27 +14,58 @@ class Hdf5Interface(object):
        assert('CORE' in me.h5)
        assert('STATE_0001' in me.h5)

        if 'time' in me.h5['STATE_0001']:
            me._isTransient = True
        else:
            me._isTransient = False

        # Store some data as global values that will be set the first time the procedure is called
        # This saves us from always calculating the data during init (even if the user never uses it)
        # But keeps us from having to re-access and re-caculate data if the procedure is called multiple times.
        me._allChanSameLevel = None
        me._crossSectionalArea = {} # Takes section number (1-based) and returns cross-sectional area
        me._pinNumRadial = {} # Takes 1-based pin index and returns number of radial nodes

        me.numSection = max(me.h5['/CORE/chan_sections'][()])

        me.numState = 1
        # Get list of keys instead of constantly checking if dataset in h5 file because h5py contains is really slow
        stateDatasets = me.h5.keys()
        while True:
            if 'STATE_{:04d}'.format(me.numState) not in me.h5:
            if 'STATE_{:04d}'.format(me.numState) not in stateDatasets:
                me.numState = me.numState-1
                break
            else:
                me.numState = me.numState+1

        me.numPin = 1
        while True:
            if '/CORE/mesh/PIN_{:05d}'.format(me.numPin) not in me.h5:
                me.numPin = me.numPin-1
                break
        # Takes state number (0-based) and returns time in s
        me._transientTimes = []
        if me._isTransient:
           for state in range(me.numState):
              me._transientTimes.append(me.h5['STATE_{:04d}/time'.format(state+1)][()])

        # Takes state number (1-based) and returns a list of dataset names
        me._stateDatasets = {}
        for state in range(1, me.numState+1):
           me._stateDatasets[state] = me.h5['STATE_{:04d}'.format(state)].keys()

        meshDatasets = sorted(me.h5['CORE/mesh'].keys())
        if 'PIN_' in meshDatasets[-1]:
           me.numPin = int(meshDatasets[-1][4:])
        else:
                me.numPin = me.numPin+1
           me.numPin = 0

        # Takes pin index (1-based) and returns the number of levels in that pin
        me.pinNumAxial = {}
        for pin in range(me.numPin):
            me.pinNumAxial[pin+1] = len(me.h5['CORE/mesh/PIN_{:05d}/axial_nodes'.format(pin+1)])

        # Takes pin index (1-based) and returns number of sectors
        me.pinNumSector = {}
        for pin in range(me.numPin):
           me.pinNumSector[pin+1] = me.h5['CORE/mesh/PIN_{:05d}/azimuthal_fraction'.format(pin+1)].shape[1]


        # Create a mask for pin surface datasets, which can be used to extract relevant data
        # True means the data is irrelevant
        # For now we assum that dnbr will always be in the file (if there are pins in the model)
@@ -47,13 +77,9 @@ class Hdf5Interface(object):
              nsec = me.getPinNumSector(pin)
              me.surfMask[0:nlev, 0:nsec, pin-1] = False

        me.numCh = 1
        while True:
            if '/CORE/mesh/CHAN_{:05d}'.format(me.numCh) not in me.h5:
                me.numCh = me.numCh-1
                break
            else:
                me.numCh = me.numCh+1
        lastCh = meshDatasets[-me.numPin-1]
        assert 'CHAN_' in lastCh
        me.numCh = int(lastCh[5:])

        # Create a mask for channel datasets
        # Need one for scalar data and one for momentum data
@@ -65,7 +91,7 @@ class Hdf5Interface(object):
              me.chanScalarMask[0:nlev+1, chan-1] = False

        # Create a mask for pin internal data
        if 'STATE_0001/pin_temp' in me.h5:
        if me.numPin>0:
           me.pinInternalMask = np.ones(me.h5['STATE_0001/pin_temp'].shape, dtype=bool)
           me.pinInternalMask[:, :, :, :] = True
           for pin in range(1, me.numPin+1):
@@ -105,7 +131,6 @@ class Hdf5Interface(object):
                if 'mesh_type' in dataset.attrs:
                    me.chanDatasetMeshTypes[datasetName] = dataset.attrs['mesh_type'][0]


    def getNumPin(me):
        """ Returns the total number of pins in the model """
        return me.numPin
@@ -116,10 +141,7 @@ class Hdf5Interface(object):

    def isTransient(me):
        """ Returns True if this simulation is a transient """
        if 'time' in me.h5['STATE_0001']:
            return True
        else:
            return False
        return me._isTransient

    def getTransientTime(me, state):
        """ Returns the transient time for this state
@@ -128,8 +150,9 @@ class Hdf5Interface(object):
           state (int) : State number (1-based)

        """
        me._stateValid(state)
        assert(me.isTransient())
        return me.h5['STATE_{:04d}/time'.format(state)][()]
        return me._transientTimes[state-1]

    def getTransientTimes(me):
        """ Returns a list of times in the transient
@@ -137,10 +160,7 @@ class Hdf5Interface(object):
        Will be None if not a transient.
        """
        if me.isTransient():
            times = []
            for state in range(1, me.getNumState()+1):
                times.append(me.getTransientTime(state))
            return times
            return me._transientTimes


    def _sectionValid(me, section):
@@ -201,13 +221,13 @@ class Hdf5Interface(object):
        """
        if state is None:
            for state_ in range(1, me.getNumState()+1):
                if dataset in me.h5['STATE_{:04d}'.format(state_)]:
                if dataset in me._stateDatasets[state_]:
                    return
            raise ValueError("Dataset "+str(dataset) +
                            " is not found in any states in the model")
        else:
            me._stateValid(state)
            if dataset not in me.h5['STATE_{:04d}'.format(state)]:
            if dataset not in me._stateDatasets[state]:
                raise ValueError("Dataset "+str(dataset) +
                                 " is not found in STATE_{:04d}".format(state))

@@ -219,18 +239,6 @@ class Hdf5Interface(object):
            raise ValueError("Level "+str(level)+" is not in STATE_{:04d}/{:s}, channel {:d}".format(state,
                dataset, ch))

#   def getChanNumScalarLev(me, ch):
#      """ Returns the number of scalar levels (including ghost levels) in the channel
#
#      Args:
#         ch (int) : Channel index (1-based)
#
#      """
#      if not me._chIndexValid(ch):
#          raise ValueError("Channel index "+str(ch)+" is not valid")
#      path = "CORE/mesh/CHAN_{:05d}/scalar_bounds".format(ch)
#      return len(me.h5[path])+1

    def getPinNumLev(me, pin):
        """ Returns the number of axial levels in the pin

@@ -333,7 +341,9 @@ class Hdf5Interface(object):

        """
        me._pinIndexValid(pin)
        return len(me.h5['CORE/mesh/PIN_{:05d}/radial_nodes'.format(pin)])
        if pin not in me._pinNumRadial:
           me._pinNumRadial[pin] = len(me.h5['CORE/mesh/PIN_{:05d}/radial_nodes'.format(pin)])
        return me._pinNumRadial[pin]

    def getPinRadialNodes(me, pin):
        """ Returns the radial node locations in the pin
@@ -353,10 +363,11 @@ class Hdf5Interface(object):

        """
        me._pinIndexValid(pin)
        return me.h5['CORE/mesh/PIN_{:05d}/azimuthal_fraction'.format(pin)].shape[1]
        return me.pinNumSector[pin]

    def _allChanSameLevels(me):
        """ Returns True if all channels in the model have the same number of levels."""
        if me._allChanSameLevel is not None: return me._allChanSameLevel
        nlev = None
        for ch in range(1, me.getNumChan()+1):
            path = "CORE/mesh/CHAN_{:05d}/scalar_bounds".format(ch)
@@ -365,8 +376,9 @@ class Hdf5Interface(object):
            else:
                thisLev = len(me.h5[path])
                if nlev!=thisLev:
                    return False
        return True
                    me._allChanSameLevel = False
        me._allChanSameLevel = True
        return me._allChanSameLevel

    def getNumState(me):
        """ Returns the total number of states in the model """
@@ -374,7 +386,7 @@ class Hdf5Interface(object):

    def getNumSections(me):
        """ Returns the total number of axial sections in the model """
        return max(me.h5['/CORE/chan_sections'][()])
        return me.numSection

    def getChansInSection(me, section=1):
        """ Returns a list of the channel IDs in the passed section.
@@ -398,9 +410,12 @@ class Hdf5Interface(object):

        """
        assert(isinstance(section, int))
        if section not in me._crossSectionalArea:
           area = 0.0
           chans = me.getChansInSection(section)
        return np.sum(me.h5['CORE/chan_area'][list(np.array(chans)-1)])
           me._crossSectionalArea[section] = np.sum(me.h5['CORE/chan_area'][list(np.array(chans)-1)])
        return me._crossSectionalArea[section]


    def _areaWeightAverage(me, dataset, chans, level, state):
        """ Calculates an area-weighted average of the specified dataset
@@ -413,9 +428,9 @@ class Hdf5Interface(object):

        """
        chID = list(np.array(chans)-1)
        data = me.h5['/STATE_{:04d}/{:s}'.format(state, dataset)][level, chID][()]
        data = me.h5['/STATE_{:04d}/{:s}'.format(state, dataset)][()]
        area = me.h5["/CORE/chan_area"][()][chID]
        return np.average(data, weights=area)
        return np.average(data[level, chID], weights=area)

    def getAvePressure(me, section=1, level=None, state=1):
        """ Returns the average pressure (area weighted)
@@ -440,10 +455,11 @@ class Hdf5Interface(object):
    def _sumChanDataset(me, dataset, state, chans, level):
        """ Gets the sum of the dataset for passed state, chans, and level"""
        chID = np.array(chans)-1
        return np.sum(me.h5['STATE_{:04d}/{:s}'.format(state, dataset)][level, chID])
        data = me.h5['STATE_{:04d}/{:s}'.format(state, dataset)][()]
        return np.sum(data[level, chID])

    def getAveMassFlux(me, section=1, level=None, state=1):
        """ Returns the average mass flux (area weighted)
        """ Returns the average mass flux (area weighted) for a given level

        Args:
           section (int) : Section index from which pressure will be calculated.  Defaults to 1. (1-based)
@@ -628,7 +644,7 @@ class Hdf5Interface(object):
           return (None, None, None)

    def getAveTemp(me, section=1, level=None, state=1):
        """ Returns the average temperature (flow weighted)
        """ Returns the average temperature (flow weighted) for a given level

        Args:
           section (int) : Section index from which data will be calculated.  Defaults to 1. (1-based)
@@ -642,16 +658,21 @@ class Hdf5Interface(object):
            chans = me.getChansInSection(me.getNumSections())
            _level = me.getChanDatasetNumLev(chans[0], 'chan_temp')-1
        else:
            # Pick a random channel in this section to check if the passed level is valid
            chans = me.getChansInSection(section)
            me._chLevelValid('chan_pressure', state, chans[0], level)
            _level = level
        chID = list(np.array(chans)-1)
        T     = me.h5['/STATE_{:04d}/chan_temp'.format(state)][_level, chID][()]
        mdotl = me.h5['/STATE_{:04d}/chan_mdot_liq'.format(state)][_level, chID][()]
        mdotv = me.h5['/STATE_{:04d}/chan_mdot_vap'.format(state)][_level, chID][()]
        mdotd = me.h5['/STATE_{:04d}/chan_mdot_drp'.format(state)][_level, chID][()]
        mdotTot = mdotl+mdotv+mdotd
        return np.average(T, weights=mdotTot)
        chID = np.array(chans)-1
        T     = me.h5['/STATE_{:04d}/chan_temp'.format(state)][()]
        mdotl = me.h5['/STATE_{:04d}/chan_mdot_liq'.format(state)][()]
        mdotv = me.h5['/STATE_{:04d}/chan_mdot_vap'.format(state)][()]
        mdote = me.h5['/STATE_{:04d}/chan_mdot_drp'.format(state)][()]
        mdotTot = mdotl[_level, chID]+mdotv[_level, chID]+mdote[_level, chID]
        #mdotTot = me.h5['/STATE_{:04d}/chan_mdot_liq'.format(state)][_level, chID]+\
        #        me.h5['/STATE_{:04d}/chan_mdot_vap'.format(state)][_level, chID]+\
        #        me.h5['/STATE_{:04d}/chan_mdot_drp'.format(state)][_level, chID]
        #mdotTot = mdotl+mdotv+mdotd
        return np.average(T[_level, chID], weights=mdotTot)


    def _getPinAxialMax(me, dataset, state=None, pin=None):
+4 −1
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@ import os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
import SubKit
import SubKit.utils.UnitConversions as unit
from SubKit.utils.Timer import eTime

class SummaryTools(object):
    """ Set of tools for generating text summary files from the HDF5 file
@@ -13,6 +14,8 @@ class SummaryTools(object):

    """
    def __init__(me, h5file):
        me.time = eTime()
        print me.time.elapsed(), "Initializing file"
        me.h5 = Hdf5Interface(h5file)

    def genDNBSummary(me, fname=None, units=None):
@@ -48,7 +51,7 @@ class SummaryTools(object):
            f.write(unitsB)
        f.write("  |==================================|================================|==========================================================|=======|\n")
        for state in range(1, me.h5.getNumState()+1):
            print "Processing state {:8d}".format(state)
            print me.time.elapsed(), "Processing state {:8d}".format(state)
            minState, minPin, level, theta, minDNBR = me.h5.getPinWithMinDNBR(state)
            minDNBR = min(minDNBR, 100.0)
            hotChID = me.h5.getConnChan(minPin, level, theta)

SubKit/utils/Timer.py

0 → 100755
+15 −0
Original line number Diff line number Diff line
# A simple utility for keeping track of time

import time

class eTime:
   """ A utility for keeping track of elapsed time """
   def __init__(me):
      me.start = time.time()
      me.last = me.start
   def elapsed(me):
      """ Returns the time since the object was initialized in HH:MM:SS format """
      m, s = divmod(time.time()-me.start, 60)
      h, m = divmod(m, 60)
      return "{0:02.0f}:{1:02.0f}:{2:02.0f}".format(h, m, s)