Commit 71a67369 authored by Chris Smith

Repack improvement

After calling repack, the code checks the temporary file's last modification time against the current time and only proceeds to delete the old file and move the repacked file into place once at least 1 second has passed.
parent d0f6fbf8
1 merge request: !13 Pyspm port suhas
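In practice the change amounts to polling the repacked file's modification time and only swapping it into place once the file has gone untouched for a second. A minimal, self-contained sketch of that wait-and-replace pattern (the wait_until_stable helper and the commented usage below are illustrative assumptions, not the exact code in this commit):

import os
from time import time, sleep

def wait_until_stable(path, quiet_period=1.0, poll=0.5):
    """Block until `path` has not been modified for `quiet_period` seconds."""
    while time() - os.stat(path).st_mtime <= quiet_period:
        sleep(poll)

# Illustrative usage (hypothetical paths), mirroring the diff below:
# repack into a temporary file, wait for h5repack to finish writing it,
# then delete the original and move the repacked file into its place.
#
#   subprocess.check_output(' '.join(['h5repack', h5_path, tmp_path]),
#                           stderr=subprocess.STDOUT, shell=True)
#   wait_until_stable(tmp_path)
#   os.remove(h5_path)
#   os.rename(tmp_path, h5_path)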
@@ -8,7 +8,7 @@ Main Class in charge of writing/reading to/from hdf5 file.
import os
import subprocess
import sys
-from time import sleep
+from time import time, sleep
from warnings import warn
import h5py
@@ -57,7 +57,7 @@ class ioHDF5(object):
return
self.file = file_handle.file
self.path = file_handle.filename
def clear(self):
'''
Clear h5.file of all contents
@@ -70,7 +70,7 @@ class ioHDF5(object):
self.file.clear()
self.repack()
def repack(self):
'''
Uses the h5repack command to recover cleared space in an hdf5 file.
@@ -84,14 +84,18 @@ class ioHDF5(object):
Repack the opened hdf5 file into a temporary file
'''
try:
-repack_line = 'h5repack '+self.path+' '+tmpfile
+repack_line = ' '.join(['h5repack',self.path,tmpfile])
subprocess.check_output(repack_line,
stderr=subprocess.STDOUT,
shell=True)
-sleep(2)
+# Check that the file is done being modified
+while time()-os.stat(tmpfile).st_mtime <= 1:
+    sleep(0.5)
except subprocess.CalledProcessError as err:
print('Could not repack hdf5 file')
raise Exception(err.output)
except:
raise
'''
Delete the original file and move the temporary file to the originals path
@@ -114,17 +118,17 @@ class ioHDF5(object):
def close(self):
'''Close h5.file'''
self.file.close()
def delete(self):
''' Delete h5.file'''
self.close()
os.remove(self.path)
def flush(self):
'''Flush data from memory and commit to file.
Use this after manually inserting data into the hdf dataset'''
self.file.flush()
def writeData(self, data, print_log=False):
'''
Writes data into the hdf5 file and assigns data attributes such as region references.
@@ -140,18 +144,18 @@ class ioHDF5(object):
refList : List of HDF5dataset or HDF5Datagroup references
References to the objects written
'''
f = self.file
f.attrs['PySPM version']=version
# Checking if the data is an MicroDataGroup object
if not isinstance(data, MicroDataGroup):
warn('Input of type: {} \n'.format(type(data)))
sys.exit("Input not of type MicroDataGroup.\n We're done here! \n")
# Figuring out if the first item in AFMData tree is file or group
if data.name is '' and data.parent is '/':
# For file we just write the attributes
for key in data.attrs.iterkeys():
f.attrs[key] = data.attrs[key]
@@ -184,12 +188,12 @@ class ioHDF5(object):
g.attrs[key] = data.attrs[key]
if print_log: print('Wrote attributes to group: {} \n'.format(data.name))
root = g.name
# Populating the tree structure recursively
refList = []
# Recursive function
def __populate(child, parent):
if isinstance(child, MicroDataGroup):
if child.indexed:
previous = np.where([child.name in key for key in f[parent].keys()])[0]
@@ -198,7 +202,7 @@ class ioHDF5(object):
else:
last = f[parent].keys()[previous[-1]]
index = int(last.split('_')[-1])+1
child.name+='{:03d}'.format(index)
try:
itm = f[parent].create_group(child.name)
if print_log: print('Created Group {}'.format(itm.name))
@@ -219,10 +223,10 @@ class ioHDF5(object):
# finite sized dataset and maxshape is not provided
# Typically for small / ancilliary datasets
try:
itm = f[parent].create_dataset(child.name,
data = child.data,
compression = child.compression,
dtype = child.data.dtype,
chunks= child.chunking)
except RuntimeError:
itm = f[parent][child.name]
@@ -234,23 +238,23 @@ class ioHDF5(object):
# Here, we only allocate the space. The provided data is ignored
# print child.name
try:
itm = f[parent].create_dataset(child.name, child.maxshape,
compression = child.compression,
dtype = child.dtype,
chunks= child.chunking)
except RuntimeError:
itm = f[parent][child.name]
warn('Found Dataset already exists {}'.format(itm.name))
except:
raise
else:
# Resizable but the written files are significantly larger
max_shape = tuple([ None for i in range(len(child.data.shape))])
try:
itm = f[parent].create_dataset(child.name,
data = child.data,
compression = child.compression,
dtype = child.data.dtype,
chunks= child.chunking,
maxshape = max_shape)
except RuntimeError:
@@ -258,54 +262,54 @@ class ioHDF5(object):
warn('Found Dataset already exists {}'.format(itm.name))
except:
raise
if print_log: print('Created Dataset {}'.format(itm.name))
for key in child.attrs.iterkeys():
# print('Found some region references')
# writing region reference
if key is 'labels':
# print('Found some region references')
labels = child.attrs[key]# labels here is a dictionary
self.regionRefs(itm, labels, print_log=print_log)
'''
Now make an attribute called 'labels' that is a list of strings
First ascertain the dimension of the slicing:
'''
found_dim = False
for dimen, slobj in enumerate(labels[labels.keys()[0]]):
# We make the assumption that checking the start is sufficient
if slobj.start != None:
found_dim = True
break
if found_dim:
headers = [None]*len(labels) # The list that will hold all the names
for col_name in labels.keys():
headers[labels[col_name][dimen].start] = col_name
# Now write the list of col / row names as an attribute:
itm.attrs[key] = headers
else:
warn('Unable to write region labels for %s' %(itm.name.split('/')[-1]))
if print_log: print('Wrote Region References of Dataset %s' %(itm.name.split('/')[-1]))
else:
itm.attrs[key] = child.attrs[key]
if print_log: print('Wrote Attributes of Dataset %s \n' %(itm.name.split('/')[-1]))
# Make a dictionary of references
refList.append(itm)
return refList
# Recursive function is called at each stage beginning at the root
for child in data.children:
__populate(child, root)
if print_log:
print('Finished writing to h5 file.\n'+
'Right now you got yourself a fancy folder structure. \n'+
'Make sure you do some reference linking to take advantage of the full power of HDF5.')
return refList
def regionRefs(self, dataset, slices, print_log=False):
'''
Creates attributes of a h5.Dataset that refer to regions in the arrays
@@ -321,4 +325,4 @@ class ioHDF5(object):
for sl in slices.iterkeys():
if print_log: print('Wrote Region Reference:%s to Dataset %s' %(sl, dataset.name))
dataset.attrs[sl] = dataset.regionref[slices[sl]]