From 71a673696a238aa9b277b4860f0ab99c6c189576 Mon Sep 17 00:00:00 2001
From: Chris Smith <csmith55@utk.edu>
Date: Thu, 29 Sep 2016 09:30:55 -0400
Subject: [PATCH] Repack improvement

After calling repack, the code checks the repacked file's last
modification time against the current time and only proceeds to delete
the old file and move the repacked file into place once more than 1s has
passed since the repacked file was last modified.
---
 pycroscopy/io/io_hdf5.py | 102 ++++++++++++++++++++-------------------
 1 file changed, 53 insertions(+), 49 deletions(-)

diff --git a/pycroscopy/io/io_hdf5.py b/pycroscopy/io/io_hdf5.py
index 35ca1ee1..5e8548e4 100644
--- a/pycroscopy/io/io_hdf5.py
+++ b/pycroscopy/io/io_hdf5.py
@@ -8,7 +8,7 @@ Main Class in charge of writing/reading to/from hdf5 file.
 import os
 import subprocess
 import sys
-from time import sleep
+from time import time, sleep
 from warnings import warn
 
 import h5py
@@ -57,7 +57,7 @@ class ioHDF5(object):
             return
         self.file = file_handle.file
         self.path = file_handle.filename
-        
+
     def clear(self):
         '''
         Clear h5.file of all contents
@@ -70,7 +70,7 @@ class ioHDF5(object):
 
         self.file.clear()
         self.repack()
-        
+
     def repack(self):
         '''
         Uses the h5repack command to recover cleared space in an hdf5 file.
@@ -84,14 +84,18 @@
         Repack the opened hdf5 file into a temporary file
         '''
         try:
-            repack_line = 'h5repack '+self.path+' '+tmpfile
+            repack_line = ' '.join(['h5repack',self.path,tmpfile])
             subprocess.check_output(repack_line,
                                     stderr=subprocess.STDOUT,
                                     shell=True)
-            sleep(2)
+            # Check that the file is done being modified
+            while time()-os.stat(tmpfile).st_mtime <= 1:
+                sleep(0.5)
         except subprocess.CalledProcessError as err:
             print('Could not repack hdf5 file')
             raise Exception(err.output)
+        except:
+            raise
 
         '''
         Delete the original file and move the temporary file to the originals path
@@ -114,17 +118,17 @@ class ioHDF5(object):
     def close(self):
         '''Close h5.file'''
         self.file.close()
-        
+
     def delete(self):
         ''' Delete h5.file'''
         self.close()
         os.remove(self.path)
-    
+
     def flush(self):
         '''Flush data from memory and commit to file.
         Use this after manually inserting data into the hdf dataset'''
         self.file.flush()
-    
+
     def writeData(self, data, print_log=False):
         '''
         Writes data into the hdf5 file and assigns data attributes such as region references.
@@ -140,18 +144,18 @@ class ioHDF5(object):
         refList : List of HDF5dataset or HDF5Datagroup references
             References to the objects written
         '''
-        
+
         f = self.file
-        
+
         f.attrs['PySPM version']=version
-        
+
         # Checking if the data is an MicroDataGroup object
-        if not isinstance(data, MicroDataGroup): 
+        if not isinstance(data, MicroDataGroup):
             warn('Input of type: {} \n'.format(type(data)))
             sys.exit("Input not of type MicroDataGroup.\n We're done here! \n")
\n") - + # Figuring out if the first item in AFMData tree is file or group - if data.name is '' and data.parent is '/': + if data.name is '' and data.parent is '/': # For file we just write the attributes for key in data.attrs.iterkeys(): f.attrs[key] = data.attrs[key] @@ -184,12 +188,12 @@ class ioHDF5(object): g.attrs[key] = data.attrs[key] if print_log: print('Wrote attributes to group: {} \n'.format(data.name)) root = g.name - + # Populating the tree structure recursively refList = [] # Recursive function def __populate(child, parent): - + if isinstance(child, MicroDataGroup): if child.indexed: previous = np.where([child.name in key for key in f[parent].keys()])[0] @@ -198,7 +202,7 @@ class ioHDF5(object): else: last = f[parent].keys()[previous[-1]] index = int(last.split('_')[-1])+1 - child.name+='{:03d}'.format(index) + child.name+='{:03d}'.format(index) try: itm = f[parent].create_group(child.name) if print_log: print('Created Group {}'.format(itm.name)) @@ -219,10 +223,10 @@ class ioHDF5(object): # finite sized dataset and maxshape is not provided # Typically for small / ancilliary datasets try: - itm = f[parent].create_dataset(child.name, - data = child.data, + itm = f[parent].create_dataset(child.name, + data = child.data, compression = child.compression, - dtype = child.data.dtype, + dtype = child.data.dtype, chunks= child.chunking) except RuntimeError: itm = f[parent][child.name] @@ -234,23 +238,23 @@ class ioHDF5(object): # Here, we only allocate the space. The provided data is ignored # print child.name try: - itm = f[parent].create_dataset(child.name, child.maxshape, + itm = f[parent].create_dataset(child.name, child.maxshape, compression = child.compression, - dtype = child.dtype, + dtype = child.dtype, chunks= child.chunking) except RuntimeError: itm = f[parent][child.name] warn('Found Dataset already exists {}'.format(itm.name)) except: - raise - else: + raise + else: # Resizable but the written files are significantly larger max_shape = tuple([ None for i in range(len(child.data.shape))]) try: - itm = f[parent].create_dataset(child.name, - data = child.data, + itm = f[parent].create_dataset(child.name, + data = child.data, compression = child.compression, - dtype = child.data.dtype, + dtype = child.data.dtype, chunks= child.chunking, maxshape = max_shape) except RuntimeError: @@ -258,54 +262,54 @@ class ioHDF5(object): warn('Found Dataset already exists {}'.format(itm.name)) except: raise - + if print_log: print('Created Dataset {}'.format(itm.name)) for key in child.attrs.iterkeys(): # print('Found some region references') # writing region reference if key is 'labels': # print('Found some region references') - labels = child.attrs[key]# labels here is a dictionary + labels = child.attrs[key]# labels here is a dictionary self.regionRefs(itm, labels, print_log=print_log) ''' Now make an attribute called 'labels' that is a list of strings First ascertain the dimension of the slicing: - ''' + ''' found_dim = False - for dimen, slobj in enumerate(labels[labels.keys()[0]]): + for dimen, slobj in enumerate(labels[labels.keys()[0]]): # We make the assumption that checking the start is sufficient - if slobj.start != None: - found_dim = True + if slobj.start != None: + found_dim = True break - if found_dim: - headers = [None]*len(labels) # The list that will hold all the names - for col_name in labels.keys(): - headers[labels[col_name][dimen].start] = col_name - # Now write the list of col / row names as an attribute: - itm.attrs[key] = headers - else: - warn('Unable to write 
+                        if found_dim:
+                            headers = [None]*len(labels) # The list that will hold all the names
+                            for col_name in labels.keys():
+                                headers[labels[col_name][dimen].start] = col_name
+                            # Now write the list of col / row names as an attribute:
+                            itm.attrs[key] = headers
+                        else:
+                            warn('Unable to write region labels for %s' %(itm.name.split('/')[-1]))
                         if print_log: print('Wrote Region References of Dataset %s' %(itm.name.split('/')[-1]))
-                    else:        
+                    else:
                         itm.attrs[key] = child.attrs[key]
                         if print_log: print('Wrote Attributes of Dataset %s \n' %(itm.name.split('/')[-1]))
                         # Make a dictionary of references
 
             refList.append(itm)
             return refList
-            
+
         # Recursive function is called at each stage beginning at the root
         for child in data.children:
             __populate(child, root)
-        
-        if print_log: 
+
+        if print_log:
             print('Finished writing to h5 file.\n'+
                   'Right now you got yourself a fancy folder structure. \n'+
                   'Make sure you do some reference linking to take advantage of the full power of HDF5.')
         return refList
-    
-     
-        
+
+
+
     def regionRefs(self, dataset, slices, print_log=False):
         '''
         Creates attributes of a h5.Dataset that refer to regions in the arrays
@@ -321,4 +325,4 @@ class ioHDF5(object):
         for sl in slices.iterkeys():
             if print_log: print('Wrote Region Reference:%s to Dataset %s' %(sl, dataset.name))
             dataset.attrs[sl] = dataset.regionref[slices[sl]]
-            
+
-- 
GitLab
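
Note: the change to repack() above boils down to "do not delete the original
file or move the repacked file until its mtime has stopped advancing". A
minimal standalone sketch of that pattern follows; the helper name
wait_until_stable and the quiet_period/poll/timeout parameters are
illustrative and not part of this patch:

    import os
    from time import time, sleep

    def wait_until_stable(path, quiet_period=1.0, poll=0.5, timeout=30.0):
        """Block until `path` has gone `quiet_period` seconds without modification.

        Same idea as the mtime check added to ioHDF5.repack(): poll the file's
        modification time and only continue once it has stopped changing. The
        timeout guards against waiting forever (e.g. on slow network filesystems).
        """
        deadline = time() + timeout
        while time() - os.stat(path).st_mtime <= quiet_period:
            if time() > deadline:
                raise RuntimeError('%s still being modified after %.1f s' % (path, timeout))
            sleep(poll)

    # Hypothetical usage, mirroring the repack flow:
    # wait_until_stable(tmpfile)
    # os.remove(original_path)
    # os.rename(tmpfile, original_path)

Since subprocess.check_output() already blocks until h5repack exits, the wait
mainly protects against the repacked file still being flushed to disk when the
delete/move happens.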