Commit fc28353f authored by Gao, Shang
Browse files

upload_resource method now accepts local files and copies them to the NFS mount

parent 8081e4be
Loading
Loading
Loading
Loading
+114 −82
Original line number Diff line number Diff line
@@ -113,7 +113,7 @@ class crossbowBase(object):

    def list_resources(self,package):
        '''
        returns all resources (scripts, datasets, and other) for a particular package
        returns all resources for a particular package

        parameters:
          - package: string
@@ -126,6 +126,36 @@ class crossbowBase(object):
        resources = [resource['name'] for resource in metadata['resources']]
        return resources

    def list_dcd_resources(self,package):
        '''
        returns the names of all .dcd resources for a particular package

        parameters:
          - package: string
            name of package

        output: list
            names of the resources in package whose url ends in ".dcd"
            (resources of any other type are left out of the list)
        '''
        metadata = self.ckan.action.package_show(id=package)
        #filter rather than map non-matches to None, so the list holds names only
        resources = [resource['name'] for resource in metadata['resources']
                     if resource['url'].endswith(".dcd")]
        return resources

    def list_non_dcd_resources(self,package):
        '''
        returns the names of all non .dcd resources for a particular package

        parameters:
          - package: string
            name of package

        output: list
            names of the resources in package whose url does not end in ".dcd"
            (.dcd resources are left out of the list)
        '''
        metadata = self.ckan.action.package_show(id=package)
        #filter rather than map .dcd entries to None, so the list holds names only
        resources = [resource['name'] for resource in metadata['resources']
                     if not resource['url'].endswith(".dcd")]
        return resources

    def get_resource_details(self,package,resource):
        '''
        prints and returns details for a specific resource
@@ -186,16 +216,23 @@ class crossbowMount(crossbowBase):
        edit an existing package (dataset)

      - list_resources(package)
        returns all resources (scripts, datasets, and other) for a particular package
        returns all resources for a particular package

      - list_dcd_resources(package)
        returns all .dcd resources for a particular package

      - list_non_dcd_resources(package)
        returns all non .dcd resources for a particular package

      - get_resource_details(package,resource)
        prints and returns details for a specific resource

      - add_resource(package,resource,resource_path,description=None)
        add a resource located on local NFS mount to an existing package
      - upload_resource(package,resource,resource_path,description=None)
        upload a local resource to an existing package
        resource is copied to CROSSBOW_NFS/package/filename

      - edit_resource(package,resource,*args,**kwargs)
        edit an existing resource
      - edit_resource(package,resource,new_name=None,new_description=None)
        edit the name or description for an existing resource

      - get_resource_path(package,resource)
        return the path to a resource on the local NFS mount
@@ -220,9 +257,26 @@ class crossbowMount(crossbowBase):

        super(crossbowMount, self).__init__(api_key,CKAN_url,user_agent)

    def add_resource(self,package,resource,resource_path,description=None):
    def _copy_with_progress(self,src, dst, callback, length=16*1024):
        '''
        custom function to copy file with progress bar
        '''
        add a resource located on local NFS mount to an existing package
        copied = 0
        size = os.path.getsize(src)
        with open(src, 'rb') as fsrc:
            with open(dst, 'wb') as fdst:
                while True:
                    buf = fsrc.read(length)
                    if not buf:
                        break
                    fdst.write(buf)
                    copied += len(buf)
                    callback(copied,size)

    def upload_resource(self,package,resource,resource_path,description=None):
        '''
        upload a local resource to an existing package
        resource is copied to CROSSBOW_NFS/package/filename

        parameters:
          - package: string
@@ -230,79 +284,68 @@ class crossbowMount(crossbowBase):
          - resource: string
            unique name for resource
          - resource_path: string
            full path to resource, must be on NFS mount
            path to local resource
          - description: string (optional)
            description of resource
        '''
        #check if file is on NFS
        #check if file exists
        if not os.path.isfile(resource_path):
            raise Exception("Could not find specified file on NFS")
        if self.NFS_path not in resource_path:
            raise Exception("File must be located on NFS mount")
            raise Exception("Could not find specified file")

        #autodetect file info
        size = os.path.getsize(resource_path)
        _, format = os.path.splitext(resource_path)
        filename = os.path.basename(resource_path)
        nfs_path = self.NFS_path + package + "/" + filename
        url = "file://CROSSBOW_NFS/%s/%s" % (package,filename)

        #update path
        resource_path = resource_path.replace(self.NFS_path, "file://CROSSBOW_NFS/")
        #check if file already exists on nfs
        if os.path.isfile(nfs_path):
            raise Exception("File already exists on NFS, try using a different filename")

        self.ckan.action.resource_create(package_id=package,url=resource_path,name=resource,
        #copy file to crossbow NFS
        prog = ProgressBar(widgets=[Percentage(), Bar()], maxval=100).start()
        self._copy_with_progress(resource_path, nfs_path, lambda pos, total: prog.update(100*pos/float(total)))
        print ''

        #add file to CKAN
        self.ckan.action.resource_create(package_id=package,url=url,name=resource,
            description=description,format=format,size=size)

    def edit_resource(self,package,resource,*args,**kwargs):
    def edit_resource(self,package,resource,new_name=None,new_description=None):
        '''
        edit the name or description for an existing resource

        parameters:
          - package: string
            name of package
          - resource: string
            name of resource
          - new_name: string (optional)
            new name for resource
          - new_description: string (optional)
            new description for resource

        raises an Exception when the package has no resource with the given name
        '''
        #get current metadata; filter by name so the resource is found at any position
        pkg_metadata = self.ckan.action.package_show(id=package)
        matches = [metadata for metadata in pkg_metadata['resources'] if metadata['name']==resource]
        if not matches:
            raise Exception("Could not find resource %s in package %s" % (resource,package))
        rsc_metadata = matches[0]

        #update metadata with new edits
        if new_name:
            rsc_metadata['name'] = new_name
        if new_description:
            rsc_metadata['description'] = new_description

        self.ckan.action.resource_update(**rsc_metadata)

    def get_resource_path(self,package,resource):
         '''
         return the path to a resource on the local NFS mount

         parameters:
           - package: string
             name of package
           - resource: string
             name of resource

         output: string
             NFS file URI path to resource (e.g. "/data/file.csv")
         '''
@@ -328,25 +371,12 @@ class crossbowMount(crossbowBase):
        resource_path = resource_path.replace("file://CROSSBOW_NFS/", self.NFS_path)
        dest_path = destination + os.path.basename(resource_path)
        
        #custom function to copy with progress bar
        def copy_with_progress(src, dst, callback, length=16*1024):
            '''
            copy src to dst chunk by chunk, calling
            callback(bytes_copied_so_far, total_bytes) after every chunk written
            '''
            total = os.path.getsize(src)
            done = 0
            with open(src, 'rb') as reader, open(dst, 'wb') as writer:
                #iter() with a sentinel stops at the first empty read (end of file)
                for chunk in iter(lambda: reader.read(length), b''):
                    writer.write(chunk)
                    done += len(chunk)
                    callback(done, total)

        #copy file to local drive
        prog = ProgressBar(widgets=[Percentage(), Bar()], maxval=100).start()
        copy_with_progress(resource_path, dest_path, lambda pos, total: prog.update(100*pos/float(total)))
        self._copy_with_progress(resource_path, dest_path, lambda pos, total: prog.update(100*pos/float(total)))
        print ''

    def delete_resource(self,package,resource,delete_from_nfs=False):
    def delete_resource(self,package,resource):
        '''
        delete a resource from a package (Note: your api key must belong to a sysadmin 
        or the owner of the resource to delete a resource)
@@ -362,8 +392,10 @@ class crossbowMount(crossbowBase):
        pkg_metadata = self.ckan.action.package_show(id=package)
        #look the resource up by name; it can sit at any position in the package
        matches = [metadata for metadata in pkg_metadata['resources'] if metadata['name']==resource]
        if not matches:
            raise Exception("Could not find resource %s in package %s" % (resource,package))
        resource_id = matches[0]['id']
        #translate the stored uri into a path on the local NFS mount
        resource_path = matches[0]['url'].replace("file://CROSSBOW_NFS/", self.NFS_path)

        #delete from ckan
        self.ckan.action.resource_delete(id=resource_id)

        #delete from nfs
        os.remove(resource_path)
@@ -374,7 +406,7 @@ class crossbowGlobus(crossbowBase):
    this version is for file transfers through Globus
    '''

    def add_resource(self):
    def upload_resource(self):
        pass
    def edit_resource(self):
        #placeholder: no behaviour implemented yet
        pass
@@ -389,6 +421,6 @@ class crossbowGlobus(crossbowBase):
#only runs when this file is executed directly
if __name__ == "__main__":
    #NOTE(review): the api key is hard-coded here and therefore committed with the
    #source; consider reading it from the environment or a config file instead
    cbow = crossbowMount(NFS_path="/data",api_key="eaabd7d9-3cb4-4014-85fe-73736e658472")
    packages = cbow.list_packages()
    cbow.print_package_details(packages[0])
    pkg_meta = cbow.get_package_details(packages[0])
    resources = cbow.list_resources(packages[0])
    cbow.print_resource_details(packages[0],resources[0])
    rsc_meta = cbow.get_resource_details(packages[0],resources[0])
+35 −18
Original line number Diff line number Diff line
@@ -17,11 +17,16 @@ def setup_module():
    ckan.action.package_create(name="nosetests1",owner_org="test",title="nosetests1",author="nosetests1",notes="nosetests1")
    
    #create resource for testing
    ckan.action.resource_create(package_id="nosetests1",url="file://CROSSBOW_NFS/testfile.csv",name="myresource1",description="myresource1")
    ckan.action.resource_create(package_id="nosetests1",url="file://CROSSBOW_NFS/nosetests1/testfile.csv",name="myresource1",description="myresource1")
    ckan.action.resource_create(package_id="nosetests1",url="file://CROSSBOW_NFS/nosetests1/testfile.dcd",name="myresource2",description="myresource2")

    #create temporary file for testing
    #tempfile = open('/data/testfile.csv', 'w+')
    #tempfile.close()
    #create temporary files for testing
    tempfile = open('/data/nosetests1/testfile.csv', 'w+')
    tempfile.close()
    tempfile = open('/data/nosetests1/testfile.dcd', 'w+')
    tempfile.close()
    tempfile = open('./testfile2.dcd', 'w+')
    tempfile.close()

def test_list_packages():
    assert "nosetests1" in cbowMount.list_packages()
@@ -42,36 +47,45 @@ def test_edit_package():

def test_list_resources():
    #each expected name must appear in the listing for the test package
    for expected in ("myresource1", "myresource2"):
        assert expected in cbowMount.list_resources("nosetests1")

def test_list_dcd_resources():
    dcd_resources = cbowMount.list_dcd_resources("nosetests1")
    assert "myresource2" in dcd_resources
    #the .csv resource must be filtered out, otherwise this test would also
    #pass for a method that simply returned every resource
    assert "myresource1" not in dcd_resources

def test_list_non_dcd_resources():
    non_dcd_resources = cbowMount.list_non_dcd_resources("nosetests1")
    assert "myresource1" in non_dcd_resources
    #the .dcd resource must be filtered out, otherwise this test would also
    #pass for a method that simply returned every resource
    assert "myresource2" not in non_dcd_resources

def test_get_resource_details():
    #the url must point at the package subfolder used by the test files
    details = cbowMount.get_resource_details("nosetests1","myresource1")
    assert details['url'] == "file://CROSSBOW_NFS/nosetests1/testfile.csv"

def test_add_resource():
    #upload a local file, then check it is registered in CKAN and copied into the package folder
    cbowMount.upload_resource("nosetests1","myresource3","./testfile2.dcd",description="myresource3")
    metadata = ckan.action.package_show(id="nosetests1")
    resources = [resource['name'] for resource in metadata['resources']]
    resource_urls = [resource['url'] for resource in metadata['resources']]
    assert "myresource3" in resources
    assert "file://CROSSBOW_NFS/nosetests1/testfile2.dcd" in resource_urls
    assert os.path.isfile('/data/nosetests1/testfile2.dcd')

def test_edit_resource():
    def myresource1_metadata():
        #filter by name so the lookup does not depend on the order of the resources
        pkg_metadata = ckan.action.package_show(id="nosetests1")
        return [metadata for metadata in pkg_metadata['resources'] if metadata['name']=="myresource1"][0]

    url_before = myresource1_metadata()['url']
    cbowMount.edit_resource("nosetests1","myresource1",new_description="myresource1b")
    rsc_metadata = myresource1_metadata()
    assert rsc_metadata['description'] == "myresource1b"
    #only the description was edited, so the url must be unchanged
    assert rsc_metadata['url'] == url_before

def test_get_resource_path():
    #the resource lives in the package subfolder, so the returned path must include it
    assert cbowMount.get_resource_path("nosetests1","myresource1") == "/data/nosetests1/testfile.csv"

def test_download_resource():
    #after the download the file must exist under its own name, relative to the working directory
    cbowMount.download_resource("nosetests1","myresource1")
    downloaded = os.path.isfile('testfile.csv')
    assert downloaded

def test_delete_resource():
    #deleting must remove both the CKAN entry and the file on the NFS mount
    cbowMount.delete_resource("nosetests1","myresource1")
    metadata = ckan.action.package_show(id="nosetests1")
    resources = [resource['name'] for resource in metadata['resources']]
    assert "myresource1" not in resources
    assert not os.path.isfile('/data/nosetests1/testfile.csv')

def teardown_module():
    #purge all packages used for testing
@@ -80,8 +94,11 @@ def teardown_module():
        ckan.action.dataset_purge(id="nosetests2")

    #delete temporary files if they exist
    temp_files = [
        '/data/nosetests1/testfile.csv',
        '/data/nosetests1/testfile.dcd',
        #copy written to the NFS mount by test_add_resource
        '/data/nosetests1/testfile2.dcd',
        'testfile.csv',
        'testfile2.dcd',
    ]
    for temp_file in temp_files:
        #check and remove the same path, so a leftover file is actually cleaned up
        if os.path.isfile(temp_file):
            os.remove(temp_file)