Loading crossbow/crossbowGlobus.py +27 −0 Original line number Diff line number Diff line Loading @@ -75,6 +75,9 @@ class crossbowGlobus(crossbowBase): - check_task_status(task_id) check on the status of a globus task - check_resource_exists_endpoint(resource_path,endpoint=olcfatlas) check if a resource exists on an endpoint ''' def __init__(self,api_key,CKAN_url="http://128.219.185.145:5000", Loading Loading @@ -316,6 +319,30 @@ class crossbowGlobus(crossbowBase): r = self.transfer.get_task(task_id) return r["status"] def check_resource_exists_endpoint(self,resource_path,endpoint=olcfatlas): ''' check if a resource exists on an endpoint parameters: - resource_path: string path to resource on endpoint - endpoint: string (default: olcf - 'ef1a9560-7ca1-11e5-992c-22000b96db58') globus endpoint id of endpoint output: boolean whether or not the resource already exists on the endpoint ''' #make sure file exists filename = os.path.basename(resource_path) directory,_ = os.path.split(resource_path) r = self.transfer.operation_ls(endpoint,path=directory,filter='name:'+filename) found = 0 for item in r: found += 1 if not found: return False return True #add model api later #add scheduling api later (for both filters and models) Loading yelp_example/cnn.pbs 0 → 100644 +23 −0 Original line number Diff line number Diff line #!/bin/bash #PBS -A CSC237 #PBS -N YELP_TEST #PBS -l walltime=100:00:00 #PBS -l nodes=1 #PBS -lpartition=gpu cd ${HOME}/ git clone https://code.ornl.gov/v33/crossbow.git cd ${HOME}/crossbow/ git pull # set python paths export PATH=/ccs/home/iamshang/anaconda2/bin:/ccs/home/iamshang/anaconda2/lib/python2.7/site-packages:$PATH export PYTHONPATH=/ccs/home/iamshang/crossbow/crossbow:$PYTHONPATH # run scripts cd ${HOME}/crossbow/yelp_example/scripts/ if ! [ -f embeddings.npy ] && ! 
[ -f data.pkl ]; then python feature_extraction.py ${HOME}/yelp_academic_dataset_review.json fi python th_cnn.py yelp_example/han.pbs 0 → 100644 +23 −0 Original line number Diff line number Diff line #!/bin/bash #PBS -A CSC237 #PBS -N YELP_TEST #PBS -l walltime=100:00:00 #PBS -l nodes=1 #PBS -lpartition=gpu cd ${HOME}/ git clone https://code.ornl.gov/v33/crossbow.git cd ${HOME}/crossbow/ git pull # set python paths export PATH=/ccs/home/iamshang/anaconda2/bin:/ccs/home/iamshang/anaconda2/lib/python2.7/site-packages:$PATH export PYTHONPATH=/ccs/home/iamshang/crossbow/crossbow:$PYTHONPATH # run scripts cd ${HOME}/crossbow/yelp_example/scripts/ if ! [ -f embeddings.npy ] && ! [ -f data.pkl ]; then python feature_extraction.py ${HOME}/yelp_academic_dataset_review.json fi python th_han.py yelp_example/yelp.pbs→yelp_example/naive_bayes.pbs +6 −8 Original line number Diff line number Diff line Loading @@ -6,20 +6,18 @@ #PBS -lpartition=rhea cd ${HOME}/ rm -rf crossbow git clone https://code.ornl.gov/v33/crossbow.git cd ${HOME}/crossbow/ git pull # set python paths export PATH=/ccs/home/iamshang/anaconda2/bin:/ccs/home/iamshang/anaconda2/lib/python2.7/site-packages:$PATH export PYTHONPATH=/ccs/home/iamshang/crossbow/crossbow:$PYTHONPATH # run scripts cd ${HOME}/crossbow/yelp_example/scripts/ mkdir ../results if ! [ -f embeddings.npy ] && ! 
[ -f data.pkl ]; then python feature_extraction.py ${HOME}/yelp_academic_dataset_review.json fi python naive_bayes.py #upload results cd ${HOME}/crossbow/yelp_example/results/ python upload_results.py yelp_example/run_experiment.py +13 −5 Original line number Diff line number Diff line from crossbowGlobus import crossbowGlobus from crossbowOlcf import crossbowOlcf import time import sys #get pbs to submit args = (sys.argv) if len(args) != 2: raise Exception("Usage: python run_experiment.py <.pbs file>") pbs_script = args[1] #connect to CKAN, make sure yelp dataset exists cbow = crossbowGlobus(api_key="eaabd7d9-3cb4-4014-85fe-73736e658472", Loading @@ -16,13 +23,14 @@ if 'yelp_academic_dataset_review.json' not in resources: #connect to OLCF before download starts olcf = crossbowOlcf('rhea') #download yelp dataset #download yelp dataset if it doesn't exist olcf_endpoint = 'ef1a9560-7ca1-11e5-992c-22000b96db58' path = '/~/' if not cbow.check_resource_exists_endpoint('/~/yelp_academic_dataset_review.json'): dl_id = cbow.download_resource('yelp','yelp_academic_dataset_review.json', dest_endpoint=olcf_endpoint,dest_path=path, wait_for_download=True,timeout=600) #run Naive Bayes print 'submitting pbs script to start experiment' olcf.qsub('yelp.pbs') olcf.qsub(pbs_script) Loading
# crossbow/crossbowGlobus.py -- method of class crossbowGlobus(crossbowBase)
#
# entries for this part of the API in the class docstring:
#   - check_task_status(task_id)
#       check on the status of a globus task
#   - check_resource_exists_endpoint(resource_path,endpoint=olcfatlas)
#       check if a resource exists on an endpoint
#
# the method directly before this one (check_task_status) ends with:
#   r = self.transfer.get_task(task_id)
#   return r["status"]
def check_resource_exists_endpoint(self,resource_path,endpoint=olcfatlas):
    '''
    check if a resource exists on an endpoint

    parameters:
      - resource_path: string
        path to resource on endpoint
      - endpoint: string (default: olcf - 'ef1a9560-7ca1-11e5-992c-22000b96db58')
        globus endpoint id of endpoint

    output:
      boolean
      whether or not the resource already exists on the endpoint
    '''
    #split once: the head is the directory to list, the tail is the entry name
    directory,filename = os.path.split(resource_path)

    #list the parent directory, passing a name filter for the wanted entry
    #NOTE(review): presumably operation_ls raises when the directory itself
    #is missing -- confirm whether callers expect False in that case
    listing = self.transfer.operation_ls(endpoint,path=directory,
                                         filter='name:'+filename)

    #one returned entry is enough, so stop at the first instead of counting
    return any(True for _ in listing)

#add model api later
#add scheduling api later (for both filters and models)
#!/bin/bash
#PBS -A CSC237
#PBS -N YELP_TEST
#PBS -l walltime=100:00:00
#PBS -l nodes=1
#PBS -lpartition=gpu

# yelp_example/cnn.pbs
# PBS job: refresh the crossbow checkout, build the Yelp feature files when
# they are missing, then run th_cnn.py.

# clone on first use only; an existing checkout is just updated by the pull
cd "${HOME}/" || exit 1
if [ ! -d "${HOME}/crossbow/.git" ]; then
    git clone https://code.ornl.gov/v33/crossbow.git
fi
# stop here if the checkout is unusable rather than running from the wrong dir
cd "${HOME}/crossbow/" || exit 1
git pull

# set python paths
export PATH=/ccs/home/iamshang/anaconda2/bin:/ccs/home/iamshang/anaconda2/lib/python2.7/site-packages:$PATH
export PYTHONPATH=/ccs/home/iamshang/crossbow/crossbow:$PYTHONPATH

# run scripts
cd "${HOME}/crossbow/yelp_example/scripts/" || exit 1

# rebuild the features when EITHER cached file is missing, so a half-written
# cache is repaired instead of being handed to the model script
# NOTE(review): assumes feature_extraction.py writes both embeddings.npy and
# data.pkl -- confirm against that script
if ! [ -f embeddings.npy ] || ! [ -f data.pkl ]; then
    python feature_extraction.py "${HOME}/yelp_academic_dataset_review.json"
fi
python th_cnn.py
#!/bin/bash
#PBS -A CSC237
#PBS -N YELP_TEST
#PBS -l walltime=100:00:00
#PBS -l nodes=1
#PBS -lpartition=gpu

# yelp_example/han.pbs
# PBS job: refresh the crossbow checkout, build the Yelp feature files when
# they are missing, then run th_han.py.

# clone on first use only; an existing checkout is just updated by the pull
cd "${HOME}/" || exit 1
if [ ! -d "${HOME}/crossbow/.git" ]; then
    git clone https://code.ornl.gov/v33/crossbow.git
fi
# stop here if the checkout is unusable rather than running from the wrong dir
cd "${HOME}/crossbow/" || exit 1
git pull

# set python paths
export PATH=/ccs/home/iamshang/anaconda2/bin:/ccs/home/iamshang/anaconda2/lib/python2.7/site-packages:$PATH
export PYTHONPATH=/ccs/home/iamshang/crossbow/crossbow:$PYTHONPATH

# run scripts
cd "${HOME}/crossbow/yelp_example/scripts/" || exit 1

# rebuild the features when EITHER cached file is missing, so a half-written
# cache is repaired instead of being handed to the model script
# NOTE(review): assumes feature_extraction.py writes both embeddings.npy and
# data.pkl -- confirm against that script
if ! [ -f embeddings.npy ] || ! [ -f data.pkl ]; then
    python feature_extraction.py "${HOME}/yelp_academic_dataset_review.json"
fi
python th_han.py
yelp_example/yelp.pbs→yelp_example/naive_bayes.pbs +6 −8 Original line number Diff line number Diff line Loading @@ -6,20 +6,18 @@ #PBS -lpartition=rhea cd ${HOME}/ rm -rf crossbow git clone https://code.ornl.gov/v33/crossbow.git cd ${HOME}/crossbow/ git pull # set python paths export PATH=/ccs/home/iamshang/anaconda2/bin:/ccs/home/iamshang/anaconda2/lib/python2.7/site-packages:$PATH export PYTHONPATH=/ccs/home/iamshang/crossbow/crossbow:$PYTHONPATH # run scripts cd ${HOME}/crossbow/yelp_example/scripts/ mkdir ../results if ! [ -f embeddings.npy ] && ! [ -f data.pkl ]; then python feature_extraction.py ${HOME}/yelp_academic_dataset_review.json fi python naive_bayes.py #upload results cd ${HOME}/crossbow/yelp_example/results/ python upload_results.py
yelp_example/run_experiment.py +13 −5 Original line number Diff line number Diff line from crossbowGlobus import crossbowGlobus from crossbowOlcf import crossbowOlcf import time import sys #get pbs to submit args = (sys.argv) if len(args) != 2: raise Exception("Usage: python run_experiment.py <.pbs file>") pbs_script = args[1] #connect to CKAN, make sure yelp dataset exists cbow = crossbowGlobus(api_key="eaabd7d9-3cb4-4014-85fe-73736e658472", Loading @@ -16,13 +23,14 @@ if 'yelp_academic_dataset_review.json' not in resources: #connect to OLCF before download starts olcf = crossbowOlcf('rhea') #download yelp dataset #download yelp dataset if it doesn't exist olcf_endpoint = 'ef1a9560-7ca1-11e5-992c-22000b96db58' path = '/~/' if not cbow.check_resource_exists_endpoint('/~/yelp_academic_dataset_review.json'): dl_id = cbow.download_resource('yelp','yelp_academic_dataset_review.json', dest_endpoint=olcf_endpoint,dest_path=path, wait_for_download=True,timeout=600) #run Naive Bayes print 'submitting pbs script to start experiment' olcf.qsub('yelp.pbs') olcf.qsub(pbs_script)