Commit 63a3397d authored by Gao, Shang's avatar Gao, Shang
Browse files

added individual .pbs files for each model

parent 5cc45249
Loading
Loading
Loading
Loading
+27 −0
Original line number Diff line number Diff line
@@ -75,6 +75,9 @@ class crossbowGlobus(crossbowBase):
        
      - check_task_status(task_id)
        check on the status of a globus task
        
      - check_resource_exists_endpoint(resource_path,endpoint=olcfatlas)
        check if a resource exists on an endpoint
    '''

    def __init__(self,api_key,CKAN_url="http://128.219.185.145:5000",
@@ -316,6 +319,30 @@ class crossbowGlobus(crossbowBase):
        r = self.transfer.get_task(task_id)
        return r["status"]
        
    def check_resource_exists_endpoint(self,resource_path,endpoint=olcfatlas):
        '''
        check if a resource exists on an endpoint

        the parent directory of resource_path is listed on the endpoint with
        a name filter built from the last path component; the resource counts
        as existing when that listing yields at least one entry

        parameters:
          - resource_path: string
            path to resource on endpoint
          - endpoint: string (default: olcfatlas, presumably the olcf endpoint
            id 'ef1a9560-7ca1-11e5-992c-22000b96db58' - TODO confirm)
            globus endpoint id of endpoint

        output: boolean
            whether or not the resource already exists on the endpoint
        '''
        #split once into the directory to list and the name to filter on
        directory,filename = os.path.split(resource_path)
        listing = self.transfer.operation_ls(endpoint,path=directory,
                                             filter='name:'+filename)

        #NOTE(review): errors raised by operation_ls (e.g. for a missing parent
        #directory) are not handled here and propagate to the caller - confirm
        #that this is the desired behaviour

        #a single matching entry is enough, so stop at the first item instead
        #of counting the whole listing
        return any(True for _ in listing)

#add model api later
#add scheduling api later (for both filters and models)

yelp_example/cnn.pbs

0 → 100644
+23 −0
Original line number Diff line number Diff line
#!/bin/bash
#PBS -A CSC237
#PBS -N YELP_TEST
#PBS -l walltime=100:00:00
#PBS -l nodes=1
#PBS -lpartition=gpu

# PBS job script: refresh the crossbow checkout in ${HOME}, run feature
# extraction on the yelp review dataset when its outputs are missing, then
# run the th_cnn.py model script.

# Abort if the home directory is unreachable; every later step depends on it.
cd "${HOME}/" || exit 1

# Clone only when no checkout exists yet; cloning into an existing directory
# fails, and the pull below already brings an existing checkout up to date.
if ! [ -d crossbow ]; then
    git clone https://code.ornl.gov/v33/crossbow.git || exit 1
fi

cd "${HOME}/crossbow/" || exit 1
# Best effort: a failed pull leaves the existing checkout in place.
git pull

# set python paths
export PATH=/ccs/home/iamshang/anaconda2/bin:/ccs/home/iamshang/anaconda2/lib/python2.7/site-packages:$PATH
export PYTHONPATH=/ccs/home/iamshang/crossbow/crossbow:$PYTHONPATH

# run scripts
cd "${HOME}/crossbow/yelp_example/scripts/" || exit 1
# Feature extraction runs only when BOTH output files are missing.
# NOTE(review): if the model needs both files, this should probably be "||"
# so that a single missing file also triggers extraction - confirm.
if ! [ -f embeddings.npy ] && ! [ -f data.pkl ]; then
    python feature_extraction.py "${HOME}/yelp_academic_dataset_review.json"
fi
python th_cnn.py

yelp_example/han.pbs

0 → 100644
+23 −0
Original line number Diff line number Diff line
#!/bin/bash
#PBS -A CSC237
#PBS -N YELP_TEST
#PBS -l walltime=100:00:00
#PBS -l nodes=1
#PBS -lpartition=gpu

# PBS job script: refresh the crossbow checkout in ${HOME}, run feature
# extraction on the yelp review dataset when its outputs are missing, then
# run the th_han.py model script.

# Abort if the home directory is unreachable; every later step depends on it.
cd "${HOME}/" || exit 1

# Clone only when no checkout exists yet; cloning into an existing directory
# fails, and the pull below already brings an existing checkout up to date.
if ! [ -d crossbow ]; then
    git clone https://code.ornl.gov/v33/crossbow.git || exit 1
fi

cd "${HOME}/crossbow/" || exit 1
# Best effort: a failed pull leaves the existing checkout in place.
git pull

# set python paths
export PATH=/ccs/home/iamshang/anaconda2/bin:/ccs/home/iamshang/anaconda2/lib/python2.7/site-packages:$PATH
export PYTHONPATH=/ccs/home/iamshang/crossbow/crossbow:$PYTHONPATH

# run scripts
cd "${HOME}/crossbow/yelp_example/scripts/" || exit 1
# Feature extraction runs only when BOTH output files are missing.
# NOTE(review): if the model needs both files, this should probably be "||"
# so that a single missing file also triggers extraction - confirm.
if ! [ -f embeddings.npy ] && ! [ -f data.pkl ]; then
    python feature_extraction.py "${HOME}/yelp_academic_dataset_review.json"
fi
python th_han.py
+6 −8
Original line number Diff line number Diff line
@@ -6,20 +6,18 @@
#PBS -lpartition=rhea

cd ${HOME}/

rm -rf crossbow
git clone https://code.ornl.gov/v33/crossbow.git

cd ${HOME}/crossbow/
git pull

# set python paths
export PATH=/ccs/home/iamshang/anaconda2/bin:/ccs/home/iamshang/anaconda2/lib/python2.7/site-packages:$PATH
export PYTHONPATH=/ccs/home/iamshang/crossbow/crossbow:$PYTHONPATH

# run scripts
cd ${HOME}/crossbow/yelp_example/scripts/
mkdir ../results
if ! [ -f embeddings.npy ] && ! [ -f data.pkl ]; then
    python feature_extraction.py ${HOME}/yelp_academic_dataset_review.json
fi
python naive_bayes.py

#upload results
cd ${HOME}/crossbow/yelp_example/results/
python upload_results.py
+13 −5
Original line number Diff line number Diff line
from crossbowGlobus import crossbowGlobus
from crossbowOlcf import crossbowOlcf
import time
import sys

#get pbs to submit
args = (sys.argv)
if len(args) != 2:
    raise Exception("Usage: python run_experiment.py <.pbs file>")
pbs_script = args[1]

#connect to CKAN, make sure yelp dataset exists
cbow = crossbowGlobus(api_key="eaabd7d9-3cb4-4014-85fe-73736e658472",
@@ -16,13 +23,14 @@ if 'yelp_academic_dataset_review.json' not in resources:
#connect to OLCF before download starts
olcf = crossbowOlcf('rhea')

#download yelp dataset
#download yelp dataset if it doesn't exist
olcf_endpoint = 'ef1a9560-7ca1-11e5-992c-22000b96db58'
path = '/~/'
if not cbow.check_resource_exists_endpoint('/~/yelp_academic_dataset_review.json'):
    dl_id = cbow.download_resource('yelp','yelp_academic_dataset_review.json',
                                   dest_endpoint=olcf_endpoint,dest_path=path,
                                   wait_for_download=True,timeout=600)
                               
#run Naive Bayes
print 'submitting pbs script to start experiment'
olcf.qsub('yelp.pbs')
olcf.qsub(pbs_script)
Loading