Commit 17a5383d authored by Gao, Shang
Browse files

crossbowOlcf now does qsub

parent c8551c16
Loading
Loading
Loading
Loading
+30 −8
Original line number Diff line number Diff line
import paramiko
import getpass
from scp import SCPClient
import time

olcfdtn = 'dtn.ccs.ornl.gov'
class crossbowOlcf(object):
    """SSH/SCP helper for submitting PBS jobs to an OLCF cluster.

    Constructing an instance prompts for OLCF credentials on the terminal,
    opens a paramiko SSH session to the selected cluster host and wraps its
    transport in an SCP client.  The session is kept on ``self.ssh`` and the
    SCP client on ``self.scp``.
    """

    # short cluster names accepted by the constructor -> host to connect to
    _HOSTS = {
        'rhea': 'rhea.ccs.ornl.gov',
        'titan': 'titan.ccs.ornl.gov',
    }

    def __init__(self,cluster):
        """Prompt for credentials and connect to the chosen cluster.

        cluster -- short cluster name, either 'rhea' or 'titan'

        Raises Exception when the cluster name is not recognized; this check
        happens before any prompt is shown.
        """
        if cluster not in self._HOSTS:
            raise Exception("Cluster not recognized; use 'rhea' or 'titan'")
        self.cluster = self._HOSTS[cluster]

        username = raw_input("Please enter your OLCF username: ")
        # the PIN + token string is handed to paramiko as the SSH password
        password = getpass.getpass("Please enter your PIN + TOKEN: ")

        self.ssh = paramiko.SSHClient()
        self.ssh.load_system_host_keys()
        # NOTE: AutoAddPolicy accepts host keys that are not yet known
        self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        self.ssh.connect(self.cluster, username=username, password=password)
        self.scp = SCPClient(self.ssh.get_transport())

    def qsub(self,pbs_script,olcf_path='./'):
        """Copy a PBS script to the cluster and submit it with qsub.

        pbs_script -- path of the local script; the same string is appended
                      to olcf_path to form the remote destination
        olcf_path  -- remote directory prefix (default './'); a trailing '/'
                      is added when it is missing

        Returns the list of lines that the qsub command wrote to stdout.
        """
        # without the separator 'dir' + 'job.pbs' would become 'dirjob.pbs'
        if olcf_path and not olcf_path.endswith('/'):
            olcf_path += '/'
        remote_script = olcf_path + pbs_script

        self.scp.put(pbs_script, remote_script)
        # short pause between upload and submission; presumably lets the
        # remote filesystem catch up -- TODO confirm it is still needed
        time.sleep(1)
        _stdin, stdout, _stderr = \
                self.ssh.exec_command("qsub %s" % remote_script)
        # readlines() consumes stdout until the remote side closes it
        return stdout.readlines()
        
if __name__ == "__main__":
    # manual check: running the module directly builds a client for rhea
    olcf = crossbowOlcf(cluster='rhea')
+8 −1
Original line number Diff line number Diff line
from crossbowGlobus import crossbowGlobus
from crossbowOlcf import crossbowOlcf
import time

#connect to CKAN, make sure yelp dataset exists
@@ -12,9 +13,15 @@ resources = cbow.list_resources(packages[idx])
# abort early when the review file is not among the package's resources
if 'yelp_academic_dataset_review.json' not in resources:
    raise Exception("yelp_academic_dataset_review.json is missing from yelp package")

#connect to OLCF before download starts
olcf = crossbowOlcf('rhea')

#download yelp dataset
endpoint = 'ef1a9560-7ca1-11e5-992c-22000b96db58'
# destination path on the endpoint; the earlier '/~/crossbow/yelp_example'
# value was always overwritten by this one, so only this assignment is kept
path = '/~/'
dl_id = cbow.download_resource('yelp','yelp_academic_dataset_review.json',
                               dest_endpoint=endpoint,dest_path=path,
                               wait_for_download=True,timeout=600)

#run Naive Bayes
olcf.qsub('yelp.pbs')
+2 −2
Original line number Diff line number Diff line
@@ -117,11 +117,11 @@ plt.show()
'''
#save all word embeddings to matrix
# parenthesized form prints the same text on Python 2 and parses on Python 3
print("saving word vectors to matrix")
# one row per vocabulary entry plus an extra leading row (index 0)
vocab = np.zeros((len(model.wv.vocab)+1,embedding_size))
word2id = {}

#first row of embedding matrix isn't used so that 0 can be masked
for key,val in model.wv.vocab.iteritems():
    # shift every vocabulary index by one to keep row 0 free
    idx = val.index + 1
    vocab[idx,:] = model[key]
    word2id[key] = idx
+5 −3
Original line number Diff line number Diff line
@@ -3,14 +3,16 @@
#PBS -N YELP_TEST
#PBS -l walltime=1:00:00
#PBS -l nodes=1
#PBS -lpartition=rhea

# Work from the home directory; stop if it cannot be entered so that the
# `rm -rf` below never runs in some other directory.
cd "${HOME}" || exit 1

# replace any previous checkout with a fresh clone
rm -rf crossbow
git clone https://code.ornl.gov/v33/crossbow.git

# set python paths
export PATH=/ccs/home/iamshang/anaconda2/bin:/ccs/home/iamshang/anaconda2/lib/python2.7/site-packages:$PATH
export PYTHONPATH=/ccs/home/iamshang/crossbow/crossbow:$PYTHONPATH

# run scripts
# the review file is read from ${HOME}, not from inside the checkout
python "${HOME}/crossbow/yelp_example/scripts/feature_extraction.py" "${HOME}/yelp_academic_dataset_review.json"