Loading crossbow/crossbowOlcf.py +30 −8 Original line number Diff line number Diff line import paramiko import getpass from scp import SCPClient import time olcfdtn = 'dtn.ccs.ornl.gov' class crossbowOlcf(object): def __init__(self,cluster): if cluster == 'rhea': self.cluster = 'rhea.ccs.ornl.gov' elif cluster == 'titan': self.cluster = 'titan.ccs.ornl.gov' else: raise Exception("Cluster not recognized; use 'rhea' or 'titan'") username = raw_input("Please enter your OLCF username: ") password = getpass.getpass("Please enter your PIN + TOKEN: ") ssh = paramiko.SSHClient() ssh.load_system_host_keys() ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) ssh.connect(olcfdtn, username=username, password=password) ssh_stdin, ssh_stdout, ssh_stderr = ssh.exec_command("touch test.txt") self.ssh = paramiko.SSHClient() self.ssh.load_system_host_keys() self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) self.ssh.connect(self.cluster, username=username, password=password) self.scp = SCPClient(self.ssh.get_transport()) def qsub(self,pbs_script,olcf_path='./'): self.scp.put(pbs_script, olcf_path+pbs_script) time.sleep(1) ssh_stdin, ssh_stdout, ssh_stderr = \ self.ssh.exec_command("qsub %s" % (olcf_path+pbs_script)) ssh_stdout.readlines() if __name__ == "__main__": olcf = crossbowOlcf('rhea') yelp_example/run_experiment.py +8 −1 Original line number Diff line number Diff line from crossbowGlobus import crossbowGlobus from crossbowOlcf import crossbowOlcf import time #connect to CKAN, make sure yelp dataset exists Loading @@ -12,9 +13,15 @@ resources = cbow.list_resources(packages[idx]) if 'yelp_academic_dataset_review.json' not in resources: raise Exception("yelp_academic_dataset_review.json is missing from yelp package") #connect to OLCF before download starts olcf = crossbowOlcf('rhea') #download yelp dataset endpoint = 'ef1a9560-7ca1-11e5-992c-22000b96db58' path = '/~/crossbow/yelp_example' path = '/~/' dl_id = 
cbow.download_resource('yelp','yelp_academic_dataset_review.json', dest_endpoint=endpoint,dest_path=path, wait_for_download=True,timeout=600) #run Naive Bayes olcf.qsub('yelp.pbs') yelp_example/scripts/feature_extraction.py +2 −2 Original line number Diff line number Diff line Loading @@ -117,11 +117,11 @@ plt.show() ''' #save all word embeddings to matrix print "saving word vectors to matrix" vocab = np.zeros((len(model.vocab)+1,embedding_size)) vocab = np.zeros((len(model.wv.vocab)+1,embedding_size)) word2id = {} #first row of embedding matrix isn't used so that 0 can be masked for key,val in model.vocab.iteritems(): for key,val in model.wv.vocab.iteritems(): idx = val.__dict__['index'] + 1 vocab[idx,:] = model[key] word2id[key] = idx Loading yelp_example/yelp.pbs +5 −3 Original line number Diff line number Diff line Loading @@ -3,14 +3,16 @@ #PBS -N YELP_TEST #PBS -l walltime=1:00:00 #PBS -l nodes=1 #PBS -lpartition=cpu #PBS -lpartition=rhea cd ${HOME}/ rm -rf crossbow git clone https://code.ornl.gov/v33/crossbow.git # set python paths export PATH=/ccs/home/iamshang/anaconda2/bin:/ccs/home/iamshang/anaconda2/lib/python2.7/site-packages:$PATH export PYTHONPATH=/ccs/home/iamshang/crossbow/crossbow:$PYTHONPATH # run scripts python ${HOME}/crossbow/yelp_example/load_dataset.py python ${HOME}/crossbow/yelp_example/feature_extraction.py ${HOME}/crossbow/yelp_example/yelp_academic_dataset_review.json python ${HOME}/crossbow/yelp_example/scripts/feature_extraction.py ${HOME}/yelp_academic_dataset_review.json Loading
# crossbow/crossbowOlcf.py
"""SSH/SCP helper for copying a PBS script to an OLCF cluster and submitting it."""
import getpass
import time

import paramiko
from scp import SCPClient

try:  # Python 2
    _read_line = raw_input
except NameError:  # Python 3 renamed raw_input() to input()
    _read_line = input

olcfdtn = 'dtn.ccs.ornl.gov'


class crossbowOlcf(object):
    """Authenticated SSH + SCP session to an OLCF login node.

    Constructing an instance prompts interactively for a username and a
    PIN + TOKEN passcode, then opens the connections. Use ``qsub`` to upload
    and submit a PBS script, and ``close`` to release the connections.
    """

    # Short cluster name -> login host.
    _CLUSTER_HOSTS = {
        'rhea': 'rhea.ccs.ornl.gov',
        'titan': 'titan.ccs.ornl.gov',
    }

    def __init__(self, cluster):
        """Prompt for credentials and connect to ``cluster``.

        Args:
            cluster: Either ``'rhea'`` or ``'titan'``.

        Raises:
            ValueError: If ``cluster`` is not one of the recognized names.
                (``ValueError`` is an ``Exception`` subclass, so callers that
                catch ``Exception`` keep working.)
        """
        try:
            self.cluster = self._CLUSTER_HOSTS[cluster]
        except (KeyError, TypeError):
            raise ValueError("Cluster not recognized; use 'rhea' or 'titan'")

        username = _read_line("Please enter your OLCF username: ")
        password = getpass.getpass("Please enter your PIN + TOKEN: ")

        # NOTE(review): this data-transfer-node probe may be a leftover from an
        # earlier revision of the file -- confirm it is still wanted. It is
        # kept here, but the client is now always closed instead of leaked.
        dtn = paramiko.SSHClient()
        try:
            dtn.load_system_host_keys()
            # NOTE(security): AutoAddPolicy trusts unknown host keys silently.
            dtn.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            dtn.connect(olcfdtn, username=username, password=password)
            _, dtn_stdout, _ = dtn.exec_command("touch test.txt")
            # Wait for the remote command to finish before tearing down the
            # connection, otherwise closing could cut it short.
            dtn_stdout.channel.recv_exit_status()
        finally:
            dtn.close()

        self.ssh = paramiko.SSHClient()
        self.ssh.load_system_host_keys()
        # NOTE(security): AutoAddPolicy trusts unknown host keys silently.
        self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            self.ssh.connect(self.cluster, username=username, password=password)
            self.scp = SCPClient(self.ssh.get_transport())
        except Exception:
            # Do not leave a half-open client behind when setup fails.
            self.ssh.close()
            raise

    def qsub(self, pbs_script, olcf_path='./'):
        """Upload ``pbs_script`` and submit it with ``qsub`` on the cluster.

        Args:
            pbs_script: Local path of the PBS script; the same relative path
                is used below ``olcf_path`` on the remote side.
            olcf_path: Remote directory prefix. A trailing ``/`` is appended
                when missing so the prefix and script name are not fused.

        Returns:
            The lines ``qsub`` wrote to standard output, as a list of strings.
        """
        if olcf_path and not olcf_path.endswith('/'):
            olcf_path += '/'
        remote_script = olcf_path + pbs_script

        self.scp.put(pbs_script, remote_script)
        # Pause kept from the original; presumably gives the uploaded file
        # time to become visible before submission -- TODO confirm.
        time.sleep(1)
        # NOTE(security): remote_script is interpolated into a shell command
        # unquoted; only pass trusted paths.
        _, ssh_stdout, _ = self.ssh.exec_command("qsub %s" % remote_script)
        # Reading stdout also blocks until the remote command has finished.
        return ssh_stdout.readlines()

    def close(self):
        """Close the SCP channel and the SSH connection to the cluster."""
        scp = getattr(self, 'scp', None)
        if scp is not None:
            scp.close()
        self.ssh.close()


if __name__ == "__main__":

    olcf = crossbowOlcf('rhea')
yelp_example/run_experiment.py +8 −1 Original line number Diff line number Diff line from crossbowGlobus import crossbowGlobus from crossbowOlcf import crossbowOlcf import time #connect to CKAN, make sure yelp dataset exists Loading @@ -12,9 +13,15 @@ resources = cbow.list_resources(packages[idx]) if 'yelp_academic_dataset_review.json' not in resources: raise Exception("yelp_academic_dataset_review.json is missing from yelp package") #connect to OLCF before download starts olcf = crossbowOlcf('rhea') #download yelp dataset endpoint = 'ef1a9560-7ca1-11e5-992c-22000b96db58' path = '/~/crossbow/yelp_example' path = '/~/' dl_id = cbow.download_resource('yelp','yelp_academic_dataset_review.json', dest_endpoint=endpoint,dest_path=path, wait_for_download=True,timeout=600) #run Naive Bayes olcf.qsub('yelp.pbs')
yelp_example/scripts/feature_extraction.py +2 −2 Original line number Diff line number Diff line Loading @@ -117,11 +117,11 @@ plt.show() ''' #save all word embeddings to matrix print "saving word vectors to matrix" vocab = np.zeros((len(model.vocab)+1,embedding_size)) vocab = np.zeros((len(model.wv.vocab)+1,embedding_size)) word2id = {} #first row of embedding matrix isn't used so that 0 can be masked for key,val in model.vocab.iteritems(): for key,val in model.wv.vocab.iteritems(): idx = val.__dict__['index'] + 1 vocab[idx,:] = model[key] word2id[key] = idx Loading
yelp_example/yelp.pbs +5 −3 Original line number Diff line number Diff line Loading @@ -3,14 +3,16 @@ #PBS -N YELP_TEST #PBS -l walltime=1:00:00 #PBS -l nodes=1 #PBS -lpartition=cpu #PBS -lpartition=rhea cd ${HOME}/ rm -rf crossbow git clone https://code.ornl.gov/v33/crossbow.git # set python paths export PATH=/ccs/home/iamshang/anaconda2/bin:/ccs/home/iamshang/anaconda2/lib/python2.7/site-packages:$PATH export PYTHONPATH=/ccs/home/iamshang/crossbow/crossbow:$PYTHONPATH # run scripts python ${HOME}/crossbow/yelp_example/load_dataset.py python ${HOME}/crossbow/yelp_example/feature_extraction.py ${HOME}/crossbow/yelp_example/yelp_academic_dataset_review.json python ${HOME}/crossbow/yelp_example/scripts/feature_extraction.py ${HOME}/yelp_academic_dataset_review.json