crossbow/crossbowMount.py (+1 −1)

@@ -54,7 +54,7 @@ class crossbowMount(crossbowBase):
     - check_resource_exists(package,resource)
         check if a resource already exists on the CKAN server
-    - upload_resource(package,resource,resource_path,description=None)
+    - upload_resource(package,resource_path,description=None)
         upload a local resource to an existing package
         resource is copied to CROSSBOW_NFS/package/filename
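As a usage illustration (not part of the diff), a minimal sketch of the corrected signature; the `cbow` instance and the local path are hypothetical, since the crossbowMount constructor is not shown here:

# illustrative only: assumes an already-constructed crossbowMount instance
# `cbow` and an existing 'mnist' package on the CKAN server
if not cbow.check_resource_exists('mnist', 'mnist_train.csv'):
    # only the package and local path are passed now; the resource name
    # comes from the file name, since the file is copied to
    # CROSSBOW_NFS/package/filename
    cbow.upload_resource('mnist', '/tmp/mnist_train.csv',
                         description='MNIST training split')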
mnist_example/mnist.pbs (new file, +29 −0)

#!/bin/bash
#PBS -A CSC237
#PBS -N MNIST_TEST
#PBS -l walltime=3:00:00
#PBS -l nodes=1
#PBS -l partition=gpu

# clone repo / get latest
cd ${HOME}/
if [ ! -d crossbow ]; then
    git clone https://code.ornl.gov/v33/crossbow.git
else
    cd ${HOME}/crossbow/
    git pull
fi

# set python paths
export PATH=/ccs/home/iamshang/anaconda2/bin:/ccs/home/iamshang/anaconda2/lib/python2.7/site-packages:$PATH
export PYTHONPATH=/ccs/home/iamshang/crossbow/crossbow:$PYTHONPATH

# run scripts
mv mnist_train.csv ${HOME}/crossbow/mnist_example/scripts/
mv mnist_test.csv ${HOME}/crossbow/mnist_example/scripts/
cd ${HOME}/crossbow/mnist_example/scripts/
python conv_net.py

# cleanup
cd ${HOME}/
rm mnist.pbs
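Rather than calling the scheduler by hand, the job can be submitted through the crossbowOlcf wrapper; a minimal sketch, reusing the same calls that appear in mnist_example/run_experiment.py below:

from crossbowOlcf import crossbowOlcf

# 'rhea' and qsub() are the calls used in run_experiment.py
olcf = crossbowOlcf('rhea')
olcf.qsub('mnist.pbs')

Note that the script moves mnist_train.csv and mnist_test.csv out of ${HOME}, so the data must already be staged there; run_experiment.py handles that transfer via Globus before submitting.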
mnist_example/run_experiment.py (new file, +42 −0)

from crossbowGlobus import crossbowGlobus
from crossbowOlcf import crossbowOlcf
import sys

# get pbs script to submit
args = sys.argv
if len(args) != 2:
    raise Exception("Usage: python run_experiment.py <.pbs file>")
pbs_script = args[1]

# connect to CKAN, make sure mnist dataset exists
cbow = crossbowGlobus(api_key="eaabd7d9-3cb4-4014-85fe-73736e658472",
                      token_file='../crossbow/refresh-tokens.json')
packages = cbow.list_packages()
if 'mnist' not in packages:
    raise Exception("mnist package is missing from CKAN")
idx = packages.index("mnist")
resources = cbow.list_resources(packages[idx])
if 'mnist_train.csv' not in resources:
    raise Exception("mnist_train.csv is missing from mnist package")
if 'mnist_test.csv' not in resources:
    raise Exception("mnist_test.csv is missing from mnist package")

# connect to OLCF before download starts
olcf = crossbowOlcf('rhea')

# download mnist dataset if it doesn't exist
olcf_endpoint = 'ef1a9560-7ca1-11e5-992c-22000b96db58'
path = '/~/'
if not cbow.check_resource_exists_endpoint('/~/mnist_test.csv'):
    dl_id = cbow.download_resource('mnist', 'mnist_test.csv',
                                   dest_endpoint=olcf_endpoint, dest_path=path,
                                   wait_for_download=True, timeout=100)
if not cbow.check_resource_exists_endpoint('/~/mnist_train.csv'):
    dl_id = cbow.download_resource('mnist', 'mnist_train.csv',
                                   dest_endpoint=olcf_endpoint, dest_path=path,
                                   wait_for_download=True, timeout=100)

# submit pbs
print 'submitting pbs script to start experiment'
olcf.qsub(pbs_script)
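As the argument check at the top implies, the driver takes the job script as its single argument:

python run_experiment.py mnist.pbs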
mnist_example/scripts/conv_net.py (new file, +164 −0)

import numpy as np
import pandas as pd
import tensorflow as tf

# load data
train = pd.read_csv('mnist_train.csv', header=None)
y_train = np.array(train.iloc[:,0])
X_train = np.array(train.iloc[:,1:])/255.
test = pd.read_csv('mnist_test.csv', header=None)
y_test = np.array(test.iloc[:,0])
X_test = np.array(test.iloc[:,1:])/255.

# one hot encoding for labels
def one_hot(y):
    retVal = np.zeros((len(y), 10))
    retVal[np.arange(len(y)), y.astype(int)] = 1
    return retVal

y_train = one_hot(y_train)
y_test = one_hot(y_test)

# reshape data into tensorflow image format
X_train = X_train.reshape(len(X_train), 28, 28, 1)
X_test = X_test.reshape(len(X_test), 28, 28, 1)

# model parameters
noOfIterations = 10000
image_size = 28
num_channels = 1
num_labels = 10
batch_size = 100

# layer initialization functions
def conv_ortho_weights(filter_h, filter_w, chan_in, chan_out):
    # orthogonal initialization via SVD of a random matrix
    W = np.random.random((chan_out, chan_in * filter_h * filter_w))
    u, s, v = np.linalg.svd(W, full_matrices=False)
    if u.shape[0] != u.shape[1]:
        W = u.reshape((filter_h, filter_w, chan_in, chan_out))
    else:
        W = v.reshape((filter_h, filter_w, chan_in, chan_out))
    return W.astype(np.float32)

def dense_ortho_weights(fan_in, fan_out):
    bound = np.sqrt(2./(fan_in + fan_out))
    W = np.random.randn(fan_in, fan_out) * bound
    u, s, v = np.linalg.svd(W, full_matrices=False)
    if u.shape[0] != u.shape[1]:
        W = u
    else:
        W = v
    return W.astype(np.float32)

def bias_variable(shape):
    initial = tf.constant(0.0, shape=shape)
    return initial

def conv2d(x, W, stride=[1, 1, 1, 1]):
    return tf.nn.conv2d(x, W, strides=stride, padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

# neural net architecture
# input layer - 20% dropout
tfx = tf.placeholder(tf.float32, shape=[None, image_size, image_size, num_channels])
tfy = tf.placeholder(tf.float32, shape=[None, num_labels])
kp1 = tf.placeholder(tf.float32)
d1 = tf.nn.dropout(tfx, kp1)

# layer 1 - 3x3 convolution into 80 maps with ReLU activation
# input size - (batchsize,28,28,1)
w1 = tf.Variable(conv_ortho_weights(3, 3, num_channels, 80))
b1 = tf.Variable(bias_variable([80]))
l1 = tf.nn.relu(conv2d(d1, w1) + b1)

# layer 2 - 3x3 convolution into 80 maps with ReLU activation, then 2x2 maxpool
# input size - (batchsize,28,28,80)
w2 = tf.Variable(conv_ortho_weights(3, 3, 80, 80))
b2 = tf.Variable(bias_variable([80]))
l2 = tf.nn.relu(conv2d(l1, w2) + b2)
maxpool1 = max_pool_2x2(l2)

# layer 3 - 3x3 convolution into 160 maps with ReLU activation
# input size - (batchsize,14,14,80)
w3 = tf.Variable(conv_ortho_weights(3, 3, 80, 160))
b3 = tf.Variable(bias_variable([160]))
l3 = tf.nn.relu(conv2d(maxpool1, w3) + b3)

# layer 4 - 3x3 convolution into 160 maps with ReLU activation, then 2x2 maxpool
# input size - (batchsize,14,14,160)
w4 = tf.Variable(conv_ortho_weights(3, 3, 160, 160))
b4 = tf.Variable(bias_variable([160]))
l4 = tf.nn.relu(conv2d(l3, w4) + b4)
maxpool2 = max_pool_2x2(l4)

# layer 5 - 3x3 convolution into 320 maps with ReLU activation
# input size - (batchsize,7,7,160)
w5 = tf.Variable(conv_ortho_weights(3, 3, 160, 320))
b5 = tf.Variable(bias_variable([320]))
l5 = tf.nn.relu(conv2d(maxpool2, w5) + b5)

# layer 6 - 3x3 convolution into 320 maps with ReLU activation
# input size - (batchsize,7,7,320)
w6 = tf.Variable(conv_ortho_weights(3, 3, 320, 320))
b6 = tf.Variable(bias_variable([320]))
l6 = tf.nn.relu(conv2d(l5, w6) + b6)

# layer 7 - dense feedforward layer with 2000 ReLU units, 50% dropout
# input size - (batchsize,7,7,320)
w7 = tf.Variable(dense_ortho_weights(7 * 7 * 320, 2000))
b7 = tf.Variable(bias_variable([2000]))
flattened = tf.reshape(l6, [-1, 7 * 7 * 320])
l7 = tf.nn.relu(tf.matmul(flattened, w7) + b7)
kp2 = tf.placeholder(tf.float32)
drop = tf.nn.dropout(l7, kp2)

# layer 8 - softmax layer into output labels
# input size - (batchsize,2000)
w8 = tf.Variable(dense_ortho_weights(2000, num_labels))
b8 = tf.Variable(bias_variable([num_labels]))
lastLayer = tf.matmul(drop, w8) + b8

# loss, accuracy, and training functions
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=lastLayer, labels=tfy))
optimizer = tf.train.AdamOptimizer(0.0002, 0.9, 0.99).minimize(loss)
prediction = tf.nn.softmax(lastLayer)
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(tfy, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# run model
init_op = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init_op)
for i in range(1, noOfIterations+1):
    # generate random minibatch
    indices = np.random.permutation(X_train.shape[0])[:batch_size]
    X_batch = X_train[indices,:,:,:]
    y_batch = y_train[indices,:]
    # train
    feed_dict = {tfx: X_batch, tfy: y_batch, kp1: 0.8, kp2: 0.5}
    l, _ = sess.run([loss, optimizer], feed_dict=feed_dict)
    print 'iteration %i loss: %.4f' % (i, l)
    # test accuracy every 100 iterations
    if i % 100 == 0:
        test_accuracies = []
        for j in range(0, X_test.shape[0], batch_size):
            feed_dict = {tfx: X_test[j:j+batch_size,:,:,:], tfy: y_test[j:j+batch_size,:], kp1: 1., kp2: 1.}
            test_accuracies.append(sess.run(accuracy, feed_dict=feed_dict)*100)
        print 'iteration %i test accuracy: %.4f%%' % (i, np.mean(test_accuracies))

# save predictions to text file
preds = []
for j in range(0, X_test.shape[0], batch_size):
    feed_dict = {tfx: X_test[j:j+batch_size,:,:,:], kp1: 1., kp2: 1.}
    p = sess.run(prediction, feed_dict=feed_dict)
    preds.extend(np.argmax(p, 1))
pred = np.array(preds)
np.savetxt('predictions.txt', pred, fmt='%.0f')
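Since conv_net.py writes its class predictions to predictions.txt, the result can be sanity-checked offline; a minimal sketch, assuming the same mnist_test.csv layout the script itself uses (label in column 0):

import numpy as np
import pandas as pd

# reload the test labels exactly as conv_net.py does
test = pd.read_csv('mnist_test.csv', header=None)
y_true = np.array(test.iloc[:, 0])

# compare against the saved class predictions
y_pred = np.loadtxt('predictions.txt')
print 'offline test accuracy: %.2f%%' % (100. * np.mean(y_pred == y_true))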
yelp_example/run_experiment.py (+1 −1)

@@ -31,6 +31,6 @@ if not cbow.check_resource_exists_endpoint('/~/yelp_academic_dataset_review.json
                                    dest_endpoint=olcf_endpoint, dest_path=path,
                                    wait_for_download=True, timeout=600)

-#run Naive Bayes
+#submit pbs
 print 'submitting pbs script to start experiment'
 olcf.qsub(pbs_script)