Loading crossbow/crossbowBase.py +6 −0 Original line number Diff line number Diff line Loading @@ -76,8 +76,14 @@ class crossbowBase(object): - tags: list (optional) list of tag dictionaries to add to the package ''' #check if package already exists if package in self.ckan.action.package_list(): raise Exception('package already exists') #check format if " " in package or any(x.isupper() for x in package): raise Exception('package must be lowercase and without spaces') self.ckan.action.package_create( name=package,owner_org=owner_org,title=title,author=author, author_email=author_email,maintainer=maintainer, Loading crossbow/crossbowGlobus.py +1 −0 Original line number Diff line number Diff line Loading @@ -85,6 +85,7 @@ class crossbowGlobus(crossbowBase): self.client_id = '18270179-b498-4032-9187-ca646d6e29af' self.cadesdtn = 'e6bcd4fa-ac1b-11e6-9aee-22000a1e3b52' self.crossbowdtn = '586dc9b6-189a-11e7-bb9f-22000b9a448b' try: # if we already have tokens, load and use them Loading moldyn_example/scripts/conv_vae.py 0 → 100644 +376 −0 Original line number Diff line number Diff line ''' Convolutional variational autoencoder in Keras. Based off dense variational autoencoder from https://github.com/fchollet/keras/blob/master/examples/variational_autoencoder.py ''' import numpy as np from keras.layers import Input, Dense, Lambda, Flatten, Reshape, Dropout from keras.layers import Convolution2D, Conv2DTranspose from keras.models import Model from keras.optimizers import SGD, Adam, RMSprop, Adadelta from keras.callbacks import Callback, ModelCheckpoint from keras import backend as K from keras import objectives import warnings class conv_variational_autoencoder(object): ''' variational autoencoder class parameters: - image_size: tuple height and width of images - channels: int number of channels in input images - conv_layers: int number of encoding/decoding convolutional layers - feature_maps: list of ints number of output feature maps for each convolutional layer - filter_shapes: list of tuples convolutional filter shape for each convolutional layer - strides: list of tuples convolutional stride for each convolutional layer - dense_layers: int number of encoding/decoding dense layers - dense_neurons: list of ints number of neurons for each dense layer - dense_dropouts: list of float fraction of neurons to drop in each dense layer (between 0 and 1) - latent_dim: int number of dimensions for latent embedding - activation: string (default='relu') activation function to use for layers - eps_mean: float (default = 0.0) mean to use for epsilon (target distribution for embedding) - eps_std: float (default = 1.0) standard dev to use for epsilon (target distribution for embedding) methods: - train(data,batch_size,epochs=1,history=False,checkpoint=False, filepath=None) train network on given data - save(filepath) save the model weights to a file - load(filepath) load model weights from a file - return_embeddings(data) return the embeddings for given data - generate(embedding) return a generated output given a latent embedding ''' def __init__(self,image_size,channels,conv_layers,feature_maps,filter_shapes, strides,dense_layers,dense_neurons,dense_dropouts,latent_dim, activation='relu',eps_mean=0.0,eps_std=1.0): #check that arguments are proper length if len(filter_shapes)!=conv_layers: raise Exception("number of convolutional layers must equal length of \ filter_shapes list") if len(strides)!=conv_layers: raise Exception("number of convolutional layers must equal length of \ strides list") if len(feature_maps)!=conv_layers: raise Exception("number of convolutional layers must equal length of \ feature_maps list") if len(dense_neurons)!=dense_layers: raise Exception("number of dense layers must equal length of \ dense_neurons list") if len(dense_dropouts)!=dense_layers: raise Exception("number of dense layers must equal length of \ dense_dropouts list") #even shaped filters may cause problems in theano backend even_filters = [f for pair in filter_shapes for f in pair if f % 2 == 0] if K.image_dim_ordering() == 'th' and len(even_filters) > 0: warnings.warn('Even shaped filters may cause problems in Theano backend') self.eps_mean = eps_mean self.eps_std = eps_std self.image_size = image_size #define input layer if K.image_dim_ordering() == 'th': self.input = Input(shape=(channels,image_size[0],image_size[1])) else: self.input = Input(shape=(image_size[0],image_size[1],channels)) #define convolutional encoding layers self.encode_conv = [] layer = Convolution2D(feature_maps[0],filter_shapes[0],padding='same', activation=activation,strides=strides[0])(self.input) self.encode_conv.append(layer) for i in range(1,conv_layers): layer = Convolution2D(feature_maps[i],filter_shapes[i], padding='same',activation=activation, strides=strides[i])(self.encode_conv[i-1]) self.encode_conv.append(layer) #define dense encoding layers self.flat = Flatten()(self.encode_conv[-1]) self.encode_dense = [] layer = Dense(dense_neurons[0],activation=activation)\ (Dropout(dense_dropouts[0])(self.flat)) self.encode_dense.append(layer) for i in range(1,dense_layers): layer = Dense(dense_neurons[i],activation=activation)\ (Dropout(dense_dropouts[i])(self.encode_dense[i-1])) self.encode_dense.append(layer) #define embedding layer self.z_mean = Dense(latent_dim)(self.encode_dense[-1]) self.z_log_var = Dense(latent_dim)(self.encode_dense[-1]) self.z = Lambda(self._sampling, output_shape=(latent_dim,))\ ([self.z_mean, self.z_log_var]) #save all decoding layers for generation model self.all_decoding=[] #define dense decoding layers self.decode_dense = [] layer = Dense(dense_neurons[-1], activation=activation) self.all_decoding.append(layer) self.decode_dense.append(layer(self.z)) for i in range(1,dense_layers): layer = Dense(dense_neurons[-i-1],activation=activation) self.all_decoding.append(layer) self.decode_dense.append(layer(self.decode_dense[i-1])) #dummy model to get image size after encoding convolutions self.decode_conv = [] if K.image_dim_ordering() == 'th': dummy_input = np.ones((1,channels,image_size[0],image_size[1])) else: dummy_input = np.ones((1,image_size[0],image_size[1],channels)) dummy = Model(self.input, self.encode_conv[-1]) conv_size = dummy.predict(dummy_input).shape layer = Dense(conv_size[1]*conv_size[2]*conv_size[3],activation=activation) self.all_decoding.append(layer) self.decode_dense.append(layer(self.decode_dense[-1])) reshape = Reshape(conv_size[1:]) self.all_decoding.append(reshape) self.decode_conv.append(reshape(self.decode_dense[-1])) #define deconvolutional decoding layers for i in range(1,conv_layers): if K.image_dim_ordering() == 'th': dummy_input = np.ones((1,channels,image_size[0],image_size[1])) else: dummy_input = np.ones((1,image_size[0],image_size[1],channels)) dummy = Model(self.input, self.encode_conv[-i-1]) conv_size = list(dummy.predict(dummy_input).shape) if K.image_dim_ordering() == 'th': conv_size[1] = feature_maps[-i] else: conv_size[3] = feature_maps[-i] layer = Conv2DTranspose(feature_maps[-i-1],filter_shapes[-i], padding='same',activation=activation, strides=strides[-i]) self.all_decoding.append(layer) self.decode_conv.append(layer(self.decode_conv[i-1])) layer = Conv2DTranspose(channels,filter_shapes[0],padding='same', activation='sigmoid',strides=strides[0]) self.all_decoding.append(layer) self.output=layer(self.decode_conv[-1]) #build model self.model = Model(self.input, self.output) self.optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0) self.model.compile(optimizer=self.optimizer, loss=self._vae_loss) print "model summary:" self.model.summary() #model for embeddings self.embedder = Model(self.input, self.z_mean) #model for generation self.decoder_input = Input(shape=(latent_dim,)) self.generation = [] self.generation.append(self.all_decoding[0](self.decoder_input)) for i in range(1, len(self.all_decoding)): self.generation.append(self.all_decoding[i](self.generation[i-1])) self.generator = Model(self.decoder_input, self.generation[-1]) def _sampling(self,args): ''' sampling function for embedding layer ''' z_mean,z_log_var = args epsilon = K.random_normal(shape=K.shape(z_mean),mean=self.eps_mean, stddev=self.eps_std) return z_mean + K.exp(z_log_var) * epsilon def _vae_loss(self,input,output): ''' loss function for variational autoencoder ''' input_flat = K.flatten(input) output_flat = K.flatten(output) xent_loss = self.image_size[0] * self.image_size[1] \ * objectives.binary_crossentropy(input_flat,output_flat) kl_loss = - 0.5 * K.mean(1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var), axis=-1) return xent_loss + kl_loss class _LossHistory(Callback): ''' loss history for training variational autoencoder ''' def on_train_begin(self, logs={}): self.losses = [] self.val_losses = [] def on_epoch_end(self, batch, logs={}): self.losses.append(logs.get('loss')) self.val_losses.append(logs.get('val_loss')) def train(self,data,batch_size,epochs=1,validation_data=None,history=False, checkpoint=False,filepath=None): ''' train network on given data parameters: - data: numpy array input data - batch_size: int number of records per batch - epochs: int (default: 1) number of epochs to train for - validation_data: tuple (optional) tuple of numpy arrays (X,y) representing validation dataZ - history: boolean (default: False) whether or not to record training history - checkpoint: boolean (default: False) whether or not to save model after each epoch - filepath: string (optional) path to save model if checkpoint is set to True outputs: None ''' if checkpoint==True and filepath==None: raise Exception("Please enter a path to save the network") callbacks = [] if history: self.history = self._LossHistory() callbacks.append(self.history) if checkpoint: callbacks.append(ModelCheckpoint(filepath)) self.model.fit(data,data,batch_size,epochs=epochs,shuffle=True, validation_data=(data,data),callbacks=callbacks) def save(self,filepath): ''' save the model weights to a file parameters: - filepath: string path to save model weights outputs: None ''' self.model.save_weights(filepath) def load(self,filepath): ''' load model weights from a file parameters: - filepath: string path from which to load model weights outputs: None ''' self.model.load_weights(filepath) def return_embeddings(self,data): ''' return the embeddings for given data parameters: - data: numpy array input data outputs: numpy array of embeddings for input data ''' return self.embedder.predict(data) def generate(self,embedding): ''' return a generated output given a latent embedding parameters: - data: numpy array latent embedding outputs: numpy array of generated output ''' return self.generator.predict(embedding) if __name__ == "__main__": import sys import os import matplotlib.pyplot as plt from scipy.stats import norm # get directory to dcd files args = (sys.argv) if len(args) != 2: raise Exception("Usage: python conv_vae.py <path to contact matrix numpy array>") #define parameters channels = 1 batch_size = 1000 conv_layers = 4 feature_maps = [256,256,256,256] filter_shapes = [(1,3),(1,3),(3,3),(3,3)] strides = [(1,1),(1,3),(1,1),(1,1)] dense_layers = 1 dense_neurons = [128] dense_dropouts = [0] latent_dim = 3 epochs = 1 train_size = 0.9 #load data print "loading data" X = np.load(args[1]) #reshape to 4d tensors image_size = X.shape[-2:] if K.image_dim_ordering() == 'th': tensor_shape = (1,image_size[0],image_size[1]) else: tensor_shape = (image_size[0],image_size[1],1) X = X.reshape((X.shape[0],) + tensor_shape) #test train split idx = int(X.shape[0]*train_size) X_train = X[:idx] X_test = X[idx:] #build autoencoder print "building variational autoencoder" autoencoder = conv_variational_autoencoder( image_size,channels,conv_layers,feature_maps,filter_shapes, strides,dense_layers,dense_neurons,dense_dropouts,latent_dim) #load saved weights if they exist if os.path.isfile("./savedweights.dat"): autoencoder.load("./savedweights.dat") else: autoencoder.train(X_train,batch_size,epochs=epochs, validation_data=(X_test,X_test), checkpoint=True,filepath="./savedweights.dat") moldyn_example/scripts/dcd2array.py 0 → 100644 +100 −0 Original line number Diff line number Diff line import gzip import os import sys import glob import numpy as np import MDAnalysis as mdanal import re import shutil # get directory to dcd files args = (sys.argv) if len(args) != 2: raise Exception("Usage: python feature_extraction.py <path to dcd files>") dcd_path = args[1] if dcd_path[-1] != '/': dcd_path += '/' # directory for results if not os.path.exists('./results'): os.makedirs('./results') # get dataset info files = glob.glob(dcd_path+'*-protein-*.dcd') n = len(files) dataset_name = os.path.basename(files[0]).split('-protein-')[0] file_basename = os.path.basename(files[0])[:-7] # process dcd files k = 0 for i in range(n): print 'processing dcd file %i of %i \r' % (i+1,n) # specify path of structure & trajectory files u0 = mdanal.Universe('%s%s.pdb' % (dcd_path,file_basename), '%s%s-0%i.dcd' % (dcd_path,file_basename,i)) #number of frames in file f = len(u0.trajectory) from MDAnalysis.analysis import contacts # crude definition of salt bridges as contacts between CA atoms CA = "(name CA and resid 1:21)" # reference groups (first frame of the trajectory, but you could also use a # separate PDB, eg crystal structure) CA0 = u0.select_atoms(CA) # running over frames per trajectory for j in range(0, (f)): # calculating and saving native contact dat files per frame # set up analysis of native contacts ("salt bridges"); salt bridges have a # distance <8 A ca = contacts.ContactAnalysis1(u0, selection=(CA, CA), refgroup=(CA0, CA0), radius=8.0, outfile='results/test%i.dat' % k) ca.run(store=True, start=j, stop=j+1, step=1) # reading zipped native contact array files inF_array = gzip.GzipFile("results/test%i.array.gz" % k, 'rb') s_array = inF_array.read() inF_array.close() # copying to another file outF_array = open("results/test%i.array" % k, 'wb') outF_array.write(s_array) outF_array.close() # removing zipped array file os.remove("results/test%i.array.gz" % k) # to next file name numerics k += 1 print "progress: %i frames" % k # concatening frames into single array print "assembling frames into single array" n = len(glob.glob('results/test*.array')) final_array = [] for i in range(n): sys.stdout.write('processing frame %i of %i \r' % (i+1,n)) sys.stdout.flush() frame = [] with open('results/test%i.array' % i, "r") as f: for line in f: data = [float(x) for x in line.split()] frame.append(data) final_array.append(frame) final_array = np.array(final_array) np.save('%s_contact_matrix' % dataset_name,final_array) print "" # concatening labels into single array print "assembling labels into single array" n = len(glob.glob('results/test*.dat')) final_array = [] for i in range(n): sys.stdout.write('processing frame %i of %i \r' % (i+1,n)) sys.stdout.flush() data = np.loadtxt('results/test%i.dat' % i) final_array.append(data) final_array = np.array(final_array) np.save('%s_labels' % dataset_name,final_array) print "" # cleanup print 'cleaning up' shutil.rmtree('./results') Loading
crossbow/crossbowBase.py +6 −0 Original line number Diff line number Diff line Loading @@ -76,8 +76,14 @@ class crossbowBase(object): - tags: list (optional) list of tag dictionaries to add to the package ''' #check if package already exists if package in self.ckan.action.package_list(): raise Exception('package already exists') #check format if " " in package or any(x.isupper() for x in package): raise Exception('package must be lowercase and without spaces') self.ckan.action.package_create( name=package,owner_org=owner_org,title=title,author=author, author_email=author_email,maintainer=maintainer, Loading
crossbow/crossbowGlobus.py +1 −0 Original line number Diff line number Diff line Loading @@ -85,6 +85,7 @@ class crossbowGlobus(crossbowBase): self.client_id = '18270179-b498-4032-9187-ca646d6e29af' self.cadesdtn = 'e6bcd4fa-ac1b-11e6-9aee-22000a1e3b52' self.crossbowdtn = '586dc9b6-189a-11e7-bb9f-22000b9a448b' try: # if we already have tokens, load and use them Loading
moldyn_example/scripts/conv_vae.py 0 → 100644 +376 −0 Original line number Diff line number Diff line ''' Convolutional variational autoencoder in Keras. Based off dense variational autoencoder from https://github.com/fchollet/keras/blob/master/examples/variational_autoencoder.py ''' import numpy as np from keras.layers import Input, Dense, Lambda, Flatten, Reshape, Dropout from keras.layers import Convolution2D, Conv2DTranspose from keras.models import Model from keras.optimizers import SGD, Adam, RMSprop, Adadelta from keras.callbacks import Callback, ModelCheckpoint from keras import backend as K from keras import objectives import warnings class conv_variational_autoencoder(object): ''' variational autoencoder class parameters: - image_size: tuple height and width of images - channels: int number of channels in input images - conv_layers: int number of encoding/decoding convolutional layers - feature_maps: list of ints number of output feature maps for each convolutional layer - filter_shapes: list of tuples convolutional filter shape for each convolutional layer - strides: list of tuples convolutional stride for each convolutional layer - dense_layers: int number of encoding/decoding dense layers - dense_neurons: list of ints number of neurons for each dense layer - dense_dropouts: list of float fraction of neurons to drop in each dense layer (between 0 and 1) - latent_dim: int number of dimensions for latent embedding - activation: string (default='relu') activation function to use for layers - eps_mean: float (default = 0.0) mean to use for epsilon (target distribution for embedding) - eps_std: float (default = 1.0) standard dev to use for epsilon (target distribution for embedding) methods: - train(data,batch_size,epochs=1,history=False,checkpoint=False, filepath=None) train network on given data - save(filepath) save the model weights to a file - load(filepath) load model weights from a file - return_embeddings(data) return the embeddings for given data - generate(embedding) return a generated output given a latent embedding ''' def __init__(self,image_size,channels,conv_layers,feature_maps,filter_shapes, strides,dense_layers,dense_neurons,dense_dropouts,latent_dim, activation='relu',eps_mean=0.0,eps_std=1.0): #check that arguments are proper length if len(filter_shapes)!=conv_layers: raise Exception("number of convolutional layers must equal length of \ filter_shapes list") if len(strides)!=conv_layers: raise Exception("number of convolutional layers must equal length of \ strides list") if len(feature_maps)!=conv_layers: raise Exception("number of convolutional layers must equal length of \ feature_maps list") if len(dense_neurons)!=dense_layers: raise Exception("number of dense layers must equal length of \ dense_neurons list") if len(dense_dropouts)!=dense_layers: raise Exception("number of dense layers must equal length of \ dense_dropouts list") #even shaped filters may cause problems in theano backend even_filters = [f for pair in filter_shapes for f in pair if f % 2 == 0] if K.image_dim_ordering() == 'th' and len(even_filters) > 0: warnings.warn('Even shaped filters may cause problems in Theano backend') self.eps_mean = eps_mean self.eps_std = eps_std self.image_size = image_size #define input layer if K.image_dim_ordering() == 'th': self.input = Input(shape=(channels,image_size[0],image_size[1])) else: self.input = Input(shape=(image_size[0],image_size[1],channels)) #define convolutional encoding layers self.encode_conv = [] layer = Convolution2D(feature_maps[0],filter_shapes[0],padding='same', activation=activation,strides=strides[0])(self.input) self.encode_conv.append(layer) for i in range(1,conv_layers): layer = Convolution2D(feature_maps[i],filter_shapes[i], padding='same',activation=activation, strides=strides[i])(self.encode_conv[i-1]) self.encode_conv.append(layer) #define dense encoding layers self.flat = Flatten()(self.encode_conv[-1]) self.encode_dense = [] layer = Dense(dense_neurons[0],activation=activation)\ (Dropout(dense_dropouts[0])(self.flat)) self.encode_dense.append(layer) for i in range(1,dense_layers): layer = Dense(dense_neurons[i],activation=activation)\ (Dropout(dense_dropouts[i])(self.encode_dense[i-1])) self.encode_dense.append(layer) #define embedding layer self.z_mean = Dense(latent_dim)(self.encode_dense[-1]) self.z_log_var = Dense(latent_dim)(self.encode_dense[-1]) self.z = Lambda(self._sampling, output_shape=(latent_dim,))\ ([self.z_mean, self.z_log_var]) #save all decoding layers for generation model self.all_decoding=[] #define dense decoding layers self.decode_dense = [] layer = Dense(dense_neurons[-1], activation=activation) self.all_decoding.append(layer) self.decode_dense.append(layer(self.z)) for i in range(1,dense_layers): layer = Dense(dense_neurons[-i-1],activation=activation) self.all_decoding.append(layer) self.decode_dense.append(layer(self.decode_dense[i-1])) #dummy model to get image size after encoding convolutions self.decode_conv = [] if K.image_dim_ordering() == 'th': dummy_input = np.ones((1,channels,image_size[0],image_size[1])) else: dummy_input = np.ones((1,image_size[0],image_size[1],channels)) dummy = Model(self.input, self.encode_conv[-1]) conv_size = dummy.predict(dummy_input).shape layer = Dense(conv_size[1]*conv_size[2]*conv_size[3],activation=activation) self.all_decoding.append(layer) self.decode_dense.append(layer(self.decode_dense[-1])) reshape = Reshape(conv_size[1:]) self.all_decoding.append(reshape) self.decode_conv.append(reshape(self.decode_dense[-1])) #define deconvolutional decoding layers for i in range(1,conv_layers): if K.image_dim_ordering() == 'th': dummy_input = np.ones((1,channels,image_size[0],image_size[1])) else: dummy_input = np.ones((1,image_size[0],image_size[1],channels)) dummy = Model(self.input, self.encode_conv[-i-1]) conv_size = list(dummy.predict(dummy_input).shape) if K.image_dim_ordering() == 'th': conv_size[1] = feature_maps[-i] else: conv_size[3] = feature_maps[-i] layer = Conv2DTranspose(feature_maps[-i-1],filter_shapes[-i], padding='same',activation=activation, strides=strides[-i]) self.all_decoding.append(layer) self.decode_conv.append(layer(self.decode_conv[i-1])) layer = Conv2DTranspose(channels,filter_shapes[0],padding='same', activation='sigmoid',strides=strides[0]) self.all_decoding.append(layer) self.output=layer(self.decode_conv[-1]) #build model self.model = Model(self.input, self.output) self.optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0) self.model.compile(optimizer=self.optimizer, loss=self._vae_loss) print "model summary:" self.model.summary() #model for embeddings self.embedder = Model(self.input, self.z_mean) #model for generation self.decoder_input = Input(shape=(latent_dim,)) self.generation = [] self.generation.append(self.all_decoding[0](self.decoder_input)) for i in range(1, len(self.all_decoding)): self.generation.append(self.all_decoding[i](self.generation[i-1])) self.generator = Model(self.decoder_input, self.generation[-1]) def _sampling(self,args): ''' sampling function for embedding layer ''' z_mean,z_log_var = args epsilon = K.random_normal(shape=K.shape(z_mean),mean=self.eps_mean, stddev=self.eps_std) return z_mean + K.exp(z_log_var) * epsilon def _vae_loss(self,input,output): ''' loss function for variational autoencoder ''' input_flat = K.flatten(input) output_flat = K.flatten(output) xent_loss = self.image_size[0] * self.image_size[1] \ * objectives.binary_crossentropy(input_flat,output_flat) kl_loss = - 0.5 * K.mean(1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var), axis=-1) return xent_loss + kl_loss class _LossHistory(Callback): ''' loss history for training variational autoencoder ''' def on_train_begin(self, logs={}): self.losses = [] self.val_losses = [] def on_epoch_end(self, batch, logs={}): self.losses.append(logs.get('loss')) self.val_losses.append(logs.get('val_loss')) def train(self,data,batch_size,epochs=1,validation_data=None,history=False, checkpoint=False,filepath=None): ''' train network on given data parameters: - data: numpy array input data - batch_size: int number of records per batch - epochs: int (default: 1) number of epochs to train for - validation_data: tuple (optional) tuple of numpy arrays (X,y) representing validation dataZ - history: boolean (default: False) whether or not to record training history - checkpoint: boolean (default: False) whether or not to save model after each epoch - filepath: string (optional) path to save model if checkpoint is set to True outputs: None ''' if checkpoint==True and filepath==None: raise Exception("Please enter a path to save the network") callbacks = [] if history: self.history = self._LossHistory() callbacks.append(self.history) if checkpoint: callbacks.append(ModelCheckpoint(filepath)) self.model.fit(data,data,batch_size,epochs=epochs,shuffle=True, validation_data=(data,data),callbacks=callbacks) def save(self,filepath): ''' save the model weights to a file parameters: - filepath: string path to save model weights outputs: None ''' self.model.save_weights(filepath) def load(self,filepath): ''' load model weights from a file parameters: - filepath: string path from which to load model weights outputs: None ''' self.model.load_weights(filepath) def return_embeddings(self,data): ''' return the embeddings for given data parameters: - data: numpy array input data outputs: numpy array of embeddings for input data ''' return self.embedder.predict(data) def generate(self,embedding): ''' return a generated output given a latent embedding parameters: - data: numpy array latent embedding outputs: numpy array of generated output ''' return self.generator.predict(embedding) if __name__ == "__main__": import sys import os import matplotlib.pyplot as plt from scipy.stats import norm # get directory to dcd files args = (sys.argv) if len(args) != 2: raise Exception("Usage: python conv_vae.py <path to contact matrix numpy array>") #define parameters channels = 1 batch_size = 1000 conv_layers = 4 feature_maps = [256,256,256,256] filter_shapes = [(1,3),(1,3),(3,3),(3,3)] strides = [(1,1),(1,3),(1,1),(1,1)] dense_layers = 1 dense_neurons = [128] dense_dropouts = [0] latent_dim = 3 epochs = 1 train_size = 0.9 #load data print "loading data" X = np.load(args[1]) #reshape to 4d tensors image_size = X.shape[-2:] if K.image_dim_ordering() == 'th': tensor_shape = (1,image_size[0],image_size[1]) else: tensor_shape = (image_size[0],image_size[1],1) X = X.reshape((X.shape[0],) + tensor_shape) #test train split idx = int(X.shape[0]*train_size) X_train = X[:idx] X_test = X[idx:] #build autoencoder print "building variational autoencoder" autoencoder = conv_variational_autoencoder( image_size,channels,conv_layers,feature_maps,filter_shapes, strides,dense_layers,dense_neurons,dense_dropouts,latent_dim) #load saved weights if they exist if os.path.isfile("./savedweights.dat"): autoencoder.load("./savedweights.dat") else: autoencoder.train(X_train,batch_size,epochs=epochs, validation_data=(X_test,X_test), checkpoint=True,filepath="./savedweights.dat")
moldyn_example/scripts/dcd2array.py 0 → 100644 +100 −0 Original line number Diff line number Diff line import gzip import os import sys import glob import numpy as np import MDAnalysis as mdanal import re import shutil # get directory to dcd files args = (sys.argv) if len(args) != 2: raise Exception("Usage: python feature_extraction.py <path to dcd files>") dcd_path = args[1] if dcd_path[-1] != '/': dcd_path += '/' # directory for results if not os.path.exists('./results'): os.makedirs('./results') # get dataset info files = glob.glob(dcd_path+'*-protein-*.dcd') n = len(files) dataset_name = os.path.basename(files[0]).split('-protein-')[0] file_basename = os.path.basename(files[0])[:-7] # process dcd files k = 0 for i in range(n): print 'processing dcd file %i of %i \r' % (i+1,n) # specify path of structure & trajectory files u0 = mdanal.Universe('%s%s.pdb' % (dcd_path,file_basename), '%s%s-0%i.dcd' % (dcd_path,file_basename,i)) #number of frames in file f = len(u0.trajectory) from MDAnalysis.analysis import contacts # crude definition of salt bridges as contacts between CA atoms CA = "(name CA and resid 1:21)" # reference groups (first frame of the trajectory, but you could also use a # separate PDB, eg crystal structure) CA0 = u0.select_atoms(CA) # running over frames per trajectory for j in range(0, (f)): # calculating and saving native contact dat files per frame # set up analysis of native contacts ("salt bridges"); salt bridges have a # distance <8 A ca = contacts.ContactAnalysis1(u0, selection=(CA, CA), refgroup=(CA0, CA0), radius=8.0, outfile='results/test%i.dat' % k) ca.run(store=True, start=j, stop=j+1, step=1) # reading zipped native contact array files inF_array = gzip.GzipFile("results/test%i.array.gz" % k, 'rb') s_array = inF_array.read() inF_array.close() # copying to another file outF_array = open("results/test%i.array" % k, 'wb') outF_array.write(s_array) outF_array.close() # removing zipped array file os.remove("results/test%i.array.gz" % k) # to next file name numerics k += 1 print "progress: %i frames" % k # concatening frames into single array print "assembling frames into single array" n = len(glob.glob('results/test*.array')) final_array = [] for i in range(n): sys.stdout.write('processing frame %i of %i \r' % (i+1,n)) sys.stdout.flush() frame = [] with open('results/test%i.array' % i, "r") as f: for line in f: data = [float(x) for x in line.split()] frame.append(data) final_array.append(frame) final_array = np.array(final_array) np.save('%s_contact_matrix' % dataset_name,final_array) print "" # concatening labels into single array print "assembling labels into single array" n = len(glob.glob('results/test*.dat')) final_array = [] for i in range(n): sys.stdout.write('processing frame %i of %i \r' % (i+1,n)) sys.stdout.flush() data = np.loadtxt('results/test%i.dat' % i) final_array.append(data) final_array = np.array(final_array) np.save('%s_labels' % dataset_name,final_array) print "" # cleanup print 'cleaning up' shutil.rmtree('./results')