Commit 1055499d authored by David M. Rogers's avatar David M. Rogers
Browse files

Modified package_out to save cluster bests.

parent dae026f5
......@@ -26,15 +26,23 @@ def tar_iter(name, ext=None):
yield fname, name, f
# parse the clusters, in rank order, from each included xml file.
# returns [ (score : float, run : int) ]
# example cluster syntax is:
# <cluster cluster_rank="1" lowest_binding_energy="-4.09" run="9" mean_binding_energy="-4.09" num_in_clus="1" />
# Matches one <cluster ... /> element (leading whitespace allowed).
# Groups: 1=cluster_rank, 2=lowest_binding_energy, 3=run,
#         4=mean_binding_energy, 5=num_in_clus
cl = re.compile(r'\s*<cluster cluster_rank="([^"]*)" lowest_binding_energy="([^"]*)" run="([^"]*)" mean_binding_energy="([^"]*)" num_in_clus="([^"]*)" />')
# *could* also parse runs:
# example run syntax is:
# <run rank="1" sub_rank="1" run="5" binding_energy="-3.96" cluster_rmsd="0.00" reference_rmsd="86.97" />
def xml_to_energy(f):
    """Collect the best score and run number of every cluster listed in f.

    f is any iterable of text lines (e.g. a file opened in text mode).
    Lines that do not match the <cluster ... /> pattern are ignored.

    Returns a list [(lowest_binding_energy : float, run : int)] with one
    entry per cluster line, in the order the lines appear.  The list is
    empty when no cluster line is found.

    Raises AssertionError if the cluster_rank values are not exactly
    1, 2, 3, ... in order of appearance.
    """
    ans = []
    for line in f:
        m = cl.match(line)
        if not m:
            continue
        expected_rank = len(ans) + 1
        if int(m[1]) != expected_rank:
            # Raised explicitly (rather than via an assert statement) so
            # the consistency check is not stripped when running with -O.
            raise AssertionError(
                "expected cluster_rank %d, found %s" % (expected_rank, m[1]))
        ans.append((float(m[2]), int(m[3])))
    return ans
def grep_all(f, *keys):
out = dict((k,[]) for k in keys)
......@@ -55,9 +63,23 @@ def parse_dlg(f):
tors = int( dlg['Number of rotatable bonds:'][0][26:] )
tors = np.nan
return confs, tors # en, pdbqt
return confs, tors # [(en, pdbqt)], int
# parse the xml and dlg file for the ligand name
def collect_lig(name, max_clusters=3):
    """Gather the top cluster poses for one ligand.

    Reads name + '.xml' through xml_to_energy and name + '.dlg' through
    parse_dlg, keeps at most max_clusters of the xml entries, and pairs
    each kept energy with element [1] of the conformation selected by
    that entry's run value.

    Returns ([(en, confs[run][1])], tors), where confs and tors are the
    two values returned by parse_dlg.
    """
    with open(name+'.xml', encoding='utf-8') as xml_file:
        clusters = xml_to_energy(xml_file)
    with open(name+'.dlg', encoding='utf-8') as dlg_file:
        confs, tors = parse_dlg(dlg_file)

    # Only the leading max_clusters entries are kept.
    kept = clusters[:max_clusters] if len(clusters) > max_clusters else clusters

    # NOTE(review): the run value from the xml is used directly as an index
    # into confs -- confirm that run numbering and confs ordering agree
    # (0-based vs 1-based).
    poses = []
    for en, run in kept:
        poses.append((en, confs[run][1]))
    return poses, tors
# input strings all contain "DOCKED:"
# [(en,pdbqt)]
def dlg_to_confs(lines):
confs = []
conf = []
......@@ -74,7 +96,7 @@ def dlg_to_confs(lines):
confs.append( (en, conf) )
conf = []
en = None
return confs
# Basically, autodock will spit out -nruns number of poses, which I chose to output to a .pdb file so I could load them in typical molecular visualization tools. This also means that you'd need to get the energies for all the models, and sort them. This is what I was doing on a much smaller set:
......@@ -107,8 +129,9 @@ def validate_conf(conf):
return 0
return len(xyz)
def add_score(f, confs):
conf, ntor = parse_dlg(f)
def add_score(name, confs):
#conf, ntor = parse_dlg(f)
conf, ntor = collect_lig(name, len(confs))
for i in range(len(confs)):
if len(conf) < i:
......@@ -41,8 +41,7 @@ def main(argv):
for j in c: # en, crd
with open(fname) as f:
add_score(f, confs)
add_score(name, confs)
write_df(argv[2], names, confs)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment