simonduerr committed on
Commit
3055c36
1 Parent(s): 2aa7536

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -14
app.py CHANGED
@@ -157,7 +157,7 @@ def make_tied_positions_for_homomers(pdb_dict_list):
157
  return my_dict
158
 
159
 
160
- def align_structures(pdb1, pdb2, lenRes, index):
161
  """Take two structure and superimpose pdb1 on pdb2"""
162
  import Bio.PDB
163
  import subprocess
@@ -173,13 +173,13 @@ def align_structures(pdb1, pdb2, lenRes, index):
173
 
174
  io = Bio.PDB.PDBIO()
175
  io.set_structure(ref_structure)
176
- io.save(f"outputs/reference.pdb")
177
  io.set_structure(sample_structure)
178
- io.save(f"outputs/out_{index}_aligned.pdb")
179
  # Doing this to get around biopython CEALIGN bug
180
  # subprocess.call("pymol -c -Q -r cealign.pml", shell=True)
181
 
182
- return aligner.rms, "outputs/reference.pdb", f"outputs/out_{index}_aligned.pdb"
183
 
184
 
185
  def save_pdb(outs, filename, LEN):
@@ -198,7 +198,7 @@ def save_pdb(outs, filename, LEN):
198
 
199
 
200
  @ray.remote(num_gpus=1, max_calls=1)
201
- def run_alphafold(sequences, num_recycles):
202
  recycles = int(num_recycles)
203
  RUNNER, OPT = setup_af(sequences[0])
204
  plddts = []
@@ -232,7 +232,8 @@ def run_alphafold(sequences, num_recycles):
232
  outs, f"/home/duerr/phd/08_Code/ProteinMPNN/outputs/out_{i}.pdb", LEN
233
  )
234
  else:
235
- save_pdb(outs, f"/home/user/app/outputs/out_{i}.pdb", LEN)
 
236
  return plddts, paes, LEN
237
 
238
 
@@ -320,8 +321,10 @@ def preprocess_mol(pdb_code="", filepath=""):
320
  os.system(f"wget -qnc https://files.rcsb.org/view/{pdb_code}.pdb")
321
  print(os.getcwd())
322
  print(os.listdir())
 
323
  mol = Molecule(f"{pdb_code}.pdb")
324
- mol.write("original.pdb")
 
325
  # clean messy files and only include protein itself
326
  mol.filter("protein")
327
  # renumber using moleculekit 0...len(protein)
@@ -334,8 +337,10 @@ def preprocess_mol(pdb_code="", filepath=""):
334
  indexes.append(j)
335
  j += 1
336
  df["proteinMPNN_index"] = indexes
337
- mol.write("cleaned.pdb")
338
- return "cleaned.pdb", df
 
 
339
 
340
 
341
  def assign_sasa(mol):
@@ -822,13 +827,14 @@ def update_AF(seq_dict, pdb, num_recycles, selectedResidues):
822
  plt.figure(),
823
  plt.figure(),
824
  )
 
825
 
826
- plddts, paes, num_res = ray.get(run_alphafold.remote(allSeqs, num_recycles))
827
 
828
  sequences = {}
829
  for i in range(lenSeqs):
830
  rms, input_pdb, aligned_pdb = align_structures(
831
- pdb, f"outputs/out_{i}.pdb", num_res, i
832
  )
833
  sequences[i] = {
834
  "Seq": i,
@@ -896,6 +902,7 @@ def update_AF(seq_dict, pdb, num_recycles, selectedResidues):
896
  selectedResidues,
897
  allSeqs,
898
  sequences,
 
899
  ),
900
  plotAF_plddt,
901
  pae_plots,
@@ -913,10 +920,10 @@ def read_mol(molpath):
913
 
914
 
915
  def molecule(
916
- input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs, sequences
917
  ):
918
 
919
- mol = read_mol("outputs/reference.pdb")
920
  options = ""
921
  pred_mol = "["
922
  seqdata = "{"
@@ -937,7 +944,7 @@ def molecule(
937
  + '"}'
938
  )
939
  options += f'<option {selected} value="{i}">sequence {i} </option>' # RMSD {sequences[i]["RMSD"]}, score {sequences[i]["Score"]}, recovery {sequences[i]["Recovery"]} pLDDT {sequences[i]["Mean pLDDT"]}
940
- p = f"outputs/out_{i}_aligned.pdb"
941
  pred_mol += f"`{read_mol(p)}`"
942
  selected = ""
943
  if i != lenSeqs - 1:
 
157
  return my_dict
158
 
159
 
160
+ def align_structures(pdb1, pdb2, lenRes, index, random_dir):
161
  """Take two structure and superimpose pdb1 on pdb2"""
162
  import Bio.PDB
163
  import subprocess
 
173
 
174
  io = Bio.PDB.PDBIO()
175
  io.set_structure(ref_structure)
176
+ io.save(f"{random_dir}/outputs/reference.pdb")
177
  io.set_structure(sample_structure)
178
+ io.save(f"{random_dir}/outputs/out_{index}_aligned.pdb")
179
  # Doing this to get around biopython CEALIGN bug
180
  # subprocess.call("pymol -c -Q -r cealign.pml", shell=True)
181
 
182
+ return aligner.rms, f"{random_dir}/outputs/reference.pdb", f"{random_dir}/outputs/out_{index}_aligned.pdb"
183
 
184
 
185
  def save_pdb(outs, filename, LEN):
 
198
 
199
 
200
  @ray.remote(num_gpus=1, max_calls=1)
201
+ def run_alphafold(sequences, num_recycles, random_dir):
202
  recycles = int(num_recycles)
203
  RUNNER, OPT = setup_af(sequences[0])
204
  plddts = []
 
232
  outs, f"/home/duerr/phd/08_Code/ProteinMPNN/outputs/out_{i}.pdb", LEN
233
  )
234
  else:
235
+ print(f"saving to {random_dir.name}")
236
+ save_pdb(outs, f"{random_dir.name}/outputs/out_{i}.pdb", LEN)
237
  return plddts, paes, LEN
238
 
239
 
 
321
  os.system(f"wget -qnc https://files.rcsb.org/view/{pdb_code}.pdb")
322
  print(os.getcwd())
323
  print(os.listdir())
324
+ print(os.system(f"cat {pdb_code}.pdb"))
325
  mol = Molecule(f"{pdb_code}.pdb")
326
+ tf_original = tempfile.NamedTemporaryFile(delete=False)
327
+ mol.write(tf_original.name)
328
  # clean messy files and only include protein itself
329
  mol.filter("protein")
330
  # renumber using moleculekit 0...len(protein)
 
337
  indexes.append(j)
338
  j += 1
339
  df["proteinMPNN_index"] = indexes
340
+ tf_cleaned = tempfile.NamedTemporaryFile(delete=False)
341
+
342
+ mol.write(tf_cleaned.name)
343
+ return tf_cleaned.name, df
344
 
345
 
346
  def assign_sasa(mol):
 
827
  plt.figure(),
828
  plt.figure(),
829
  )
830
+ random_dir = tempfile.NamedTemporaryDir(delete=False)
831
 
832
+ plddts, paes, num_res = ray.get(run_alphafold.remote(allSeqs, num_recycles, random_dir ))
833
 
834
  sequences = {}
835
  for i in range(lenSeqs):
836
  rms, input_pdb, aligned_pdb = align_structures(
837
+ pdb, f"{random_dir}/outputs/out_{i}.pdb", num_res, i, random_dir.name
838
  )
839
  sequences[i] = {
840
  "Seq": i,
 
902
  selectedResidues,
903
  allSeqs,
904
  sequences,
905
+ random_dir.name
906
  ),
907
  plotAF_plddt,
908
  pae_plots,
 
920
 
921
 
922
  def molecule(
923
+ input_pdb, aligned_pdb, lenSeqs, num_res, selectedResidues, allSeqs, sequences, random_dir
924
  ):
925
 
926
+ mol = read_mol(f"{random_dir}/outputs/reference.pdb")
927
  options = ""
928
  pred_mol = "["
929
  seqdata = "{"
 
944
  + '"}'
945
  )
946
  options += f'<option {selected} value="{i}">sequence {i} </option>' # RMSD {sequences[i]["RMSD"]}, score {sequences[i]["Score"]}, recovery {sequences[i]["Recovery"]} pLDDT {sequences[i]["Mean pLDDT"]}
947
+ p = f"{random_dir}/outputs/out_{i}_aligned.pdb"
948
  pred_mol += f"`{read_mol(p)}`"
949
  selected = ""
950
  if i != lenSeqs - 1: