samsl committed on
Commit
809fb87
1 Parent(s): 093738f

Add initial application

Browse files
Files changed (2) hide show
  1. app.py +49 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from pathlib import Path
4
+ from Bio import SeqIO
5
+ from dscript.pretrained import get_pretrained
6
+ from dscript.language_model import lm_embed
7
+ from tqdm.auto import tqdm
8
+
9
def predict(sequence_file, pairs_file, progress=gr.Progress(track_tqdm=True)):
    """Score protein pairs with the pretrained ``human_v1`` model from ``dscript``.

    Args:
        sequence_file: Uploaded FASTA file object; only its ``.name`` path is
            read.
        pairs_file: Uploaded ``.csv`` or ``.tsv`` file object; only its
            ``.name`` path is read. The first two columns are taken as the
            protein identifiers of each pair; any further columns are ignored.
        progress: Gradio progress tracker. Gradio only mirrors the tqdm bar
            below when the tracker is declared as a default argument of the
            handler, so callers never pass it explicitly.

    Returns:
        A ``pandas.DataFrame`` with columns ``"Protein 1"``, ``"Protein 2"``
        and ``"Interaction"``, one row per input pair in input order.

    Raises:
        gr.Error: If the pairs file is not ``.csv``/``.tsv``, has fewer than
            two columns, or names a protein absent from the FASTA file.
    """
    # Validate the cheap inputs first so the user gets feedback before the
    # model is loaded and any embeddings are computed.
    pairs_path = Path(pairs_file.name)
    separators = {".csv": ",", ".tsv": "\t"}
    suffix = pairs_path.suffix.lower()
    if suffix not in separators:
        raise gr.Error(
            f"Unsupported pairs file type '{pairs_path.suffix}'; "
            "expected .csv or .tsv."
        )

    # NOTE(review): read_csv treats the first row as a header, so a headerless
    # pairs file would lose its first pair -- confirm the expected input format.
    pairs = pd.read_csv(pairs_path, sep=separators[suffix])
    if pairs.shape[1] < 2:
        raise gr.Error(
            "The pairs file must contain at least two columns of protein "
            "identifiers."
        )
    pairs = pairs.iloc[:, :2].copy()
    pairs.columns = ["protein1", "protein2"]

    seqs = SeqIO.to_dict(SeqIO.parse(sequence_file.name, "fasta"))

    # Report every unknown identifier at once instead of failing with a bare
    # KeyError part-way through a long prediction run.
    requested = pd.unique(pairs[["protein1", "protein2"]].to_numpy().ravel())
    missing = [str(name) for name in requested if name not in seqs]
    if missing:
        preview = ", ".join(missing[:10])
        if len(missing) > 10:
            preview += ", ..."
        raise gr.Error(
            f"{len(missing)} protein ID(s) in the pairs file are not in the "
            f"FASTA file: {preview}"
        )

    model = get_pretrained('human_v1')

    rows = []
    pair_iter = zip(pairs["protein1"], pairs["protein2"])
    for prot1, prot2 in tqdm(pair_iter, total=len(pairs)):
        lm1 = lm_embed(str(seqs[prot1].seq))
        lm2 = lm_embed(str(seqs[prot2].seq))
        interaction = model.predict(lm1, lm2).item()
        rows.append([prot1, prot2, interaction])

    return pd.DataFrame(rows, columns=["Protein 1", "Protein 2", "Interaction"])
35
+
36
# Input widgets: one upload for the FASTA sequences, one for the pairs table.
_sequence_upload = gr.File(label="Sequences (.fasta)", file_types=[".fasta"])
_pairs_upload = gr.File(label="Pairs (.csv/.tsv)", file_types=[".csv", ".tsv"])

# Output widget: table whose headers match the DataFrame returned by predict().
_results_table = gr.DataFrame(
    label="Results",
    headers=["Protein 1", "Protein 2", "Interaction"],
)

# Wire the prediction function to the upload widgets and the results table.
demo = gr.Interface(
    fn=predict,
    inputs=[_sequence_upload, _pairs_upload],
    outputs=[_results_table],
)

if __name__ == "__main__":
    # Enable the request queue (max_size=20) before starting the app.
    demo.queue(max_size=20)
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ dscript
2
+ biopython
3
+ pandas
4
+ tqdm