Upload 12 files
Browse files- file/Merge_Dlsc.py +38 -0
- file/Plt.py +35 -0
- file/Preinput_Merge.py +83 -0
- file/README.md +30 -0
- file/Rfile.py +11 -0
- file/Sort_Dlkcat.py +39 -0
- file/Sort_Sco_Kcat.py +28 -0
- file/Sort_Scores.py +35 -0
- file/background.jpeg +0 -0
- file/result.jpg +0 -0
- file/test.py +65 -0
- file/tname.py +6 -0
file/Merge_Dlsc.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
from tname import *
|
3 |
+
from Rfile import *
|
4 |
+
|
5 |
+
|
6 |
+
def Merge_Dlsc(sc_file, dl_file):
    """Append per-sequence scores as a new column of a kcat table.

    The scores file is read as alternating lines: lines 1, 3, 5, ...
    (1-based) must contain a ``score=<number>,`` field, and the lines in
    between are ignored.  The kcat table is read as one header line followed
    by one data row per sequence.  The k-th score is appended, tab-separated,
    to the k-th data row and the header gains a ``scores value`` column, so
    both files must list the sequences in the same order.

    Args:
        sc_file: Object whose ``.name`` attribute is the path of the scores
            file.
        dl_file: Object whose ``.name`` attribute is the path of the kcat
            table.

    Returns:
        The path of the merged file, ``Name() + "kcat_scores.tsv"``.

    Raises:
        ValueError: If the kcat table is empty, a score line holds no
            parsable score, or the table has more data rows than there are
            scores.
    """
    # Raw string with an escaped dot; accepts multi-digit, integer and
    # negative scores.  The leading ".*" is greedy, so when a line holds
    # several matching "score=" fields the last one is captured.
    score_pattern = re.compile(r'.*score=(-?\d+(?:\.\d+)?),')

    scores = []
    for line_no, line in enumerate(j_reads(sc_file.name), start=1):
        # Only every other line carries a score; blank lines are tolerated.
        if line_no % 2 == 0 or not line.strip():
            continue
        found = score_pattern.match(line)
        if found is None:
            raise ValueError(
                f"no 'score=<number>,' field on line {line_no} of {sc_file.name}"
            )
        scores.append(float(found.group(1)))

    table = j_reads(dl_file.name)
    if not table:
        raise ValueError(f"{dl_file.name} is empty")
    header = table[0].strip() + "\t" + "scores value" + "\n"
    rows = [row.strip() for row in table[1:] if row.strip()]
    if len(rows) > len(scores):
        raise ValueError(
            f"{dl_file.name} has {len(rows)} data rows but only "
            f"{len(scores)} scores were found in {sc_file.name}"
        )

    name = Name() + r"kcat_scores.tsv"  # result file name
    # "w" instead of "a": Name() only has minute resolution, so appending
    # would stack a second run onto the output of the first one.
    with open(name, "w") as f:
        f.write(header)
        for row, score in zip(rows, scores):
            f.write(row + "\t" + str(score) + "\n")

    return name
|
file/Plt.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from matplotlib import pyplot as plt
|
2 |
+
|
3 |
+
|
4 |
+
def Plt(file):
    """Plot the accuracy and loss curves recorded in a log file.

    Every line that starts with ``loss`` is split on whitespace; the second
    token is read as the loss and the fourth token as the accuracy.  The two
    series are drawn as stacked subplots and saved to ``result.jpg`` in the
    current working directory.

    Args:
        file: Object whose ``.name`` attribute is the path of the log file.

    Returns:
        The string ``"result.jpg"``.
    """
    losses = []
    accuracies = []

    # The context manager closes the log even if a line fails to parse.
    with open(file.name, 'r') as log:
        for line in log:
            if line.startswith("loss"):
                fields = line.split()
                losses.append(float(fields[1]))
                accuracies.append(float(fields[3]))

    print(losses)
    epochs = range(len(losses))

    # Draw on a fresh figure and close it afterwards; otherwise repeated
    # calls would keep adding curves to the same implicit pyplot figure.
    figure = plt.figure()
    try:
        plt.subplot(2, 1, 1)
        plt.plot(epochs, accuracies)
        plt.title('Test accuracy vs. epoches')
        plt.ylabel('Test accuracy')

        plt.subplot(2, 1, 2)
        plt.plot(epochs, losses)
        plt.xlabel('Test loss vs. epoches')
        plt.ylabel('Test loss')

        plt.savefig("result.jpg")
    finally:
        plt.close(figure)
    return "result.jpg"
|
35 |
+
|
file/Preinput_Merge.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
from tname import *
|
4 |
+
from Rfile import *
|
5 |
+
|
6 |
+
|
7 |
+
def Strip(seq_file):
    """Join sequences that are wrapped over several lines into single lines.

    A line is treated as a header when its first character is ``>``.  A
    sequence line that is followed by another sequence line is reduced to its
    first whitespace-separated token and written without a line break, so
    consecutive fragments are concatenated.  Header lines, and the last
    fragment before the next header, are written unchanged.  Blank lines are
    ignored and an empty input produces an empty output file.

    Args:
        seq_file: Object whose ``.name`` attribute is the path of the
            sequence file.

    Returns:
        The path of the rewritten file, ``Name() + "input.tsv"``.
    """
    # Dropping blank lines first avoids an IndexError on ``"".split()[0]``.
    lines = [line for line in j_reads(seq_file.name) if line.strip()]

    ina = Name() + r"input.tsv"  # result file name

    # "w" instead of "a": Name() only has minute resolution, so appending
    # would stack a second run onto the output of the first one.
    with open(ina, "w") as f:
        for current, following in zip(lines, lines[1:]):
            if current[0] != '>' and following[0] != '>':
                # Sequence continues on the next line: drop the line break.
                f.write(current.split()[0])
            else:
                f.write(current)
        # The last line has no successor to compare with; write it as is.
        if lines:
            f.write(lines[-1])
    return ina
|
25 |
+
|
26 |
+
|
27 |
+
def Merge(smi_file, seq_file):
    """Combine one SMILES string with every sequence into a kcat input table.

    The first line of the SMILES file is used for all rows.  The sequence
    file is read as alternating lines; only the odd-indexed lines (the 2nd,
    4th, ...) are used as sequences, and they are named ``>seq0``, ``>seq1``,
    ... in order of appearance.

    Args:
        smi_file: Object whose ``.name`` attribute is the path of the SMILES
            file.
        seq_file: Object whose ``.name`` attribute is the path of the
            sequence file (one sequence per line, no wrapped sequences).

    Returns:
        The path of the table, ``Name() + "kcat_input.tsv"``.
    """
    smile = j_read(smi_file.name).strip("\n")
    contents = j_reads(seq_file.name)

    name = Name() + r"kcat_input.tsv"  # result file name

    # "w" instead of "a": Merge and Merge_All share this file name and Name()
    # only has minute resolution, so appending would mix separate runs.
    with open(name, "w") as f3:
        # NOTE(review): the header columns are separated by spaces while the
        # data rows below are tab-separated - confirm this is intended.
        f3.write("Substrate Name Substrate SMILES Protein Sequence")
        f3.write("\n")

        for number, sequence in enumerate(contents[1::2]):
            # index <TAB> SMILES <TAB> sequence (keeps its own line break)
            f3.write(">seq" + str(number) + "\t" + smile + "\t" + sequence)
    return name
|
53 |
+
|
54 |
+
|
55 |
+
def Merge_All(smi_file, seq_file):
    """Combine one SMILES string with every sequence, keeping the headers.

    Works like ``Merge`` but names each row after the line that precedes the
    sequence (its header): spaces in the header are replaced by underscores
    and its line break is removed.

    Args:
        smi_file: Object whose ``.name`` attribute is the path of the SMILES
            file; only its first line is used.
        seq_file: Object whose ``.name`` attribute is the path of the
            sequence file (alternating header / sequence lines).

    Returns:
        The path of the table, ``Name() + "kcat_input.tsv"``.
    """
    smile = j_read(smi_file.name).strip("\n")
    contents = j_reads(seq_file.name)

    name = Name() + r"kcat_input.tsv"  # result file name

    # "w" instead of "a": Merge and Merge_All share this file name and Name()
    # only has minute resolution, so appending would mix separate runs.
    with open(name, "w") as f3:
        # NOTE(review): the header columns are separated by spaces while the
        # data rows below are tab-separated - confirm this is intended.
        f3.write("Substrate Name Substrate SMILES Protein Sequence")
        f3.write("\n")

        # Pair every even-indexed line (header) with the line after it.
        for header, sequence in zip(contents[0::2], contents[1::2]):
            info = header.replace(' ', '_').replace('\n', '')
            # header <TAB> SMILES <TAB> sequence (keeps its own line break)
            f3.write(info + "\t" + smile + "\t" + sequence)
    return name
|
file/README.md
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
```
|
2 |
+
1. Sort 模块里,可以进行dlkcat,scores值的排序,选择相关文件,点击对应按钮,即可进行排序
|
3 |
+
2. Pre Merge 模块里,进行smile 与 seq序列文件的合并,
|
4 |
+
如果序列文件中的单条序列被分成了多行(而不是写在一行内),需要先去除换行符:在 strip_file 处上传文件并点击 Strip 按钮,即可生成新的序列文件。注意:除文件末尾可以有一行空行外,其余位置不能有空行
|
5 |
+
合并smile 与 seq 文件时,有两种情况:
|
6 |
+
|
7 |
+
一种是序列格式文件如下所示的:
|
8 |
+
>1J3U_A/1-468 Crystal structure of aspartase from Bacillus sp. YM55-1 [Bacillus sp. YM55-1]
|
9 |
+
MNTDVRIEKDFLGEKEIPKDAYYGVQTIRATENFPITGYRIHPELIKSLGIVKKSAALANMEVGLLDKEVGQ
|
10 |
+
YIVKAADEVIEGKWNDQFIVDPIQGGAGTSINMNANEVIANRALELMGEEKGNYSKISPNSHVNMSQSTNDA
|
11 |
+
FPTATHIAVLSLLNQLIETTKYMQQEFMKKADEFAGVIKMGRTHLQDAVPILLGQEFEAYARVIARDIERIA
|
12 |
+
NTRNNLYDINMGATAVGTGLNADPEYISIVTEHLAKFSGHPLRSAQHLVDATQNTDCYTEVSSALKVCMINM
|
13 |
+
SKIANDLRLMASGPRAGLSEIVLPARQPGSSIMPGKVNPVMPEVMNQVAFQVFGNDLTITSASEAGQFELNV
|
14 |
+
MEPVLFFNLIQSISIMTNVFKSFTENCLKGIKANEERMKEYVEKSIGIITAINPHVGYETAAKLAREAYLTG
|
15 |
+
ESIRELCIKYGVLTEEQLNEILNPYEMIHPGIAGRK
|
16 |
+
>WP_016839137.1/1-468 aspartate ammonia-lyase [Ureibacillus thermosphaericus]
|
17 |
+
MNTDVRIEKDFLGEKEIPKDAYYGVQTIRATENFPITGYRIHPELIKSLGIVKKSAALANMEVGLLDKEVGQ
|
18 |
+
YIVKAADEVIEGKWNDQFIVDPIQGGAGTSINMNANEVIANRALELMGEEKGNYSKISPNSHVNMSQSTNDA
|
19 |
+
合并时需要保存序列的来源这些相关信息,点击Merge All 按钮
|
20 |
+
|
21 |
+
另一种是不需要保存序列来源信息的情况(序列名称按 >seq0、>seq1…… 依次编号),点击 Merge 按钮
|
22 |
+
|
23 |
+
3. Merge Dlsc 模块里,进行scores 与 Dlkcat文件的合并,合并时注意两个文件的序列相对应
|
24 |
+
```
|
25 |
+
|
26 |
+
|
27 |
+
|
28 |
+
|
29 |
+
|
30 |
+
|
file/Rfile.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def j_reads(file):
    """Return all lines of the text file at path ``file`` as a list.

    Each element keeps its trailing line break; an empty file gives ``[]``.
    """
    with open(file, "r") as handle:
        return list(handle)
|
5 |
+
|
6 |
+
|
7 |
+
def j_read(file):
    """Return the first line of the text file at path ``file``.

    The line keeps its trailing line break; an empty file gives ``""``.
    """
    with open(file, "r") as handle:
        return next(handle, "")
|
11 |
+
|
file/Sort_Dlkcat.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from tname import *
|
2 |
+
from Rfile import *
|
3 |
+
|
4 |
+
def Sort_Dlkcat(file):
    """Write the rows of a kcat table as FASTA-like records sorted by kcat.

    The first line of the input is skipped as a header.  Every other line is
    split on whitespace; token 0 is used as the name, token 2 as the sequence
    and token 3 as the kcat value.  The first data row is always written
    first and does not take part in the sorting; the remaining rows follow in
    descending kcat order.  Blank lines are ignored.

    Each record is written as ``<name>\\tKcat value=<kcat>`` followed by the
    sequence on its own line.

    Args:
        file: Object whose ``.name`` attribute is the path of the kcat table.

    Returns:
        The path of the sorted file, ``Name() + "kcat_sort.fa"``.
    """
    rows = [line.split() for line in j_reads(file.name)[1:] if line.strip()]

    # Keep the first record in place; rank the rest from high to low kcat.
    leading, ranked = rows[:1], rows[1:]
    ranked.sort(key=lambda row: float(row[3]), reverse=True)

    name = Name() + r"kcat_sort.fa"  # result file name

    # "w" instead of "a": Name() only has minute resolution, so appending
    # would stack a second run onto the output of the first one.
    with open(name, "w") as f:
        for row in leading + ranked:
            f.write(row[0] + "\t" + "Kcat value=" + row[3] + "\n")
            f.write(row[2] + "\n")

    return name
|
file/Sort_Sco_Kcat.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from tname import *
|
2 |
+
from Rfile import *
|
3 |
+
|
4 |
+
|
5 |
+
def Sort_Sco_Kcat(file):
    """Sort the rows of a merged kcat/scores table by kcat, high to low.

    The first two lines (the header and the first data row) are copied
    unchanged and do not take part in the sorting.  All following non-blank
    lines are ordered by their fourth whitespace-separated token, read as a
    float, in descending order.

    Args:
        file: Object whose ``.name`` attribute is the path of the table.

    Returns:
        The path of the sorted file, ``Name() + "mkcat_sort.fa"``.
    """
    contents = j_reads(file.name)
    kcatname = Name() + r"mkcat_sort.fa"  # result file name

    fixed = contents[:2]
    ranked = sorted(
        (line for line in contents[2:] if line.strip()),
        key=lambda line: float(line.split()[3]),
        reverse=True,
    )

    # "w" instead of "a": Name() only has minute resolution, so appending
    # would stack a second run onto the output of the first one.
    with open(kcatname, "w") as f:
        for line in fixed + ranked:
            # The input's last line may lack a line break; once it is moved
            # up by the sort it would otherwise be glued to the next row.
            f.write(line.rstrip("\n") + "\n")
    return kcatname
|
file/Sort_Scores.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
from tname import *
|
3 |
+
from Rfile import *
|
4 |
+
|
5 |
+
|
6 |
+
def Sort_Scores(file):
    """Sort header/sequence line pairs of a scores file by score, high to low.

    The input is read as consecutive pairs of lines: a header containing a
    ``score=<number>,`` field, followed by a sequence line.  The first pair
    (the first two lines) is copied unchanged and is not sorted.  All later
    pairs are written in descending score order.  Pairs whose header holds no
    parsable score, and a trailing header without a sequence line, are left
    out of the result.

    Args:
        file: Object whose ``.name`` attribute is the path of the scores file.

    Returns:
        The path of the sorted file, ``Name() + "scores_sort.tsv"``.
    """
    # Raw string with an escaped dot; accepts multi-digit, integer and
    # negative scores.  The leading ".*" is greedy, so when a line holds
    # several matching "score=" fields the last one is captured.
    score_pattern = re.compile(r'.*score=(-?\d+(?:\.\d+)?),')

    contents = j_reads(file.name)

    # Keep each score together with its own pair of lines, so a header
    # without a score can never shift the pairing of the following records.
    records = []
    for header, sequence in zip(contents[2::2], contents[3::2]):
        found = score_pattern.match(header)
        if found:
            records.append((float(found.group(1)), header, sequence))
    records.sort(key=lambda record: record[0], reverse=True)

    na = Name() + r"scores_sort.tsv"  # result file name

    # "w" instead of "a": Name() only has minute resolution, so appending
    # would stack a second run onto the output of the first one.
    with open(na, "w") as f:
        ordered = list(contents[:2])
        for _, header, sequence in records:
            ordered.append(header)
            ordered.append(sequence)
        for line in ordered:
            # The input's last line may lack a line break; once it is moved
            # up by the sort it would otherwise be glued to the next line.
            f.write(line.rstrip("\n") + "\n")
    return na
|
file/background.jpeg
ADDED
file/result.jpg
ADDED
file/test.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from Sort_Scores import *
|
3 |
+
from Preinput_Merge import *
|
4 |
+
from Sort_Dlkcat import *
|
5 |
+
from Merge_Dlsc import *
|
6 |
+
from Sort_Sco_Kcat import *
|
7 |
+
from Plt import *
|
8 |
+
|
9 |
+
# Gradio front end: one tab per tool, then each button is wired to its handler.
# NOTE(review): confirm that the nesting of rows, outputs and click wiring
# below matches the intended layout.
with gr.Blocks(css=".gradio-container {background-image: url('file=background.jpeg')}") as demo:
    gr.Markdown("Welcome using this demo.")

    # Landing tab: static help text only.
    with gr.Tab("HelloWorld"):
        for paragraph in (
            "Welcome using this demo.",
            "This is a succend test",
            "I think this demo can do some things",
            "在sort里,可以对scores文件,dlkcat文件,以及合并后的scores与dlkcat文件进行排序",
            "Pre Merge里,可以将序列文件与smi文件进行合并,合并后可以进行dlkcat值的计算,合并前若序列文件需要处理换行符也可以对其进行处理",
            "Merge Dlsc里,可以合并scores值文件和dlkcat文件,主义这两个文件序列需要一致",
        ):
            gr.Markdown(paragraph)

    # Sorting tools share one input and one output file widget.
    with gr.Tab("Sort"):
        file1_input = gr.File(label="输入相关文件")
        file1_output = gr.File()
        with gr.Row():
            file1_button1 = gr.Button("Sort Scores")
            file1_button2 = gr.Button("Sort Dlkcat")
            file1_button3 = gr.Button("Sort Mergekcat")

    # Line-break stripping and SMILES/sequence merging.
    with gr.Tab("Pre Merge"):
        with gr.Row():
            file2_input1 = gr.File(label="strip_file")
            file2_input2 = gr.File(label="smi_file")
            file2_input3 = gr.File(label="seq_file")
        file2_output = gr.File()
        file2_button1 = gr.Button("Strip")
        with gr.Row():
            file2_button2 = gr.Button("Merge")
            file2_button3 = gr.Button("Merge All")

    # Merging of a scores file with a kcat table.
    with gr.Tab("Merge Dlsc"):
        with gr.Row():
            file4_input1 = gr.File(label="sc_file")
            file4_input2 = gr.File(label="cat_file")
        file4_output = gr.File()
        file4_button = gr.Button("Merge")

    # Plot of a log file.
    with gr.Tab("Plt Picture"):
        file5_input = gr.File(label="log_file")
        file5_output = gr.File()
        file5_button = gr.Button("Plt")

    with gr.Accordion("Open for More!"):
        gr.Markdown("Look at me...")

    # (button, handler, inputs, output), registered in the original order.
    for button, handler, sources, target in (
        (file1_button1, Sort_Scores, file1_input, file1_output),
        (file1_button2, Sort_Dlkcat, file1_input, file1_output),
        (file1_button3, Sort_Sco_Kcat, file1_input, file1_output),
        (file2_button1, Strip, file2_input1, file2_output),
        (file2_button2, Merge, [file2_input2, file2_input3], file2_output),
        (file2_button3, Merge_All, [file2_input2, file2_input3], file2_output),
        (file4_button, Merge_Dlsc, [file4_input1, file4_input2], file4_output),
        (file5_button, Plt, file5_input, file5_output),
    ):
        button.click(handler, inputs=sources, outputs=target)

if __name__ == "__main__":
    demo.launch()
|
file/tname.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
+
|
3 |
+
|
4 |
+
def Name():
    """Return the current local time as ``MM-DD-HH_MM``, used as a file-name prefix."""
    # Per the original note, a "%Y-%m-%d-%H:%M:%S" format raised an error
    # because of the colons, hence the underscore between hour and minute.
    return time.strftime('%m-%d-%H_%M')
|