jie1 commited on
Commit
e1810a8
1 Parent(s): 24261c2

Upload 12 files

Browse files
file/Merge_Dlsc.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from tname import *
3
+ from Rfile import *
4
+
5
+
6
def Merge_Dlsc(sc_file, dl_file):
    """Append the values parsed from a scores file as a new column of a DLKcat table.

    Every even-numbered line (0, 2, 4, ...) of the scores file is treated as
    a header carrying ``score=<number>,``. Because the pattern starts with a
    greedy ``.*``, the *last* such occurrence on the line is the one that is
    captured. The n-th captured value is appended to the n-th data row
    (the rows after the header line) of the DLKcat file.

    Args:
        sc_file: Object whose ``name`` attribute is the path of the scores file.
        dl_file: Object whose ``name`` attribute is the path of the DLKcat
            table; its first line is treated as the column header.

    Returns:
        The name of the result file, ``Name() + "kcat_scores.tsv"``.

    Raises:
        ValueError: If a header line carries no parsable score, if the DLKcat
            file is empty, or if it has more data rows than there are scores.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    # The decimal point is escaped so it no longer matches arbitrary characters.
    score_pattern = re.compile(r".*score=(\d+(?:\.\d+)?),")

    score_lines = j_reads(sc_file.name)
    scores = []
    for index in range(0, len(score_lines), 2):
        found = score_pattern.match(score_lines[index])
        if found is None:
            raise ValueError(
                "no 'score=<number>,' field on line %d of %s"
                % (index + 1, sc_file.name)
            )
        scores.append(float(found.group(1)))

    rows = j_reads(dl_file.name)
    if not rows:
        raise ValueError("%s is empty" % dl_file.name)
    if len(rows) - 1 > len(scores):
        raise ValueError(
            "%s has %d data rows but only %d scores were found in %s"
            % (dl_file.name, len(rows) - 1, len(scores), sc_file.name)
        )

    name = Name() + r"kcat_scores.tsv"  # name of the result file
    # Open once in write mode: Name() only has minute resolution, so append
    # mode would stack a second run onto the output of the previous one.
    with open(name, "w") as f:
        # The header row gets a column title instead of a score.
        f.write(rows[0].strip() + "\t" + "scores value" + "\n")
        for row, score in zip(rows[1:], scores):
            f.write(row.strip() + "\t" + str(score) + "\n")

    return name
file/Plt.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from matplotlib import pyplot as plt
2
+
3
+
4
def Plt(file):
    """Plot the values found on the ``loss`` lines of a log file.

    Each line that starts with ``loss`` is split on whitespace; field 1 is
    collected as the loss and field 3 as the accuracy. The two series are
    drawn against their line order in two stacked subplots and saved to
    ``result.jpg`` in the current working directory.

    Args:
        file: Object whose ``name`` attribute is the path of the log file.

    Returns:
        The string ``"result.jpg"``.
    """
    losses = []
    accuracies = []
    # Context manager so the log file is closed even if parsing fails.
    with open(file.name, 'r') as log:
        for line in log:
            if line.startswith("loss"):
                fields = line.split()
                losses.append(float(fields[1]))
                accuracies.append(float(fields[3]))

    epochs = range(len(losses))

    # Draw on a fresh figure and close it afterwards; pyplot otherwise keeps
    # reusing its global figure, so repeated calls would overlay old curves.
    figure = plt.figure()
    try:
        plt.subplot(2, 1, 1)
        plt.plot(epochs, accuracies)
        plt.title('Test accuracy vs. epoches')
        plt.ylabel('Test accuracy')
        plt.subplot(2, 1, 2)
        plt.plot(epochs, losses)
        plt.xlabel('Test loss vs. epoches')
        plt.ylabel('Test loss')
        plt.savefig("result.jpg")
    finally:
        plt.close(figure)
    return "result.jpg"
35
+
file/Preinput_Merge.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ from tname import *
4
+ from Rfile import *
5
+
6
+
7
def Strip(seq_file):
    """Join sequences that are wrapped over several lines onto a single line.

    A line is copied unchanged when it, or the line after it, starts with
    ``>``. Any other line is followed by a continuation line, so only its
    first whitespace-delimited token is kept and its line break is dropped.
    The final line of the file is always copied unchanged.

    Args:
        seq_file: Object whose ``name`` attribute is the path of the
            sequence file.

    Returns:
        The name of the result file, ``Name() + "input.tsv"``. An empty input
        produces an empty result file.
    """
    contents = j_reads(seq_file.name)
    ina = Name() + r"input.tsv"  # name of the result file

    pieces = []
    for current, following in zip(contents, contents[1:]):
        if current.startswith('>') or following.startswith('>'):
            pieces.append(current)
        else:
            # Continuation follows: keep the residues, drop the line break.
            # A blank line has no token at all and contributes nothing.
            tokens = current.split()
            pieces.append(tokens[0] if tokens else "")
    if contents:
        # The last line has no successor to compare against; copy it as is.
        pieces.append(contents[-1])

    # Open once in write mode: Name() only has minute resolution, so append
    # mode would stack a second run onto the output of the previous one.
    with open(ina, "w") as f:
        f.writelines(pieces)
    return ina
25
+
26
+
27
def Merge(smi_file, seq_file):
    """Combine one SMILES string with every sequence of a sequence file.

    The first line of the SMILES file is used for all rows. The sequence file
    is read as alternating header/sequence lines; each sequence (the lines at
    odd indices) becomes one row ``>seq<k>\\t<smiles>\\t<sequence>`` where
    ``k`` counts the sequences from 0. The original headers are not kept.

    Args:
        smi_file: Object whose ``name`` attribute is the path of the SMILES file.
        seq_file: Object whose ``name`` attribute is the path of the sequence
            file, with each sequence on a single line.

    Returns:
        The name of the result file, ``Name() + "kcat_input.tsv"``.
    """
    smile = j_read(smi_file.name).strip("\n")
    contents = j_reads(seq_file.name)

    name = Name() + r"kcat_input.tsv"  # name of the result file

    # Open once in write mode: Name() only has minute resolution and
    # Merge_All writes to the same file name, so append mode would mix the
    # output of runs started within the same minute.
    with open(name, "w") as f3:
        # NOTE(review): the header below is space-separated while the data
        # rows are tab-separated - confirm which one the consumer expects.
        f3.write("Substrate Name Substrate SMILES Protein Sequence")
        f3.write("\n")
        for seq_index, sequence in enumerate(contents[1::2]):
            # index, SMILES and sequence; the sequence keeps its own newline
            f3.write(">seq" + str(seq_index) + "\t" + smile + "\t" + sequence)
    return name
53
+
54
+
55
def Merge_All(smi_file, seq_file):
    """Combine one SMILES string with every sequence, keeping the headers.

    The first line of the SMILES file is used for all rows. The sequence file
    is read as alternating header/sequence lines; each pair becomes one row
    ``<header>\\t<smiles>\\t<sequence>`` where the header has its spaces
    replaced by underscores and its line break removed.

    Args:
        smi_file: Object whose ``name`` attribute is the path of the SMILES file.
        seq_file: Object whose ``name`` attribute is the path of the sequence
            file, with each sequence on a single line.

    Returns:
        The name of the result file, ``Name() + "kcat_input.tsv"``.
    """
    smile = j_read(smi_file.name).strip("\n")
    contents = j_reads(seq_file.name)

    name = Name() + r"kcat_input.tsv"  # name of the result file

    # Open once in write mode: Name() only has minute resolution and Merge
    # writes to the same file name, so append mode would mix the output of
    # runs started within the same minute.
    with open(name, "w") as f3:
        # NOTE(review): the header below is space-separated while the data
        # rows are tab-separated - confirm which one the consumer expects.
        f3.write("Substrate Name Substrate SMILES Protein Sequence")
        f3.write("\n")
        # Pair every header (even index) with the sequence right after it; a
        # trailing header without a sequence is ignored.
        for header, sequence in zip(contents[0::2], contents[1::2]):
            info = header.replace(' ', '_').replace('\n', '')
            f3.write(info + "\t" + smile + "\t" + sequence)
    return name
file/README.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ```
2
+ 1. Sort 模块里,可以进行dlkcat,scores值的排序,选择相关文件,点击对应按钮,即可进行排序
3
+ 2. Pre Merge 模块里,进行smile 与 seq序列文件的合并,
4
+ 如若序列文件中的单条序列不是在一行而是分为多行的,需处理换行符,在strip_file上传文件,点击strip按钮,会生成新的序列文件,并且注意文件除了末尾可以有一行空行,其余地方不能有多余的空行
5
+ 合并smile 与 seq 文件时,有两种情况:
6
+
7
+ 一种是序列格式文件如下所示的:
8
+ >1J3U_A/1-468 Crystal structure of aspartase from Bacillus sp. YM55-1 [Bacillus sp. YM55-1]
9
+ MNTDVRIEKDFLGEKEIPKDAYYGVQTIRATENFPITGYRIHPELIKSLGIVKKSAALANMEVGLLDKEVGQ
10
+ YIVKAADEVIEGKWNDQFIVDPIQGGAGTSINMNANEVIANRALELMGEEKGNYSKISPNSHVNMSQSTNDA
11
+ FPTATHIAVLSLLNQLIETTKYMQQEFMKKADEFAGVIKMGRTHLQDAVPILLGQEFEAYARVIARDIERIA
12
+ NTRNNLYDINMGATAVGTGLNADPEYISIVTEHLAKFSGHPLRSAQHLVDATQNTDCYTEVSSALKVCMINM
13
+ SKIANDLRLMASGPRAGLSEIVLPARQPGSSIMPGKVNPVMPEVMNQVAFQVFGNDLTITSASEAGQFELNV
14
+ MEPVLFFNLIQSISIMTNVFKSFTENCLKGIKANEERMKEYVEKSIGIITAINPHVGYETAAKLAREAYLTG
15
+ ESIRELCIKYGVLTEEQLNEILNPYEMIHPGIAGRK
16
+ >WP_016839137.1/1-468 aspartate ammonia-lyase [Ureibacillus thermosphaericus]
17
+ MNTDVRIEKDFLGEKEIPKDAYYGVQTIRATENFPITGYRIHPELIKSLGIVKKSAALANMEVGLLDKEVGQ
18
+ YIVKAADEVIEGKWNDQFIVDPIQGGAGTSINMNANEVIANRALELMGEEKGNYSKISPNSHVNMSQSTNDA
19
+ 合并时需要保存序列的来源这些相关信息,点击Merge All 按钮
20
+
21
+ 另一种是不需要保存序列来源信息的情况(输出中各序列依次以 >seq0、>seq1 … 命名),点击 Merge 按钮
22
+
23
+ 3. Merge Dlsc 模块里,进行scores 与 Dlkcat文件的合并,合并时注意两个文件的序列相对应
24
+ ```
25
+
26
+
27
+
28
+
29
+
30
+
file/Rfile.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
def j_reads(file):
    """Return all lines of the text file at path *file*, line breaks included."""
    with open(file, "r") as handle:
        return list(handle)
5
+
6
+
7
def j_read(file):
    """Return the first line of the text file at path *file*.

    The line break is kept; an empty file yields an empty string.
    """
    with open(file, "r") as handle:
        return next(handle, "")
11
+
file/Sort_Dlkcat.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from tname import *
2
+ from Rfile import *
3
+
4
def _kcat_record(line):
    """Format one whitespace-split table row as a two-line record."""
    fields = line.split()
    return fields[0] + "\t" + "Kcat value=" + fields[3] + "\n" + fields[2] + "\n"


def Sort_Dlkcat(file):
    """Write the rows of a DLKcat table as records ordered by Kcat value.

    The first line of the input is skipped as a header. Every other row is
    split on whitespace and written as ``<field 0>\\tKcat value=<field 3>``
    followed by ``<field 2>`` on its own line. The first data row is written
    first without taking part in the sort; the remaining rows follow in
    descending order of field 3, ties keeping their input order. Blank lines
    are ignored.

    Args:
        file: Object whose ``name`` attribute is the path of the table.

    Returns:
        The name of the result file, ``Name() + "kcat_sort.fa"``.

    Raises:
        ValueError: If the file holds no data row, or a Kcat field is not a
            number.
        IndexError: If a data row has fewer than four fields.
    """
    contents = j_reads(file.name)
    rows = [line for line in contents[1:] if line.strip()]
    if not rows:
        raise ValueError(
            "%s must contain a header line and at least one data row" % file.name
        )

    first, rest = rows[0], rows[1:]
    # Sort from the largest to the smallest Kcat value.
    ranked = sorted(rest, key=lambda line: float(line.split()[3]), reverse=True)

    name = Name() + r"kcat_sort.fa"  # name of the result file
    # Open once in write mode: Name() only has minute resolution, so append
    # mode would stack a second run onto the output of the previous one.
    with open(name, "w") as f:
        f.write(_kcat_record(first))
        f.writelines(_kcat_record(line) for line in ranked)

    return name
file/Sort_Sco_Kcat.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from tname import *
2
+ from Rfile import *
3
+
4
+
5
def Sort_Sco_Kcat(file):
    """Reorder the rows of a table by descending Kcat value.

    The first two lines are copied unchanged and do not take part in the
    sort. Every later row is split on whitespace and ordered by field 3,
    largest first, ties keeping their input order. Blank lines are ignored.

    Args:
        file: Object whose ``name`` attribute is the path of the table.

    Returns:
        The name of the result file, ``Name() + "mkcat_sort.fa"``.

    Raises:
        ValueError: If a Kcat field is not a number.
        IndexError: If a sorted row has fewer than four fields.
    """
    contents = j_reads(file.name)
    kcatname = Name() + r"mkcat_sort.fa"  # name of the result file

    fixed = contents[:2]
    rows = [line for line in contents[2:] if line.strip()]
    ranked = sorted(rows, key=lambda line: float(line.split()[3]), reverse=True)

    # Open once in write mode: Name() only has minute resolution, so append
    # mode would stack a second run onto the output of the previous one.
    with open(kcatname, "w") as f:
        for line in fixed + ranked:
            # The last input line may lack a line break; once it is moved up
            # by the sort it would otherwise fuse with the row after it.
            f.write(line if line.endswith("\n") else line + "\n")
    return kcatname
file/Sort_Scores.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from tname import *
3
+ from Rfile import *
4
+
5
+
6
def Sort_Scores(file):
    """Reorder the header/sequence pairs of a scores file by descending score.

    The first two lines are copied unchanged and do not take part in the
    sort. From line index 2 on, the file is read as header/sequence pairs.
    The score of a pair is the number captured from ``score=<number>,`` on
    its header; because the pattern starts with a greedy ``.*``, the *last*
    such occurrence on the line is used. Pairs whose header carries no
    parsable score are left out of the result.

    Args:
        file: Object whose ``name`` attribute is the path of the scores file.

    Returns:
        The name of the result file, ``Name() + "scores_sort.tsv"``.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    # The decimal point is escaped so it no longer matches arbitrary characters.
    score_pattern = re.compile(r".*score=(\d+(?:\.\d+)?),")

    def terminated(line):
        # The last input line may lack a line break; once it is moved up by
        # the sort it would otherwise fuse with the line after it.
        return line if line.endswith("\n") else line + "\n"

    contents = j_reads(file.name)

    # Keep each header together with its own sequence so that a skipped
    # header cannot shift the pairing of the ones after it.
    records = []
    for index in range(2, len(contents), 2):
        found = score_pattern.match(contents[index])
        if found:
            sequence = contents[index + 1] if index + 1 < len(contents) else ""
            records.append((float(found.group(1)), contents[index], sequence))
    # Stable sort: pairs with equal scores keep their input order.
    records.sort(key=lambda record: record[0], reverse=True)

    na = Name() + r"scores_sort.tsv"  # name of the result file
    # Open once in write mode: Name() only has minute resolution, so append
    # mode would stack a second run onto the output of the previous one.
    with open(na, "w") as f:
        for line in contents[:2]:
            f.write(terminated(line))
        for _, header, sequence in records:
            f.write(terminated(header))
            if sequence:
                f.write(terminated(sequence))
    return na
file/background.jpeg ADDED
file/result.jpg ADDED
file/test.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from Sort_Scores import *
3
+ from Preinput_Merge import *
4
+ from Sort_Dlkcat import *
5
+ from Merge_Dlsc import *
6
+ from Sort_Sco_Kcat import *
7
+ from Plt import *
8
+
9
# Gradio front end: one tab per tool, each button wired to one of the helper
# functions imported at the top of this file.
# NOTE(review): the nesting of widgets inside the Tab/Row containers below was
# inferred from the widget order - confirm it against the rendered layout.
with gr.Blocks(css=".gradio-container {background-image: url('file=background.jpeg')}") as demo:
    gr.Markdown("Welcome using this demo.")
    # Landing tab: static text describing what the other tabs do.
    with gr.Tab("HelloWorld"):
        gr.Markdown("Welcome using this demo.")
        # NOTE(review): "succend" below looks like a typo for "second".
        gr.Markdown("This is a succend test")
        gr.Markdown("I think this demo can do some things")
        gr.Markdown("在sort里,可以对scores文件,dlkcat文件,以及合并后的scores与dlkcat文件进行排序")
        gr.Markdown("Pre Merge里,可以将序列文件与smi文件进行合并,合并后可以进行dlkcat值的计算,合并前若序列文件需要处理换行符也可以对其进行处理")
        # NOTE(review): "主义" below looks like a typo for "注意" ("note that").
        gr.Markdown("Merge Dlsc里,可以合并scores值文件和dlkcat文件,主义这两个文件序列需要一致")
    # Sorting tab: one input file, one output file, three sort variants.
    with gr.Tab("Sort"):
        file1_input = gr.File(label="输入相关文件")
        file1_output = gr.File()
        with gr.Row():
            file1_button1 = gr.Button("Sort Scores")
            file1_button2 = gr.Button("Sort Dlkcat")
            file1_button3 = gr.Button("Sort Mergekcat")

    # Pre-merge tab: strip wrapped sequence lines, or merge SMILES + sequences.
    with gr.Tab("Pre Merge"):
        with gr.Row():
            file2_input1 = gr.File(label="strip_file")
            file2_input2 = gr.File(label="smi_file")
            file2_input3 = gr.File(label="seq_file")
        file2_output = gr.File()
        file2_button1 = gr.Button("Strip")
        with gr.Row():
            file2_button2 = gr.Button("Merge")
            file2_button3 = gr.Button("Merge All")

    # Merge tab: combine a scores file with a DLKcat result file.
    with gr.Tab("Merge Dlsc"):
        with gr.Row():
            file4_input1 = gr.File(label="sc_file")
            file4_input2 = gr.File(label="cat_file")
        file4_output = gr.File()
        file4_button = gr.Button("Merge")

    # Plot tab: turn a log file into an image file.
    with gr.Tab("Plt Picture"):
        file5_input = gr.File(label="log_file")
        file5_output = gr.File()
        file5_button = gr.Button("Plt")

    with gr.Accordion("Open for More!"):
        gr.Markdown("Look at me...")

    # Event wiring. All three sort buttons share the same input and output
    # widgets.
    file1_button1.click(Sort_Scores, inputs=file1_input, outputs=file1_output)
    file1_button2.click(Sort_Dlkcat, inputs=file1_input, outputs=file1_output)
    file1_button3.click(Sort_Sco_Kcat, inputs=file1_input, outputs=file1_output)

    # Strip reads strip_file; both merge buttons read smi_file and seq_file.
    file2_button1.click(Strip, inputs=file2_input1, outputs=file2_output)
    file2_button2.click(Merge, inputs=[file2_input2, file2_input3], outputs=file2_output)
    file2_button3.click(Merge_All, inputs=[file2_input2, file2_input3], outputs=file2_output)

    file4_button.click(Merge_Dlsc, inputs=[file4_input1, file4_input2], outputs=file4_output)

    file5_button.click(Plt, inputs=file5_input, outputs=file5_output)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
file/tname.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import time
2
+
3
+
4
def Name():
    """Return the current local time as a ``MM-DD-HH_MM`` string.

    The other modules use the result as a prefix for their output file names.
    """
    # The author's note says a "%Y-%m-%d-%H:%M:%S" style stamp raised an
    # error because of the colons, hence the underscore between hour and minute.
    stamp_format = '%m-%d-%H_%M'
    # Without an explicit time tuple, strftime formats the current local time.
    return time.strftime(stamp_format)