add UI
Browse files- __pycache__/classification.cpython-39.pyc +0 -0
- __pycache__/run.cpython-39.pyc +0 -0
- __pycache__/textInput.cpython-39.pyc +0 -0
- __pycache__/util.cpython-39.pyc +0 -0
- app.py +35 -0
- classification.py +1 -1
- run.py +47 -56
- textInput.py +12 -0
__pycache__/classification.cpython-39.pyc
CHANGED
Binary files a/__pycache__/classification.cpython-39.pyc and b/__pycache__/classification.cpython-39.pyc differ
|
|
__pycache__/run.cpython-39.pyc
ADDED
Binary file (1.11 kB). View file
|
|
__pycache__/textInput.cpython-39.pyc
ADDED
Binary file (705 Bytes). View file
|
|
__pycache__/util.cpython-39.pyc
CHANGED
Binary files a/__pycache__/util.cpython-39.pyc and b/__pycache__/util.cpython-39.pyc differ
|
|
app.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import gradio as gr
|
3 |
+
import textInput
|
4 |
+
|
5 |
+
# NOTE(review): `output` and `keys` are not referenced anywhere else in this
# script; they are kept in case another module imports them.
output = []
keys = []


# Build the page layout. Components are rendered in creation order.
with gr.Blocks() as demo:
    # Page heading and a short instruction line, rendered from Markdown.
    gr.Markdown("# 文本分类系统")
    gr.Markdown("请选择要输入的文件或填入文本")
    # Both parameter boxes are labelled so the user can tell them apart;
    # their values are forwarded to the click handler together with the text.
    topic_num = gr.Textbox(label="主题数量")
    max_length = gr.Textbox(label="最大长度")
    # Input tabs: free text or an uploaded file.
    with gr.Tabs():
        with gr.Tab("文本输入"):
            text_input = gr.Textbox()
            text_button = gr.Button("生成")

        with gr.Tab("文件输入"):
            gr.Markdown("目前支持的格式有PDF、Word、txt")
            # NOTE(review): no event handler is wired to this component yet.
            file_input = gr.File()
    # Output tabs: one for the classification keys, one for the abstract.
    with gr.Tabs():
        with gr.Tab("分类页"):
            text_keys_output = gr.Textbox()

        with gr.Tab("摘要页"):
            text_ab_output = gr.Textbox()
    # The handler receives the three inputs positionally, in this order, and
    # its two return values fill the two output boxes.
    text_button.click(
        textInput.text_dump_to_json,
        inputs=[text_input, topic_num, max_length],
        outputs=[text_keys_output, text_ab_output],
    )

demo.launch()
|
classification.py
CHANGED
@@ -8,7 +8,7 @@ import torch
|
|
8 |
|
9 |
def classify_by_topic(articles, central_topics):
|
10 |
|
11 |
-
#
|
12 |
def compute_similarity(articles, central_topics):
|
13 |
|
14 |
model = AutoModel.from_pretrained("distilbert-base-multilingual-cased")
|
|
|
8 |
|
9 |
def classify_by_topic(articles, central_topics):
|
10 |
|
11 |
+
# 计算与每个中心主题的相似度,返回一个矩阵
|
12 |
def compute_similarity(articles, central_topics):
|
13 |
|
14 |
model = AutoModel.from_pretrained("distilbert-base-multilingual-cased")
|
run.py
CHANGED
@@ -1,56 +1,47 @@
|
|
1 |
-
import util
|
2 |
-
import abstract
|
3 |
-
import classification
|
4 |
-
import inference
|
5 |
-
import outline
|
6 |
-
from inference import BertClassificationModel
|
7 |
-
# input:file/text,topic_num,max_length,output_choice
|
8 |
-
# output:file/text/topic_sentence
|
9 |
-
|
10 |
-
|
11 |
-
# file_process:
|
12 |
-
# in util
|
13 |
-
# read file code
|
14 |
-
# file to json_text
|
15 |
-
|
16 |
-
# convert:
|
17 |
-
# in util
|
18 |
-
# convert code
|
19 |
-
# json_text to text
|
20 |
-
|
21 |
-
# process:
|
22 |
-
# in util
|
23 |
-
# text process code
|
24 |
-
# del stop seg
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
article
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
matrix = inference.inference_matrix(title)
|
50 |
-
print(matrix)
|
51 |
-
|
52 |
-
text_outline,outline_list = outline.passage_outline(matrix,title)
|
53 |
-
print(text_outline)
|
54 |
-
|
55 |
-
output = util.formate_text(title_dict,outline_list)
|
56 |
-
print (output)
|
|
|
1 |
+
import util
|
2 |
+
import abstract
|
3 |
+
import classification
|
4 |
+
import inference
|
5 |
+
import outline
|
6 |
+
from inference import BertClassificationModel
|
7 |
+
# input:file/text,topic_num,max_length,output_choice
|
8 |
+
# output:file/text/topic_sentence
|
9 |
+
|
10 |
+
|
11 |
+
# file_process:
|
12 |
+
# in util
|
13 |
+
# read file code
|
14 |
+
# file to json_text
|
15 |
+
|
16 |
+
# convert:
|
17 |
+
# in util
|
18 |
+
# convert code
|
19 |
+
# json_text to text
|
20 |
+
|
21 |
+
# process:
|
22 |
+
# in util
|
23 |
+
# text process code
|
24 |
+
# del stop seg
|
25 |
+
|
26 |
+
def texClear(article):
    """Clean every sentence of *article* with ``util.clean_text``.

    Args:
        article: Iterable of sentences; each item is handed to
            ``util.clean_text`` as-is.

    Returns:
        list: The cleaned sentences, in the same order as the input.
    """
    return list(map(util.clean_text, article))
|
29 |
+
|
30 |
+
def textToAb(sentences, article, topic_num, max_length):
    """Run the abstract -> classify -> group -> generate -> outline pipeline.

    Args:
        sentences: Cleaned sentences, passed to ``abstract.abstruct_main``.
        article: Original sentences, passed to
            ``classification.classify_by_topic``.
        topic_num: Forwarded to ``abstract.abstruct_main``.
        max_length: Forwarded to ``util.generation``.

    Returns:
        tuple: ``(keys, output)`` where ``keys`` is the list of keys of the
        second value returned by ``util.generation`` and ``output`` is the
        value returned by ``util.formate_text``.
    """
    topic_sentences = abstract.abstruct_main(sentences, topic_num)
    classified = classification.classify_by_topic(article, topic_sentences)
    grouped = util.article_to_group(classified, topic_sentences)

    # NOTE(review): the original comment describes the generated result as
    # {Ai_abstruct: (main_sentence, paragraph)} -- confirm in util.generation.
    title_dict, title = util.generation(grouped, max_length)

    relation_matrix = inference.inference_matrix(title)

    # Only the second element of the outline result is needed here.
    _unused_text, outline_list = outline.passage_outline(relation_matrix, title)

    formatted = util.formate_text(title_dict, outline_list)
    title_keys = list(title.keys())

    return title_keys, formatted
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
textInput.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import run
|
2 |
+
|
3 |
+
def _coerce_positive_int(value, default, name):
    """Turn a UI value into a positive int, using *default* when it is blank."""
    if value is None:
        return default
    if isinstance(value, str):
        value = value.strip()
        if not value:
            return default
    number = int(value)
    if number < 1:
        raise ValueError(f"{name} must be a positive integer, got {number}")
    return number


def text_dump_to_json(text, topic_num=5, max_length=50):
    """Split raw text into lines and run the classification/abstract pipeline.

    The UI wires three inputs (text, topic count, max length) to this
    handler, so it accepts all three; the two extra parameters default to the
    previously hard-coded values so one-argument callers keep working.

    Args:
        text: Raw input text; split on newlines, blank lines are dropped.
        topic_num: Value forwarded to ``run.textToAb``. May be an int or a
            numeric string; ``None`` or a blank string means the default (5).
        max_length: Value forwarded to ``run.textToAb``. Same accepted forms
            as ``topic_num``; default 50.

    Returns:
        tuple: ``(keys, output)`` as returned by ``run.textToAb``, or
        ``([], "")`` when the input holds no non-blank line.

    Raises:
        ValueError: If ``topic_num`` or ``max_length`` is not a positive
            integer.
    """
    topic_num = _coerce_positive_int(topic_num, 5, "topic_num")
    max_length = _coerce_positive_int(max_length, 50, "max_length")

    lines = [line.strip() for line in (text or "").split("\n") if line.strip()]
    if not lines:
        # Nothing to classify: skip the pipeline instead of feeding it an
        # empty article.
        return [], ""

    sentences = run.texClear(lines)
    return run.textToAb(sentences, lines, topic_num, max_length)
|
9 |
+
|
10 |
+
def file_dump_to_json(file):
    """Placeholder for file-based input; no processing is implemented yet.

    Args:
        file: Currently ignored.

    Returns:
        None: Always.
    """
    return None
|