import torch
import gradio as gr
from lex_rank import LexRank
from lex_rank_text2vec_v1 import LexRankText2VecV1
from lex_rank_L12 import LexRankL12
from sentence_transformers import SentenceTransformer, util
from ask_glm_4_help import GlmHelper
# ---===--- instances ---===---
embedder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
lex = LexRank()
lex_distiluse_v1 = LexRankText2VecV1()
lex_l12 = LexRankL12()
glm_helper = GlmHelper()
# Summarization method 1: LexRank with the mpnet embedder
def extract_handler(content, siblings, num):
    if not siblings:
        siblings = 0
    if not num:
        num = 10
    siblings = int(siblings)
    num = int(num)
    # Clean the raw article with the GLM helper before picking central sentences
    glm_summarized_content = GlmHelper.clean_raw_content(content)
    sentences = lex.find_central(glm_summarized_content, siblings=siblings, num=num)
    output = f""">>>>>经过大模型清洗之后的文章为:\n{glm_summarized_content}\n\t>>>>>摘要为:\n"""
    for index, sentence in enumerate(sentences):
        output += f"{index}: {sentence}\n"
    return output
# Summarization method 2: LexRank with the text2vec embedder
def extract_handler_distiluse_v1(content, siblings, num):
    if not siblings:
        siblings = 0
    if not num:
        num = 10
    siblings = int(siblings)
    num = int(num)
    glm_summarized_content = GlmHelper.clean_raw_content(content)
    # Use the text2vec-based instance rather than the default mpnet one
    sentences = lex_distiluse_v1.find_central(glm_summarized_content, siblings=siblings, num=num)
    output = f""">>>>>经过大模型清洗之后的文章为:\n{glm_summarized_content}\n\t>>>>>摘要为:\n"""
    for index, sentence in enumerate(sentences):
        output += f"{index}: {sentence}\n"
    return output
# Summarization method 3: LexRank with the MiniLM-L12 embedder
def extract_handler_l12(content, siblings, num):
    if not siblings:
        siblings = 0
    if not num:
        num = 10
    siblings = int(siblings)
    num = int(num)
    glm_summarized_content = GlmHelper.clean_raw_content(content)
    # Use the MiniLM-L12-based instance rather than the default mpnet one
    sentences = lex_l12.find_central(glm_summarized_content, siblings=siblings, num=num)
    output = f""">>>>>经过大模型清洗之后的文章为:\n{glm_summarized_content}\n\t>>>>>摘要为:\n"""
    for index, sentence in enumerate(sentences):
        output += f"{index}: {sentence}\n"
    return output
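# The three handlers above differ only in which LexRank instance they call.
# The helper below is an illustrative consolidation sketch, kept alongside the
# per-model handlers and not wired into the UI; it assumes every instance
# exposes the same find_central(text, siblings=..., num=...) API.
def extract_with(model, content, siblings, num):
    siblings = int(siblings) if siblings else 0
    num = int(num) if num else 10
    cleaned = GlmHelper.clean_raw_content(content)
    sentences = model.find_central(cleaned, siblings=siblings, num=num)
    output = f""">>>>>经过大模型清洗之后的文章为:\n{cleaned}\n\t>>>>>摘要为:\n"""
    for index, sentence in enumerate(sentences):
        output += f"{index}: {sentence}\n"
    return output
# e.g. extract_with(lex_l12, content, siblings, num) mirrors extract_handler_l12.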
# Similarity search between each query line and every document line
def similarity_search(queries, doc):
    doc_list = doc.split('\n')
    query_list = queries.split('\n')
    corpus_embeddings = embedder.encode(doc_list, convert_to_tensor=True)
    top_k = min(10, len(doc_list))
    output = ""
    for query in query_list:
        query_embedding = embedder.encode(query, convert_to_tensor=True)
        # Use cosine similarity and torch.topk to find the top_k highest scores
        cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
        top_results = torch.topk(cos_scores, k=top_k)
        output += "\n\n======================\n\n"
        output += f"Query: {query}"
        output += f"\nTop {top_k} most similar sentences in corpus:\n"
        for score, idx in zip(top_results[0], top_results[1]):
            output += f"{doc_list[idx]} (Score: {score:.4f})\n"
    return output
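# Note: the manual cos_sim + topk loop above mirrors the library helper
# util.semantic_search from sentence_transformers; an equivalent sketch, assuming
# the same embedder and the locals defined inside similarity_search:
#
#   query_embeddings = embedder.encode(query_list, convert_to_tensor=True)
#   hits = util.semantic_search(query_embeddings, corpus_embeddings, top_k=top_k)
#   # hits[i] is a list of {"corpus_id": ..., "score": ...} dicts for query_list[i]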
# Web UI
with gr.Blocks() as app:
    gr.Markdown("从下面的标签选择测试模块 [摘要生成,相似度检测]")
    with gr.Tab("LexRank-mpnet"):
        text_input_1 = gr.Textbox(label="请输入长文本:", lines=10, max_lines=1000)
        with gr.Row():
            text_button_1 = gr.Button("生成摘要")
            siblings_input_1 = gr.Textbox(label="请输入摘要的宽度半径, 默认为0, 即显示摘要本身.")
            num_input_1 = gr.Textbox(label="摘要的条数, 默认10条")
        text_output_1 = gr.Textbox(label="摘要文本", lines=10)
    with gr.Tab("shibing624/text2vec-base-chinese-paraphrase"):
        text_input_2 = gr.Textbox(label="请输入长文本:", lines=10, max_lines=1000)
        with gr.Row():
            text_button_2 = gr.Button("生成摘要")
            siblings_input_2 = gr.Textbox(label="请输入摘要的宽度半径, 默认为0, 即显示摘要本身.")
            num_input_2 = gr.Textbox(label="摘要的条数, 默认10条")
        text_output_2 = gr.Textbox(label="摘要文本", lines=10)
    with gr.Tab("LexRank-MiniLM-L12-v2"):
        text_input_3 = gr.Textbox(label="请输入长文本:", lines=10, max_lines=1000)
        with gr.Row():
            text_button_3 = gr.Button("生成摘要")
            siblings_input_3 = gr.Textbox(label="请输入摘要的宽度半径, 默认为0, 即显示摘要本身.")
            num_input_3 = gr.Textbox(label="摘要的条数, 默认10条")
        text_output_3 = gr.Textbox(label="摘要文本", lines=10)
    with gr.Tab("相似度检测"):
        with gr.Row():
            text_input_query = gr.Textbox(lines=10, label="查询文本")
            text_input_doc = gr.Textbox(lines=20, label="逐行输入待比较的文本列表")
        text_button_similarity = gr.Button("对比相似度")
        text_output_similarity = gr.Textbox()
    # Wire each button to its handler
    text_button_1.click(extract_handler, inputs=[text_input_1, siblings_input_1, num_input_1], outputs=text_output_1)
    text_button_2.click(extract_handler_distiluse_v1, inputs=[text_input_2, siblings_input_2, num_input_2], outputs=text_output_2)
    text_button_3.click(extract_handler_l12, inputs=[text_input_3, siblings_input_3, num_input_3], outputs=text_output_3)
    text_button_similarity.click(similarity_search, inputs=[text_input_query, text_input_doc], outputs=text_output_similarity)
app.launch(
    # Enabling share=True generates a temporary public link.
    # share=True,
    # debug=True,
    # auth=("qee", "world"),
    # auth_message="请登陆"
)