hellopahe committed on
Commit
4cc4d15
1 Parent(s): 261e2d7

Add a GLM-based cleaning step that removes off-topic sentences from the input before summarization.

Browse files
Files changed (2) hide show
  1. app.py +16 -11
  2. ask_glm_4_help.py +29 -0
app.py CHANGED
@@ -1,9 +1,10 @@
1
- import math, torch, gradio as gr
2
 
3
  from lex_rank import LexRank
4
  from lex_rank_text2vec_v1 import LexRankText2VecV1
5
  from lex_rank_L12 import LexRankL12
6
  from sentence_transformers import SentenceTransformer, util
 
7
 
8
 
9
  # ---===--- instances ---===---
@@ -11,6 +12,7 @@ embedder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
11
  lex = LexRank()
12
  lex_distiluse_v1 = LexRankText2VecV1()
13
  lex_l12 = LexRankL12()
 
14
 
15
 
16
  # 摘要方法1
@@ -23,9 +25,10 @@ def extract_handler(content, siblings, num):
23
  siblings = int(siblings)
24
  num = int(num)
25
 
26
- summary_length = math.ceil(len(content) / 10)
27
- sentences = lex.find_central(content, siblings=siblings, num=num)
28
- output = ""
 
29
  for index, sentence in enumerate(sentences):
30
  output += f"{index}: {sentence}\n"
31
  return output
@@ -41,9 +44,10 @@ def extract_handler_distiluse_v1(content, siblings, num):
41
  siblings = int(siblings)
42
  num = int(num)
43
 
44
- summary_length = math.ceil(len(content) / 10)
45
- sentences = lex_distiluse_v1.find_central(content, siblings=siblings, num=num)
46
- output = ""
 
47
  for index, sentence in enumerate(sentences):
48
  output += f"{index}: {sentence}\n"
49
  return output
@@ -59,9 +63,10 @@ def extract_handler_l12(content, siblings, num):
59
  siblings = int(siblings)
60
  num = int(num)
61
 
62
- summary_length = math.ceil(len(content) / 10)
63
- sentences = lex_l12.find_central(content, siblings=siblings, num=num)
64
- output = ""
 
65
  for index, sentence in enumerate(sentences):
66
  output += f"{index}: {sentence}\n"
67
  return output
@@ -103,7 +108,7 @@ with gr.Blocks() as app:
103
  with gr.Row():
104
  text_button_2 = gr.Button("生成摘要")
105
  siblings_input_2 = gr.Textbox(label="请输入摘要的宽度半径, 默认为0, 即显示摘要本身.")
106
- num_input_2 = gr.Textbox(label="label=摘要的条数, 默认10条")
107
  text_output_2 = gr.Textbox(label="摘要文本", lines=10)
108
  with gr.Tab("LexRank-MiniLM-L12-v2"):
109
  text_input_3 = gr.Textbox(label="请输入长文本:", lines=10, max_lines=1000)
 
1
+ import torch, gradio as gr
2
 
3
  from lex_rank import LexRank
4
  from lex_rank_text2vec_v1 import LexRankText2VecV1
5
  from lex_rank_L12 import LexRankL12
6
  from sentence_transformers import SentenceTransformer, util
7
+ from ask_glm_4_help import GlmHelper
8
 
9
 
10
  # ---===--- instances ---===---
 
12
  lex = LexRank()
13
  lex_distiluse_v1 = LexRankText2VecV1()
14
  lex_l12 = LexRankL12()
15
+ glm_helper = GlmHelper()
16
 
17
 
18
  # 摘要方法1
 
25
  siblings = int(siblings)
26
  num = int(num)
27
 
28
+ glm_summarized_content = GlmHelper.clean_raw_content(content)
29
+
30
+ sentences = lex.find_central(glm_summarized_content, siblings=siblings, num=num)
31
+ output = f""">>>>>经过大模型清洗之后的文章为:\n{glm_summarized_content}\n\t>>>>>摘要为:\n"""
32
  for index, sentence in enumerate(sentences):
33
  output += f"{index}: {sentence}\n"
34
  return output
 
44
  siblings = int(siblings)
45
  num = int(num)
46
 
47
+ glm_summarized_content = GlmHelper.clean_raw_content(content)
48
+
49
+ sentences = lex.find_central(glm_summarized_content, siblings=siblings, num=num)
50
+ output = f""">>>>>经过大模型清洗之后的文章为:\n{glm_summarized_content}\n\t>>>>>摘要为:\n"""
51
  for index, sentence in enumerate(sentences):
52
  output += f"{index}: {sentence}\n"
53
  return output
 
63
  siblings = int(siblings)
64
  num = int(num)
65
 
66
+ glm_summarized_content = GlmHelper.clean_raw_content(content)
67
+
68
+ sentences = lex.find_central(glm_summarized_content, siblings=siblings, num=num)
69
+ output = f""">>>>>经过大模型清洗之后的文章为:\n{glm_summarized_content}\n\t>>>>>摘要为:\n"""
70
  for index, sentence in enumerate(sentences):
71
  output += f"{index}: {sentence}\n"
72
  return output
 
108
  with gr.Row():
109
  text_button_2 = gr.Button("生成摘要")
110
  siblings_input_2 = gr.Textbox(label="请输入摘要的宽度半径, 默认为0, 即显示摘要本身.")
111
+ num_input_2 = gr.Textbox(label="摘要的条数, 默认10条")
112
  text_output_2 = gr.Textbox(label="摘要文本", lines=10)
113
  with gr.Tab("LexRank-MiniLM-L12-v2"):
114
  text_input_3 = gr.Textbox(label="请输入长文本:", lines=10, max_lines=1000)
ask_glm_4_help.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+
4
+ SYS_MSG_4_CLEANING = "你是一个AI助手, 能将我给你的文章去除与主题无关的句子, 并尽量保留所有与主题相关的句子."
5
+
6
class GlmHelper(object):
    """Thin client for a remote GLM text-generation HTTP endpoint.

    Both public methods are classmethods, so they work when called on the
    class itself (``GlmHelper.clean_raw_content(text)``) as well as on an
    instance (``GlmHelper().clean_raw_content(text)``).
    """

    # Endpoint that receives the JSON-encoded request body.
    API_URL = "http://region-9.autodl.pro:19567/gradio"
    # Upper bound (seconds) for one request, so a dead endpoint cannot hang
    # the caller forever. NOTE(review): tune to the real model latency.
    REQUEST_TIMEOUT_SECONDS = 120
    # Value sent as "max_length" in the request body.
    MAX_LENGTH = 2048 * 4
    # Text returned whenever no usable reply could be obtained.
    FALLBACK_REPLY = "GLM Api返回了坏的请求..."

    @classmethod
    def clean_raw_content(cls, content: str) -> str:
        """Send *content* to the model together with the cleaning system prompt.

        Args:
            content: Raw article text.

        Returns:
            The model's reply, or ``FALLBACK_REPLY`` if the request failed.
        """
        # A fresh, empty history is sent on every call.
        return cls.bot_message_handler(
            message=content,
            history=[],
            sys_msg=SYS_MSG_4_CLEANING,
        )

    # Query the LLM, carrying the (currently blank) knowledge-base text.
    @classmethod
    def bot_message_handler(cls, message: str, history: list, sys_msg: str) -> str:
        """POST one prompt to the endpoint and return its ``"response"`` field.

        Args:
            message: User message placed inside the ``[INST]`` section.
            history: Value forwarded unchanged as ``"history"``.
            sys_msg: System prompt placed inside the ``<<SYS>>`` section.

        Returns:
            The ``"response"`` value of the JSON reply, or ``FALLBACK_REPLY``
            when the request fails, the body is not JSON, or the field is
            missing.
        """
        request_body = {
            "prompt": f"\n<s>[INST] <<SYS>>\n{sys_msg}\n<</SYS>>\n\n{message} [/INST]\n",
            "knowledge": "\n",
            "history": history,
            "max_length": cls.MAX_LENGTH,
        }
        try:
            # The POST is inside the try so that connection errors and
            # timeouts degrade to the fallback text instead of crashing.
            response = requests.post(
                cls.API_URL,
                data=json.dumps(request_body),
                timeout=cls.REQUEST_TIMEOUT_SECONDS,
            )
            return response.json()["response"]
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # RequestException: network failure or timeout.
            # ValueError: body is not valid JSON.
            # KeyError / TypeError: JSON lacks a "response" field or is not
            # a mapping.
            return cls.FALLBACK_REPLY