3v324v23 committed on
Commit
dcaa7a1
1 Parent(s): 785893b

重命名一些函数

Browse files
functional.py → core_functional.py RENAMED
@@ -4,7 +4,7 @@
4
  # 默认按钮颜色是 secondary
5
  from toolbox import clear_line_break
6
 
7
- def get_functionals():
8
  return {
9
  "英语学术润色": {
10
  # 前言
 
4
  # 默认按钮颜色是 secondary
5
  from toolbox import clear_line_break
6
 
7
+ def get_core_functions():
8
  return {
9
  "英语学术润色": {
10
  # 前言
functional_crazy.py → crazy_functional.py RENAMED
@@ -1,6 +1,6 @@
1
  from toolbox import HotReload # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效
2
 
3
- def get_crazy_functionals():
4
  ###################### 第一组插件 ###########################
5
  # [第一组插件]: 最早期编写的项目插件和一些demo
6
  from crazy_functions.读文章写摘要 import 读文章写摘要
@@ -97,6 +97,14 @@ def get_crazy_functionals():
97
  "Function": HotReload(下载arxiv论文并翻译摘要)
98
  }
99
  })
 
 
 
 
 
 
 
 
100
  except Exception as err:
101
  print(f'[下载arxiv论文并翻译摘要] 插件导入失败 {str(err)}')
102
 
 
1
  from toolbox import HotReload # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效
2
 
3
+ def get_crazy_functions():
4
  ###################### 第一组插件 ###########################
5
  # [第一组插件]: 最早期编写的项目插件和一些demo
6
  from crazy_functions.读文章写摘要 import 读文章写摘要
 
97
  "Function": HotReload(下载arxiv论文并翻译摘要)
98
  }
99
  })
100
+ from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
101
+ function_plugins.update({
102
+ "批量翻译PDF文档(多线程)": {
103
+ "Color": "stop",
104
+ "AsButton": False, # 加入下拉菜单中
105
+ "Function": HotReload(批量翻译PDF文档)
106
+ }
107
+ })
108
  except Exception as err:
109
  print(f'[下载arxiv论文并翻译摘要] 插件导入失败 {str(err)}')
110
 
crazy_functions/crazy_utils.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+
4
def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
    """Split *txt* into a list of fragments, each within *limit* tokens.

    Args:
        txt: the text to split.
        get_token_fn: callable mapping a string to its token count.
        limit: maximum token count allowed per fragment.

    First tries to cut only at empty lines (paragraph boundaries); if that
    is impossible, retries allowing a cut at any line break.

    Raises:
        RuntimeError: if no valid cut point exists (e.g. a single line alone
            exceeds the limit), even when cutting at arbitrary line breaks.
    """
    def cut(txt_tocut, must_break_at_empty_line):  # recursive
        if get_token_fn(txt_tocut) <= limit:
            return [txt_tocut]
        lines = txt_tocut.split('\n')
        # Rough estimate of where the limit falls; scan backwards from there
        # for the latest usable cut point.
        estimated_line_cut = int(limit / get_token_fn(txt_tocut) * len(lines))
        prev, post = "", txt_tocut
        cut_found = False
        for cnt in reversed(range(estimated_line_cut)):
            if must_break_at_empty_line and lines[cnt] != "":
                continue
            prev = "\n".join(lines[:cnt])
            post = "\n".join(lines[cnt:])
            if get_token_fn(prev) < limit:
                cut_found = True
                break
        # No usable cut, or only an empty head (which would recurse forever).
        if not cut_found or prev == "":
            raise RuntimeError("存在一行极长的文本!")
        # Recurse on the remainder and chain the fragments together.
        return [prev] + cut(post, must_break_at_empty_line)
    try:
        return cut(txt, must_break_at_empty_line=True)
    except RuntimeError:
        # Paragraph-boundary cutting failed; allow cutting at any line break.
        return cut(txt, must_break_at_empty_line=False)
31
+
32
def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
    """Split PDF-extracted *txt* into fragments, each within *limit* tokens.

    Args:
        txt: the text to split.
        get_token_fn: callable mapping a string to its token count.
        limit: maximum token count allowed per fragment.

    First tries to cut only at empty lines (paragraph boundaries); if that
    is impossible, retries allowing a cut at any line break.

    Raises:
        RuntimeError: if no valid cut point exists (e.g. a single line alone
            exceeds the limit), even when cutting at arbitrary line breaks.
    """
    def cut(txt_tocut, must_break_at_empty_line):  # recursive
        if get_token_fn(txt_tocut) <= limit:
            return [txt_tocut]
        lines = txt_tocut.split('\n')
        # Rough estimate of where the limit falls; scan backwards from there
        # for the latest usable cut point.
        estimated_line_cut = int(limit / get_token_fn(txt_tocut) * len(lines))
        prev, post = "", txt_tocut
        cut_found = False
        for cnt in reversed(range(estimated_line_cut)):
            if must_break_at_empty_line and lines[cnt] != "":
                continue
            prev = "\n".join(lines[:cnt])
            post = "\n".join(lines[cnt:])
            if get_token_fn(prev) < limit:
                cut_found = True
                break
        # No usable cut, or only an empty head (which would recurse forever).
        if not cut_found or prev == "":
            raise RuntimeError("存在一行极长的文本!")
        # Recurse on the remainder and chain the fragments together.
        return [prev] + cut(post, must_break_at_empty_line)
    try:
        return cut(txt, must_break_at_empty_line=True)
    except RuntimeError:
        # Paragraph-boundary cutting failed; allow cutting at any line break.
        return cut(txt, must_break_at_empty_line=False)
crazy_functions/代码重写为全英文_多线程.py CHANGED
@@ -1,6 +1,7 @@
1
  import threading
2
  from request_llm.bridge_chatgpt import predict_no_ui_long_connection
3
  from toolbox import CatchException, write_results_to_file, report_execption
 
4
 
5
  def extract_code_block_carefully(txt):
6
  splitted = txt.split('```')
@@ -10,33 +11,6 @@ def extract_code_block_carefully(txt):
10
  txt_out = '```'.join(splitted[1:-1])
11
  return txt_out
12
 
13
- def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit, must_break_at_empty_line=True):
14
- def cut(txt_tocut, must_break_at_empty_line): # 递归
15
- if get_token_fn(txt_tocut) <= limit:
16
- return [txt_tocut]
17
- else:
18
- lines = txt_tocut.split('\n')
19
- estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
20
- estimated_line_cut = int(estimated_line_cut)
21
- for cnt in reversed(range(estimated_line_cut)):
22
- if must_break_at_empty_line:
23
- if lines[cnt] != "": continue
24
- print(cnt)
25
- prev = "\n".join(lines[:cnt])
26
- post = "\n".join(lines[cnt:])
27
- if get_token_fn(prev) < limit: break
28
- if cnt == 0:
29
- print('what the f?')
30
- raise RuntimeError("存在一行极长的文本!")
31
- print(len(post))
32
- # 列表递归接龙
33
- result = [prev]
34
- result.extend(cut(post, must_break_at_empty_line))
35
- return result
36
- try:
37
- return cut(txt, must_break_at_empty_line=True)
38
- except RuntimeError:
39
- return cut(txt, must_break_at_empty_line=False)
40
 
41
 
42
  def break_txt_into_half_at_some_linebreak(txt):
 
1
  import threading
2
  from request_llm.bridge_chatgpt import predict_no_ui_long_connection
3
  from toolbox import CatchException, write_results_to_file, report_execption
4
+ from .crazy_utils import breakdown_txt_to_satisfy_token_limit
5
 
6
  def extract_code_block_carefully(txt):
7
  splitted = txt.split('```')
 
11
  txt_out = '```'.join(splitted[1:-1])
12
  return txt_out
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
 
16
  def break_txt_into_half_at_some_linebreak(txt):
crazy_functions/批量翻译PDF文档_多线程.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
2
+ import re
3
+ import unicodedata
4
+ fast_debug = False
5
+
6
+
7
def is_paragraph_break(match):
    """Decide whether the newline captured by *match* separates paragraphs.

    A newline is treated as a paragraph break (returns "\\n\\n") when the
    preceding character ends a sentence, the following character is an
    uppercase letter, and enough text (> 140 chars) has accumulated before
    it; otherwise it is treated as a soft wrap (returns " ").
    """
    prev_char, next_char = match.groups()

    # The newline more likely separates paragraphs when the previous
    # character closes a sentence and the next one starts a capitalised word.
    ends_sentence = prev_char in ".!?"
    # Minimum amount of text before the newline for a plausible paragraph.
    long_enough = len(match.string[:match.start(1)]) > 140

    if ends_sentence and next_char.isupper() and long_enough:
        return "\n\n"
    return " "
25
+
26
+
27
def normalize_text(text):
    """Normalise *text* by decomposing ligatures and other compatibility
    glyphs into their basic forms (e.g. the single glyph for "fi" becomes
    "f" followed by "i"), then dropping any remaining non-ASCII characters.
    """
    # NFKD decomposition splits ligatures / compatibility forms apart.
    decomposed = unicodedata.normalize("NFKD", text)
    # Strip whatever is still outside the ASCII range (accents, symbols, ...).
    return re.sub(r'[^\x00-\x7F]+', '', decomposed)
39
+
40
+
41
def clean_text(raw_text):
    """Clean and reformat raw text extracted from a PDF.

    1. Normalise the text (decompose ligatures, drop non-ASCII).
    2. Re-join words that were hyphenated across a line break
       (e.g. "Espe-" + newline + "cially" becomes "Especially").
    3. Heuristically decide whether each remaining newline is a paragraph
       break, replacing it with a blank line or with a single space.
    """
    normalized = normalize_text(raw_text)

    # Stitch hyphenated line-break words back together.
    dehyphenated = re.sub(r'(\w+-\n\w+)',
                          lambda m: m.group(1).replace('-\n', ''),
                          normalized)

    # Every newline flanked by non-space characters is a candidate break.
    newline_pattern = re.compile(r'(\S)\n(\S)')

    # Apply the paragraph-break heuristic to each candidate.
    rejoined = newline_pattern.sub(
        lambda m: m.group(1) + is_paragraph_break(m) + m.group(2),
        dehyphenated)

    return rejoined.strip()
63
+
64
+
65
def read_and_clean_pdf_text(fp):
    """Read the PDF at path *fp* and return its text cleaned into
    one-paragraph-per-blank-line form.

    Extracts block-level text via PyMuPDF, then post-processes the blocks:
    short blocks become paragraph separators, redundant separators are
    dropped, and blocks starting with a lowercase word are merged into the
    preceding block (they are assumed to continue its sentence).
    """
    import fitz, re
    import numpy as np
    with fitz.open(fp) as doc:
        meta_txt = []
        meta_font = []
        for page in doc:
            text_areas = page.get_text("dict")  # layout-aware text info for the page

            # Block-level extraction: join all spans of all lines of each block,
            # and record the mean font size of each block.
            meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])
            meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']]) for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])

        def 把字符太少的块清除为回车(meta_txt):
            # Blocks with fewer than 100 characters (headers, page numbers, ...)
            # are replaced by a paragraph-separator marker.
            for index, block_txt in enumerate(meta_txt):
                if len(block_txt) < 100:
                    meta_txt[index] = '\n'
            return meta_txt
        meta_txt = 把字符太少的块清除为回车(meta_txt)

        def 清理多余的空行(meta_txt):
            # Collapse consecutive separator markers into one.
            for index in reversed(range(1, len(meta_txt))):
                if meta_txt[index] == '\n' and meta_txt[index-1] == '\n':
                    meta_txt.pop(index)
            return meta_txt
        meta_txt = 清理多余的空行(meta_txt)

        def 合并小写开头的段落块(meta_txt):
            # A block starting with a lowercase word continues the previous
            # block's sentence; merge it into the previous block.
            def starts_with_lowercase_word(s):
                return re.match(r"^[a-z]+", s) is not None
            for _ in range(100):
                for index, block_txt in enumerate(meta_txt):
                    if starts_with_lowercase_word(block_txt):
                        if meta_txt[index-1] != '\n':
                            meta_txt[index-1] += ' '
                        else:
                            meta_txt[index-1] = ''
                        meta_txt[index-1] += meta_txt[index]
                        meta_txt[index] = '\n'
            return meta_txt
        # FIX: the call below used a mojibake-corrupted identifier
        # (合并小写开头的??落块) in the source, which is a SyntaxError;
        # restored to the name defined above.
        meta_txt = 合并小写开头的段落块(meta_txt)
        meta_txt = 清理多余的空行(meta_txt)

        meta_txt = '\n'.join(meta_txt)
        # Collapse runs of newlines.
        for _ in range(5):
            meta_txt = meta_txt.replace('\n\n', '\n')

        # One newline -> blank line between paragraphs.
        meta_txt = meta_txt.replace('\n', '\n\n')

    return meta_txt
129
+
130
@CatchException
def 批量翻译PDF文档(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """Batch-translate every PDF found under the path given in *txt*.

    Generator plugin: yields (chatbot, history, status) tuples so the UI can
    refresh while the work proceeds. The actual translation is delegated to
    解析PDF.
    """
    import glob
    import os

    # Plugin description and contributors.
    # FIX: the message previously said "批量总结PDF文档" (summarize), but this
    # plugin translates.
    chatbot.append([
        "函数插件功能?",
        "批量翻译PDF文档。函数插件贡献者: Binary-Husky, ValeriaWong, Eralien"])
    yield chatbot, history, '正常'

    # Probe the optional dependencies; suggest an install command if missing.
    try:
        import fitz
        import tiktoken
    except ImportError:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}",
                         b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pymupdf```。")
        yield chatbot, history, '正常'
        return

    # Clear the history to avoid overflowing the model input.
    history = []

    # Validate the input path; bail out early if it does not exist.
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "":
            txt = '空空如也的输入栏'
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield chatbot, history, '正常'
        return

    # Collect the list of PDF files to process.
    file_manifest = [f for f in glob.glob(
        f'{project_folder}/**/*.pdf', recursive=True)]

    # Nothing to do if no PDF was found.
    # FIX: the message previously claimed ".tex或.pdf" but only *.pdf is searched.
    if len(file_manifest) == 0:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到任何.pdf文件: {txt}")
        yield chatbot, history, '正常'
        return

    # Delegate the real work.
    yield from 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
178
+
179
+
180
def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
    """Translate the given PDF files fragment-by-fragment with worker threads.

    Generator: yields (chatbot, history, status) tuples so the front end can
    refresh while the translation threads run.
    """
    import time
    import glob
    import os
    import fitz
    import tiktoken
    from concurrent.futures import ThreadPoolExecutor
    print('begin analysis on:', file_manifest)
    for index, fp in enumerate(file_manifest):
        # 1. Read and clean the PDF's text.
        file_content = read_and_clean_pdf_text(fp)
        # 2. Recursively split the text into fragments small enough for the model.
        from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
        enc = tiktoken.get_encoding("gpt2")
        TOKEN_LIMIT_PER_FRAGMENT = 2048
        get_token_num = lambda txt: len(enc.encode(txt))
        # Split.
        paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
            txt=file_content, get_token_fn=get_token_num, limit=TOKEN_LIMIT_PER_FRAGMENT)
        print([get_token_num(frag) for frag in paper_fragments])
        # 3. Translate paragraph fragments one by one.
        # 3.1. Start the multi-threaded phase.
        from request_llm.bridge_chatgpt import predict_no_ui_long_connection
        n_frag = len(paper_fragments)
        # Shared per-worker state: [partial output text, last watchdog-feed time].
        mutable = [["", time.time()] for _ in range(n_frag)]
        # Worker: translate one fragment with a blocking GPT request.
        def translate_(index, fragment, mutable):
            i_say = f"以下是你需要翻译的文章段落:{fragment}"
            # The GPT request takes a while.
            gpt_say = predict_no_ui_long_connection(
                inputs=i_say, top_p=top_p, temperature=temperature, history=[],
                sys_prompt="请你作为一个学术翻译,负责将给定的文章段落翻译成中文,要求语言简洁、精准、凝练。你只需要给出翻译后的文本,不能重复原文。",
                observe_window=mutable[index])
            return gpt_say
        # 4. Kick off the asynchronous tasks.
        executor = ThreadPoolExecutor(max_workers=16)
        # Submit tasks to the pool
        futures = [executor.submit(translate_, index, frag, mutable) for index, frag in enumerate(paper_fragments)]

        # 5. UI main-thread loop: live progress display while the workers run.
        cnt = 0
        while True:
            cnt += 1
            time.sleep(1)
            worker_done = [h.done() for h in futures]
            if all(worker_done):
                executor.shutdown(); break
            # Nicer UI feedback.
            observe_win = []
            # Feed every worker's watchdog (otherwise the worker aborts itself).
            for thread_index, _ in enumerate(worker_done):
                mutable[thread_index][1] = time.time()
            # Show the tail of each worker's partial output on the front end.
            for thread_index, _ in enumerate(worker_done):
                print_something_really_funny = "[ ...`"+mutable[thread_index][0][-30:].replace('\n','').replace('```','...').replace(' ','.').replace('<br/>','.....').replace('$','.')+"`... ]"
                observe_win.append(print_something_really_funny)
            stat_str = ''.join([f'执行中: {obs}\n\n' if not done else '已完成\n\n' for done, obs in zip(worker_done, observe_win)])
            chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt%10+1))]; msg = "正常"
            yield chatbot, history, msg

        # Wait for tasks to complete
        results = [future.result() for future in futures]

        print(results)
        # full_result += gpt_say

        # history.extend([fp, full_result])

    # NOTE(review): history is never extended above (the extend is commented
    # out), so the result file written here is likely empty — confirm intent.
    res = write_results_to_file(history)
    chatbot.append(("完成了吗?", res)); msg = "完成"
    yield chatbot, history, msg
252
+
253
+
254
+ # if __name__ == '__main__':
255
+ # pro()
crazy_functions/高级功能函数模板.py CHANGED
@@ -14,12 +14,13 @@ def 高阶功能模板函数(txt, top_p, temperature, chatbot, history, systemPr
14
  i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。'
15
  chatbot.append((i_say, "[Local Message] waiting gpt response."))
16
  yield chatbot, history, '正常' # 由于请求gpt需要一段时间,我们先及时地做一次状态显示
17
-
18
- # history = [] 每次询问不携带之前的询问历史
19
- gpt_say = predict_no_ui_long_connection(
20
- inputs=i_say, top_p=top_p, temperature=temperature, history=[],
21
- sys_prompt="当你想发送一张照片时,请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。") # 请求gpt,需要一段时间
22
-
 
23
  chatbot[-1] = (i_say, gpt_say)
24
  history.append(i_say);history.append(gpt_say)
25
  yield chatbot, history, '正常' # 显示
 
14
  i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。'
15
  chatbot.append((i_say, "[Local Message] waiting gpt response."))
16
  yield chatbot, history, '正常' # 由于请求gpt需要一段时间,我们先及时地做一次状态显示
17
+ try:
18
+ # history = [] 每次询问不携带之前的询问历史
19
+ gpt_say = predict_no_ui_long_connection(
20
+ inputs=i_say, top_p=top_p, temperature=temperature, history=[],
21
+ sys_prompt="当你想发送一张照片时,请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。") # 请求gpt,需要一段时间
22
+ except:
23
+ print("")
24
  chatbot[-1] = (i_say, gpt_say)
25
  history.append(i_say);history.append(gpt_say)
26
  yield chatbot, history, '正常' # 显示
main.py CHANGED
@@ -23,12 +23,12 @@ except:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.IN
23
  print("所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log, 请注意自我隐私保护哦!")
24
 
25
  # 一些普通功能模块
26
- from functional import get_functionals
27
- functional = get_functionals()
28
 
29
  # 高级函数插件
30
- from functional_crazy import get_crazy_functionals
31
- crazy_fns = get_crazy_functionals()
32
 
33
  # 处理markdown文本格式的转变
34
  gr.Chatbot.postprocess = format_io
 
23
  print("所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log, 请注意自我隐私保护哦!")
24
 
25
  # 一些普通功能模块
26
+ from core_functional import get_core_functions
27
+ functional = get_core_functions()
28
 
29
  # 高级函数插件
30
+ from crazy_functional import get_crazy_functions
31
+ crazy_fns = get_crazy_functions()
32
 
33
  # 处理markdown文本格式的转变
34
  gr.Chatbot.postprocess = format_io
request_llm/bridge_chatgpt.py CHANGED
@@ -12,6 +12,7 @@
12
  """
13
 
14
  import json
 
15
  import gradio as gr
16
  import logging
17
  import traceback
@@ -73,11 +74,20 @@ def predict_no_ui(inputs, top_p, temperature, history=[], sys_prompt=""):
73
 
74
  def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_prompt="", observe_window=None):
75
  """
76
- 发送至chatGPT,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免有人中途掐网线。
77
- observe_window:用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可
 
 
 
 
 
 
 
 
 
78
  """
 
79
  headers, payload = generate_payload(inputs, top_p, temperature, history, system_prompt=sys_prompt, stream=True)
80
-
81
  retry = 0
82
  while True:
83
  try:
@@ -109,10 +119,16 @@ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_pr
109
  if "content" in delta:
110
  result += delta["content"]
111
  print(delta["content"], end='')
112
- if observe_window is not None: observe_window[0] += delta["content"]
 
 
 
 
 
 
113
  else: raise RuntimeError("意外Json结构:"+delta)
114
  if json_data['finish_reason'] == 'length':
115
- raise ConnectionAbortedError("正常结束,但显示Token不足。")
116
  return result
117
 
118
 
@@ -128,11 +144,11 @@ def predict(inputs, top_p, temperature, chatbot=[], history=[], system_prompt=''
128
  additional_fn代表点击的哪个按钮,按钮见functional.py
129
  """
130
  if additional_fn is not None:
131
- import functional
132
- importlib.reload(functional) # 热更新prompt
133
- functional = functional.get_functionals()
134
- if "PreProcess" in functional[additional_fn]: inputs = functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
135
- inputs = functional[additional_fn]["Prefix"] + inputs + functional[additional_fn]["Suffix"]
136
 
137
  if stream:
138
  raw_input = inputs
@@ -189,10 +205,10 @@ def predict(inputs, top_p, temperature, chatbot=[], history=[], system_prompt=''
189
  chunk = get_full_error(chunk, stream_response)
190
  error_msg = chunk.decode()
191
  if "reduce the length" in error_msg:
192
- chatbot[-1] = (chatbot[-1][0], "[Local Message] Input (or history) is too long, please reduce input or clear history by refreshing this page.")
193
  history = [] # 清除历史
194
  elif "Incorrect API key" in error_msg:
195
- chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key provided.")
196
  elif "exceeded your current quota" in error_msg:
197
  chatbot[-1] = (chatbot[-1][0], "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由,拒绝服务.")
198
  else:
 
12
  """
13
 
14
  import json
15
+ import time
16
  import gradio as gr
17
  import logging
18
  import traceback
 
74
 
75
  def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_prompt="", observe_window=None):
76
  """
77
+ 发送至chatGPT,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。
78
+ inputs:
79
+ 是本次问询的输入
80
+ sys_prompt:
81
+ 系统静默prompt
82
+ top_p, temperature:
83
+ chatGPT的内部调优参数
84
+ history:
85
+ 是之前的对话列表
86
+ observe_window = None:
87
+ 用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗
88
  """
89
+ watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可
90
  headers, payload = generate_payload(inputs, top_p, temperature, history, system_prompt=sys_prompt, stream=True)
 
91
  retry = 0
92
  while True:
93
  try:
 
119
  if "content" in delta:
120
  result += delta["content"]
121
  print(delta["content"], end='')
122
+ if observe_window is not None:
123
+ # 观测窗,把已经获取的数据显示出去
124
+ if len(observe_window) >= 1: observe_window[0] += delta["content"]
125
+ # 看门狗,如果超过期限没有喂狗,则终止
126
+ if len(observe_window) >= 2:
127
+ if (time.time()-observe_window[1]) > watch_dog_patience:
128
+ raise RuntimeError("程序终止。")
129
  else: raise RuntimeError("意外Json结构:"+delta)
130
  if json_data['finish_reason'] == 'length':
131
+ raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
132
  return result
133
 
134
 
 
144
  additional_fn代表点击的哪个按钮,按钮见functional.py
145
  """
146
  if additional_fn is not None:
147
+ import core_functional
148
+ importlib.reload(core_functional) # 热更新prompt
149
+ core_functional = core_functional.get_functions()
150
+ if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
151
+ inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
152
 
153
  if stream:
154
  raw_input = inputs
 
205
  chunk = get_full_error(chunk, stream_response)
206
  error_msg = chunk.decode()
207
  if "reduce the length" in error_msg:
208
+ chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长,或历史数据过长. 历史缓存数据现已释放,您可以请再次尝试.")
209
  history = [] # 清除历史
210
  elif "Incorrect API key" in error_msg:
211
+ chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由,拒绝服务.")
212
  elif "exceeded your current quota" in error_msg:
213
  chatbot[-1] = (chatbot[-1][0], "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由,拒绝服务.")
214
  else:
request_llm/bridge_tgui.py CHANGED
@@ -101,11 +101,11 @@ def predict_tgui(inputs, top_p, temperature, chatbot=[], history=[], system_prom
101
  additional_fn代表点击的哪个按钮,按钮见functional.py
102
  """
103
  if additional_fn is not None:
104
- import functional
105
- importlib.reload(functional) # 热更新prompt
106
- functional = functional.get_functionals()
107
- if "PreProcess" in functional[additional_fn]: inputs = functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
108
- inputs = functional[additional_fn]["Prefix"] + inputs + functional[additional_fn]["Suffix"]
109
 
110
  raw_input = "What I would like to say is the following: " + inputs
111
  logging.info(f'[raw_input] {raw_input}')
 
101
  additional_fn代表点击的哪个按钮,按钮见functional.py
102
  """
103
  if additional_fn is not None:
104
+ import core_functional
105
+ importlib.reload(core_functional) # 热更新prompt
106
+ core_functional = core_functional.get_functions()
107
+ if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
108
+ inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
109
 
110
  raw_input = "What I would like to say is the following: " + inputs
111
  logging.info(f'[raw_input] {raw_input}')
requirements.txt CHANGED
@@ -5,3 +5,4 @@ Markdown
5
  latex2mathml
6
  openai
7
  transformers
 
 
5
  latex2mathml
6
  openai
7
  transformers
8
+ numpy