Add application file
Browse files
app.py
ADDED
@@ -0,0 +1,640 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import shutil
|
3 |
+
|
4 |
+
from chains.local_doc_qa import LocalDocQA
|
5 |
+
from configs.model_config import *
|
6 |
+
import nltk
|
7 |
+
import models.shared as shared
|
8 |
+
from models.loader.args import parser
|
9 |
+
from models.loader import LoaderCheckPoint
|
10 |
+
import os
|
11 |
+
import pandas as pd
|
12 |
+
|
13 |
+
# Put the project-configured NLTK data directory first so it is searched
# before NLTK's default locations.
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
|
14 |
+
|
15 |
+
|
16 |
+
def get_vs_list():
    """Return the knowledge-base names offered by the UI.

    The fixed entry ``"python_bot"`` always comes first. It is followed by
    the entries found directly under ``KB_ROOT_PATH``, in sorted order. When
    that directory is missing or empty, only the fixed entry is returned.
    """
    names = ["python_bot"]
    if os.path.exists(KB_ROOT_PATH):
        names.extend(sorted(os.listdir(KB_ROOT_PATH)))
    return names
|
25 |
+
|
26 |
+
|
27 |
+
# Selectable model names, taken from the configuration dictionaries.
embedding_model_dict_list = list(embedding_model_dict)
llm_model_dict_list = list(llm_model_dict)

# Question-answering pipeline shared by the handlers in this file.
local_doc_qa = LocalDocQA()

# CSV logger that get_answer() uses to record each exchange.
flag_csv_logger = gr.CSVLogger()

# Name of the most recently authenticated user; overwritten by login().
user = "None"

# NOTE(review): plaintext credentials. login() builds its own list from
# student() and does not appear to read this one -- confirm before removing.
users = [
    ("wsy", "123456"),
    ("wdy", "654321"),
    ("lhj", "123456"),
    ("hhy", "123456"),
    ("yl", "123456"),
    ("hy", "123456"),
]

# Machine-specific absolute path; this name is rebound to a gr.State when the
# UI is built further down.
vs_path = "/home/wsy/Langchain-chat/Langchain-Chatchat/knowledge_base"
|
47 |
+
|
48 |
+
def get_answer(query, vs_path, history, mode, score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
               vector_search_top_k=VECTOR_SEARCH_TOP_K, chunk_conent: bool = True,
               chunk_size=CHUNK_SIZE, streaming: bool = STREAMING):
    """Stream an answer for ``query`` and record the finished exchange.

    This is a generator. Every item is a ``(history, "")`` pair: the updated
    chat history and an empty string.

    When ``mode`` is the knowledge-base mode and ``vs_path`` is an existing
    directory containing ``index.faiss``, the answer comes from
    ``local_doc_qa.get_knowledge_based_answer`` and the retrieved source
    documents are appended to the last reply as collapsible ``<details>``
    sections. Otherwise the query goes directly to
    ``local_doc_qa.llm_model_chain``.

    Once the stream is exhausted the exchange is logged and flagged through
    ``flag_csv_logger`` under the module-level ``user`` name.

    ``score_threshold``, ``vector_search_top_k``, ``chunk_conent`` and
    ``chunk_size`` are accepted but not read by the active code paths. The
    Bing-search and knowledge-base-test modes that used them are disabled.
    """
    use_knowledge_base = (
        mode == "知识库问答"
        and vs_path is not None
        and os.path.exists(vs_path)
        and "index.faiss" in os.listdir(vs_path)
    )

    if use_knowledge_base:
        answers = local_doc_qa.get_knowledge_based_answer(
            query=query, vs_path=vs_path, chat_history=history, streaming=streaming)
        for resp, history in answers:
            history[-1][-1] += "\n\n" + _format_sources(resp["source_documents"])
            yield history, ""
    else:
        chain_output = local_doc_qa.llm_model_chain(
            {"prompt": query, "history": history, "streaming": streaming})
        for partial in chain_output['answer_result_stream']:
            resp = partial.llm_output["answer"]
            history = partial.history
            history[-1][-1] = resp
            yield history, ""

    logger.info(f"flagging: username={user},query={query},vs_path={vs_path},mode={mode},history={history}")
    flag_csv_logger.flag([query, vs_path, history, mode], username=user)


def _format_sources(docs):
    """Render retrieved documents as numbered, collapsible HTML sections."""
    sections = []
    for number, doc in enumerate(docs, start=1):
        file_name = os.path.basename(doc.metadata["source"])
        sections.append(
            f"""<details> <summary>出处 [{number}] {file_name}</summary>\n"""
            f"""{doc.page_content}\n"""
            f"""</details>"""
        )
    return "".join(sections)
|
112 |
+
|
113 |
+
|
114 |
+
def init_model():
    """Load the LLM from command-line arguments and smoke-test it.

    Parses the CLI arguments, stores a ``LoaderCheckPoint`` on ``shared``,
    loads the model and configures ``local_doc_qa`` with it. One test prompt
    is then sent and each streamed result is printed.

    Returns:
        A status message saying whether the model loaded. Any exception
        raised while configuring or probing the model is logged and turned
        into the failure message instead of propagating.
    """
    cli_options = vars(parser.parse_args())
    shared.loaderCheckPoint = LoaderCheckPoint(cli_options)
    llm = shared.loaderLLM()
    llm.history_len = LLM_HISTORY_LEN

    try:
        local_doc_qa.init_cfg(llm_model=llm)
        probe = local_doc_qa.llm_model_chain(
            {"prompt": "你好", "history": [], "streaming": False})
        for result in probe['answer_result_stream']:
            print(result.llm_output)
        reply = """模型已成功加载,可以开始对话"""
        logger.info(reply)
    except Exception as e:
        logger.error(e)
        reply = """模型未成功加载,请到页面左上角"模型配置"选项卡中重新选择后点击"加载模型"按钮"""
        if str(e) != "Unknown platform: darwin":
            logger.info(reply)
        else:
            # On this specific error, log the macOS setup hint instead.
            logger.info("该报错可能因为您使用的是 macOS 操作系统,需先下载模型至本地后执行 Web UI,具体方法请参考项目 README 中本地部署方法及常见问题:"
                        " https://github.com/imClumsyPanda/langchain-ChatGLM")
    return reply
|
140 |
+
|
141 |
+
|
142 |
+
def reinit_model(llm_model, embedding_model, llm_history_len, no_remote_model, use_ptuning_v2, use_lora, top_k,
                 history):
    """Reload the LLM and embedding model with the given settings.

    Returns ``history`` extended with one bot message reporting whether the
    reload succeeded. A failure is logged rather than raised. ``use_lora``
    is accepted but not read here.
    """
    try:
        llm = shared.loaderLLM(llm_model, no_remote_model, use_ptuning_v2)
        llm.history_len = llm_history_len
        local_doc_qa.init_cfg(llm_model=llm,
                              embedding_model=embedding_model,
                              top_k=top_k)
    except Exception as e:
        logger.error(e)
        model_status = """模型未成功重新加载,请到页面左上角"模型配置"选项卡中重新选择后点击"加载模型"按钮"""
    else:
        model_status = """模型已成功重新加载"""
    logger.info(model_status)
    return history + [[None, model_status]]
|
157 |
+
|
158 |
+
|
159 |
+
def get_vector_store(vs_id, files, sentence_size, history, one_conent, one_content_segmentation):
    """Add uploaded files, or one text entry, to knowledge base ``vs_id``.

    When ``files`` is a list, each upload is moved into the knowledge base's
    ``content`` directory and indexed. Otherwise ``files`` is passed on as a
    single entry together with ``one_conent`` and
    ``one_content_segmentation``.

    Returns:
        A 4-tuple: the vector-store path (``None`` when the models are not
        loaded), ``None``, ``history`` plus one status message, and a Gradio
        update listing the files now in the vector store.
    """
    kb_dir = os.path.join(KB_ROOT_PATH, vs_id)
    vs_path = os.path.join(kb_dir, "vector_store")

    if not (local_doc_qa.llm_model_chain and local_doc_qa.embeddings):
        file_status = "模型未完成加载,请先在加载模型后再导入文件"
        vs_path = None
    else:
        if not isinstance(files, list):
            vs_path, loaded_files = local_doc_qa.one_knowledge_add(
                vs_path, files, one_conent, one_content_segmentation, sentence_size)
        else:
            stored_paths = []
            for upload in files:
                target = os.path.join(kb_dir, "content", os.path.basename(upload.name))
                shutil.move(upload.name, target)
                stored_paths.append(target)
            vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store(
                stored_paths, vs_path, sentence_size)

        if len(loaded_files):
            added = '、'.join(os.path.basename(path) for path in loaded_files if path)
            file_status = f"已添加 {added} 内容至知识库,并已加载知识库,请开始提问"
        else:
            file_status = "文件未成功加载,请重新上传文件"

    logger.info(file_status)
    remaining = local_doc_qa.list_file_from_vector_store(vs_path) if vs_path else []
    return vs_path, None, history + [[None, file_status]], gr.update(choices=remaining)
|
182 |
+
|
183 |
+
|
184 |
+
def change_vs_name_input(vs_id, history):
    """Update the knowledge-base widgets after the selection changes.

    Returns a 7-tuple: three visibility updates, the vector-store path
    (``None`` for the "create new" entry), the chat history, an update for
    the file checkbox group, and one more visibility update.
    """
    if vs_id == "新建知识库":
        return (gr.update(visible=True), gr.update(visible=True), gr.update(visible=False),
                None, history, gr.update(choices=[]), gr.update(visible=False))

    vs_path = os.path.join(KB_ROOT_PATH, vs_id, "vector_store")
    has_index = "index.faiss" in os.listdir(vs_path)

    if has_index:
        file_status = f"已加载知识库{vs_id},请开始提问"
    else:
        file_status = f"已选择知识库{vs_id},当前知识库中未上传文件,请先上传文件后,再开始提问"

    head = (gr.update(visible=False), gr.update(visible=False), gr.update(visible=True),
            vs_path, history + [[None, file_status]])
    if has_index:
        tail = (gr.update(choices=local_doc_qa.list_file_from_vector_store(vs_path), value=[]),
                gr.update(visible=True))
    else:
        tail = (gr.update(choices=[], value=[]), gr.update(visible=True, value=[]))
    return head + tail
|
201 |
+
|
202 |
+
|
203 |
+
# Markdown notice shown when the knowledge-base test mode is entered.
# Each numbered item ends with a blank line so that it renders as its own
# paragraph; item 2 previously lacked it and ran into item 3.
knowledge_base_test_mode_info = ("【注意】\n\n"
                                 "1. 您已进入知识库测试模式,您输入的任何对话内容都将用于进行知识库查询,"
                                 "并仅输出知识库匹配出的内容及相似度分值和及输入的文本源路径,查询的内容并不会进入模型查询。\n\n"
                                 "2. 知识相关度 Score 经测试,建议设置为 500 或更低,具体设置情况请结合实际使用调整。\n\n"
                                 """3. 使用"添加单条数据"添加文本至知识库时,内容如未分段,则内容越多越会稀释各查询内容与之关联的score阈值。\n\n"""
                                 "4. 单条内容长度建议设置在100-150左右。\n\n"
                                 "5. 本界面用于知识入库及知识匹配相关参数设定,但当前版本中,"
                                 "本界面中修改的参数并不会直接修改对话界面中参数,仍需前往`configs/model_config.py`修改后生效。"
                                 "相关参数将在后续版本中支持本界面直接修改。")
|
212 |
+
|
213 |
+
|
214 |
+
def change_mode(mode, history):
    """Show or hide the knowledge-base panels for the selected mode.

    Returns a 3-tuple: a visibility update that is shown in both
    knowledge-base modes, a visibility update that is shown only in test
    mode, and the chat history. In test mode the history is replaced by the
    test-mode notice; in every other mode it is returned unchanged.
    """
    testing = mode == "知识库测试"
    uses_knowledge_base = testing or mode == "知识库问答"
    chat = [[None, knowledge_base_test_mode_info]] if testing else history
    return gr.update(visible=uses_knowledge_base), gr.update(visible=testing), chat
|
223 |
+
|
224 |
+
|
225 |
+
def change_chunk_conent(mode, label_conent, history):
    """Toggle a dependent widget and announce the switch in the chat.

    ``label_conent`` selects which feature name appears in the message.
    ``mode`` is the new on/off state.

    Returns a 2-tuple: a visibility update matching ``mode`` and ``history``
    extended with one message.
    """
    if "chunk_conent" in label_conent:
        feature = "搜索结果上下文关联"
    elif "one_content_segmentation" in label_conent:
        # Original author's note: this branch is not used yet but is kept.
        feature = "内容分段入库"
    else:
        feature = ""

    state = "已开启" if mode else "已关闭"
    return gr.update(visible=bool(mode)), history + [[None, f"【{state}{feature}】"]]
|
236 |
+
|
237 |
+
|
238 |
+
def add_vs_name(vs_name, chatbot):
    """Create a new, empty knowledge base named ``vs_name``.

    A blank name, or a name already returned by ``get_vs_list()``, is
    rejected with a chat message. Otherwise the ``content`` and
    ``vector_store`` directories are created under ``KB_ROOT_PATH/vs_name``.

    Returns a 6-tuple: four Gradio updates, the chat history extended with a
    status message, and one more Gradio update.
    """
    if vs_name is None or vs_name.strip() == "":
        rejection = "知识库名称不能为空,请重新填写知识库名称"
    elif vs_name in get_vs_list():
        rejection = "与已有知识库名称冲突,请重新选择其他名称后提交"
    else:
        rejection = None

    if rejection is not None:
        return (gr.update(visible=True), gr.update(visible=True), gr.update(visible=True),
                gr.update(visible=False), chatbot + [[None, rejection]], gr.update(visible=False))

    # Storage for uploaded files and for the vector index.
    for subdir in ("content", "vector_store"):
        target = os.path.join(KB_ROOT_PATH, vs_name, subdir)
        if not os.path.exists(target):
            os.makedirs(target)

    vs_status = f"""已新增知识库"{vs_name}",将在上传文件并载入成功后进行存储。请在开始对话前,先完成文件上传。 """
    return (gr.update(visible=True, choices=get_vs_list(), value=vs_name), gr.update(visible=False),
            gr.update(visible=False), gr.update(visible=True), chatbot + [[None, vs_status]],
            gr.update(visible=True))
|
260 |
+
|
261 |
+
|
262 |
+
# 自动化加载固定文件间中文件
|
263 |
+
def reinit_vector_store(vs_id, history):
    """Rebuild the vector store of knowledge base ``vs_id`` from its files.

    The existing ``vector_store`` directory is deleted and re-created by
    indexing the knowledge base's ``content`` directory.

    The sentence-length limit is the configured ``SENTENCE_SIZE`` integer.
    The previous code built a ``gr.Number`` component here and passed the
    component object as the limit.

    Returns:
        ``history`` extended with one message reporting success or failure.
        A failure is logged rather than raised.
    """
    kb_dir = os.path.join(KB_ROOT_PATH, vs_id)
    vs_path = os.path.join(kb_dir, "vector_store")
    try:
        shutil.rmtree(vs_path)
        local_doc_qa.init_knowledge_vector_store(os.path.join(kb_dir, "content"),
                                                 vs_path, SENTENCE_SIZE)
        model_status = """知识库构建成功"""
    except Exception as e:
        logger.error(e)
        model_status = """知识库构建未成功"""
    logger.info(model_status)
    return history + [[None, model_status]]
|
278 |
+
|
279 |
+
|
280 |
+
def refresh_vs_list():
    """Return two dropdown updates, each with a freshly read knowledge-base list."""
    first = gr.update(choices=get_vs_list())
    second = gr.update(choices=get_vs_list())
    return first, second
|
282 |
+
|
283 |
+
|
284 |
+
def delete_file(vs_id, files_to_delete, chatbot):
    """Remove the selected files from knowledge base ``vs_id``.

    The files are first removed from the vector store. Only when the
    returned status does not contain ``"fail"`` are the files under
    ``content`` deleted from disk.

    Returns a 2-tuple: an update for the file checkbox group listing what
    remains, and the chat history extended with a status message.
    """
    kb_dir = os.path.join(KB_ROOT_PATH, vs_id)
    vs_path = os.path.join(kb_dir, "vector_store")
    content_path = os.path.join(kb_dir, "content")
    docs_path = [os.path.join(content_path, name) for name in files_to_delete]

    status = local_doc_qa.delete_file_from_vector_store(vs_path=vs_path,
                                                        filepath=docs_path)
    failed = "fail" in status
    if not failed:
        for doc_path in filter(os.path.exists, docs_path):
            os.remove(doc_path)

    rested_files = local_doc_qa.list_file_from_vector_store(vs_path)
    if failed:
        vs_status = "文件删除失败。"
    elif len(rested_files) > 0:
        vs_status = "文件删除成功。"
    else:
        vs_status = f"文件删除成功,知识库{vs_id}中无已上传文件,请先上传文件后,再开始提问。"

    logger.info(",".join(files_to_delete) + vs_status)
    remaining = local_doc_qa.list_file_from_vector_store(vs_path)
    return gr.update(choices=remaining, value=[]), chatbot + [[None, vs_status]]
|
304 |
+
|
305 |
+
|
306 |
+
def delete_vs(vs_id, chatbot):
    """Delete knowledge base ``vs_id`` and everything stored under it.

    A blank ``vs_id`` is treated as a failure. Joining an empty name onto
    ``KB_ROOT_PATH`` yields the root itself, and removing that would wipe
    every knowledge base.

    Returns a 6-tuple: four Gradio updates, the chat history extended with a
    status message, and one more Gradio update. A failure is logged rather
    than raised.
    """
    try:
        if not vs_id or not str(vs_id).strip():
            raise ValueError("refusing to delete: empty knowledge-base name")
        shutil.rmtree(os.path.join(KB_ROOT_PATH, vs_id))
        status = f"成功删除知识库{vs_id}"
        logger.info(status)
        chatbot = chatbot + [[None, status]]
        return gr.update(choices=get_vs_list(), value=get_vs_list()[0]), gr.update(visible=True), gr.update(
            visible=True), \
            gr.update(visible=False), chatbot, gr.update(visible=False)
    except Exception as e:
        logger.error(e)
        status = f"删除知识库{vs_id}失败"
        chatbot = chatbot + [[None, status]]
        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), \
            gr.update(visible=True), chatbot, gr.update(visible=True)
|
321 |
+
|
322 |
+
|
323 |
+
# Gradient styling for buttons that carry the "importantButton" class.
block_css = """.importantButton {
    background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
    border: none !important;
}
.importantButton:hover {
    background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
    border: none !important;
}"""

# Markdown heading rendered at the top of the page.
webui_title = "\n# 🎉Welcome Python bot🎉\n"

# First message shown in the chat window.
init_message = """欢迎使用 Python bot!

在下侧对话框输入问题后,按下Shift+回车即可换行继续输入,按下回车即可获得回复!


若想询问程序报错相关问题,将报错信息最后的报错原因贴上来即可。

"""

# Load the model at import time; the returned status text is shown in the chat.
model_status = init_model()

default_theme_args = {
    "font": ["Source Sans Pro", 'ui-sans-serif', 'system-ui', 'sans-serif'],
    "font_mono": ['IBM Plex Mono', 'ui-monospace', 'Consolas', 'monospace'],
}
|
352 |
+
|
353 |
+
with gr.Blocks(css=block_css, theme=gr.themes.Default(**default_theme_args)) as demo:
    # Session state. ``vs_path`` and ``model_status`` rebind the module-level
    # names of the same spelling to gr.State holders.
    vs_path = gr.State(
        os.path.join(KB_ROOT_PATH, get_vs_list()[0], "vector_store") if len(get_vs_list()) > 1 else "")
    file_status = gr.State("")
    model_status = gr.State(model_status)

    gr.Markdown(webui_title)
    with gr.Tab("对话"):
        with gr.Row():
            with gr.Column(scale=10):
                opening_messages = [[None, init_message], [None, model_status.value]]
                chatbot = gr.Chatbot(opening_messages,
                                     elem_id="chat-box",
                                     show_label=False).style(height=750)
                query = gr.Textbox(show_label=False,
                                   placeholder="请输入提问内容,按回车进行提交").style(container=False)
                # Only the knowledge-base Q&A mode is offered. The widgets
                # for choosing, creating, filling and deleting knowledge
                # bases are disabled in this deployment.
                mode = gr.Radio(["知识库问答"],
                                show_label=False,
                                value="知识库问答")

                chat_inputs = [query, vs_path, chatbot, mode]
                # Flagged exchanges are written under the "student_log" directory.
                flag_csv_logger.setup(chat_inputs, "student_log")
                query.submit(get_answer,
                             chat_inputs,
                             [chatbot, query])
|
437 |
+
# delete_file_button.click(delete_file,
|
438 |
+
# show_progress=True,
|
439 |
+
# inputs=[select_vs, files_to_delete, chatbot],
|
440 |
+
# outputs=[files_to_delete, chatbot])
|
441 |
+
# with gr.Tab("知识库测试 Beta"):
|
442 |
+
# with gr.Row():
|
443 |
+
# with gr.Column(scale=10):
|
444 |
+
# chatbot = gr.Chatbot([[None, knowledge_base_test_mode_info]],
|
445 |
+
# elem_id="chat-box",
|
446 |
+
# show_label=False).style(height=750)
|
447 |
+
# query = gr.Textbox(show_label=False,
|
448 |
+
# placeholder="请输入提问内容,按回车进行提交").style(container=False)
|
449 |
+
# with gr.Column(scale=5):
|
450 |
+
# mode = gr.Radio(["知识库测试"], # "知识库问答",
|
451 |
+
# label="请选择使用模式",
|
452 |
+
# value="知识库测试",
|
453 |
+
# visible=False)
|
454 |
+
# knowledge_set = gr.Accordion("知识库设定", visible=True)
|
455 |
+
# vs_setting = gr.Accordion("配置知识库", visible=True)
|
456 |
+
# mode.change(fn=change_mode,
|
457 |
+
# inputs=[mode, chatbot],
|
458 |
+
# outputs=[vs_setting, knowledge_set, chatbot])
|
459 |
+
# with knowledge_set:
|
460 |
+
# score_threshold = gr.Number(value=VECTOR_SEARCH_SCORE_THRESHOLD,
|
461 |
+
# label="知识相关度 Score 阈值,分值越低匹配度越高",
|
462 |
+
# precision=0,
|
463 |
+
# interactive=True)
|
464 |
+
# vector_search_top_k = gr.Number(value=VECTOR_SEARCH_TOP_K, precision=0,
|
465 |
+
# label="获取知识库内容条数", interactive=True)
|
466 |
+
# chunk_conent = gr.Checkbox(value=False,
|
467 |
+
# label="是否启用上下文关联",
|
468 |
+
# interactive=True)
|
469 |
+
# chunk_sizes = gr.Number(value=CHUNK_SIZE, precision=0,
|
470 |
+
# label="匹配单段内容的连接上下文后最大长度",
|
471 |
+
# interactive=True, visible=False)
|
472 |
+
# chunk_conent.change(fn=change_chunk_conent,
|
473 |
+
# inputs=[chunk_conent, gr.Textbox(value="chunk_conent", visible=False), chatbot],
|
474 |
+
# outputs=[chunk_sizes, chatbot])
|
475 |
+
# with vs_setting:
|
476 |
+
# vs_refresh = gr.Button("更新已有知识库选项")
|
477 |
+
# select_vs_test = gr.Dropdown(get_vs_list(),
|
478 |
+
# label="请选择要加载的知识库",
|
479 |
+
# interactive=True,
|
480 |
+
# value=get_vs_list()[0] if len(get_vs_list()) > 0 else None)
|
481 |
+
# vs_name = gr.Textbox(label="请输入新建知识库名称,当前知识库命名暂不支持中文",
|
482 |
+
# lines=1,
|
483 |
+
# interactive=True,
|
484 |
+
# visible=True)
|
485 |
+
# vs_add = gr.Button(value="添加至知识库选项", visible=True)
|
486 |
+
# file2vs = gr.Column(visible=False)
|
487 |
+
# with file2vs:
|
488 |
+
# # load_vs = gr.Button("加载知识库")
|
489 |
+
# gr.Markdown("向知识库中添加单条内容或文件")
|
490 |
+
# sentence_size = gr.Number(value=SENTENCE_SIZE, precision=0,
|
491 |
+
# label="文本入库分句长度限制",
|
492 |
+
# interactive=True, visible=True)
|
493 |
+
# with gr.Tab("上传文件"):
|
494 |
+
# files = gr.File(label="添加文件",
|
495 |
+
# file_types=['.txt', '.md', '.docx', '.pdf'],
|
496 |
+
# file_count="multiple",
|
497 |
+
# show_label=False
|
498 |
+
# )
|
499 |
+
# load_file_button = gr.Button("上传文件并加载知识库")
|
500 |
+
# with gr.Tab("上传文件夹"):
|
501 |
+
# folder_files = gr.File(label="添加文件",
|
502 |
+
# # file_types=['.txt', '.md', '.docx', '.pdf'],
|
503 |
+
# file_count="directory",
|
504 |
+
# show_label=False)
|
505 |
+
# load_folder_button = gr.Button("上传文件夹并加载知识库")
|
506 |
+
# with gr.Tab("添加单条内容"):
|
507 |
+
# one_title = gr.Textbox(label="标题", placeholder="请输入要添加单条段落的标题", lines=1)
|
508 |
+
# one_conent = gr.Textbox(label="内容", placeholder="请输入要添加单条段落的内容", lines=5)
|
509 |
+
# one_content_segmentation = gr.Checkbox(value=True, label="禁止内容分句入库",
|
510 |
+
# interactive=True)
|
511 |
+
# load_conent_button = gr.Button("添加内容并加载知识库")
|
512 |
+
# # 将上传的文件保存到content文件夹下,并更新下拉框
|
513 |
+
# vs_refresh.click(fn=refresh_vs_list,
|
514 |
+
# inputs=[],
|
515 |
+
# outputs=[select_vs, select_vs_test])
|
516 |
+
# vs_add.click(fn=add_vs_name,
|
517 |
+
# inputs=[vs_name, chatbot],
|
518 |
+
# outputs=[select_vs_test, vs_name, vs_add, file2vs, chatbot])
|
519 |
+
# select_vs_test.change(fn=change_vs_name_input,
|
520 |
+
# inputs=[select_vs_test, chatbot],
|
521 |
+
# outputs=[vs_name, vs_add, file2vs, vs_path, chatbot])
|
522 |
+
# load_file_button.click(get_vector_store,
|
523 |
+
# show_progress=True,
|
524 |
+
# inputs=[select_vs_test, files, sentence_size, chatbot, vs_add, vs_add],
|
525 |
+
# outputs=[vs_path, files, chatbot], )
|
526 |
+
# load_folder_button.click(get_vector_store,
|
527 |
+
# show_progress=True,
|
528 |
+
# inputs=[select_vs_test, folder_files, sentence_size, chatbot, vs_add,
|
529 |
+
# vs_add],
|
530 |
+
# outputs=[vs_path, folder_files, chatbot], )
|
531 |
+
# load_conent_button.click(get_vector_store,
|
532 |
+
# show_progress=True,
|
533 |
+
# inputs=[select_vs_test, one_title, sentence_size, chatbot,
|
534 |
+
# one_conent, one_content_segmentation],
|
535 |
+
# outputs=[vs_path, files, chatbot], )
|
536 |
+
# flag_csv_logger.setup([query, vs_path, chatbot, mode], "flagged")
|
537 |
+
# query.submit(get_answer,
|
538 |
+
# [query, vs_path, chatbot, mode, score_threshold, vector_search_top_k, chunk_conent,
|
539 |
+
# chunk_sizes],
|
540 |
+
# [chatbot, query])
|
541 |
+
# with gr.Tab("模型配置"):
|
542 |
+
# llm_model = gr.Radio(llm_model_dict_list,
|
543 |
+
# label="LLM 模型",
|
544 |
+
# value=LLM_MODEL,
|
545 |
+
# interactive=True)
|
546 |
+
# no_remote_model = gr.Checkbox(shared.LoaderCheckPoint.no_remote_model,
|
547 |
+
# label="加载本地模型",
|
548 |
+
# interactive=True)
|
549 |
+
|
550 |
+
# llm_history_len = gr.Slider(0, 10,
|
551 |
+
# value=LLM_HISTORY_LEN,
|
552 |
+
# step=1,
|
553 |
+
# label="LLM 对话轮数",
|
554 |
+
# interactive=True)
|
555 |
+
# use_ptuning_v2 = gr.Checkbox(USE_PTUNING_V2,
|
556 |
+
# label="使用p-tuning-v2微调过的模型",
|
557 |
+
# interactive=True)
|
558 |
+
# use_lora = gr.Checkbox(USE_LORA,
|
559 |
+
# label="使用lora微调的权重",
|
560 |
+
# interactive=True)
|
561 |
+
# embedding_model = gr.Radio(embedding_model_dict_list,
|
562 |
+
# label="Embedding 模型",
|
563 |
+
# value=EMBEDDING_MODEL,
|
564 |
+
# interactive=True)
|
565 |
+
# top_k = gr.Slider(1, 20, value=VECTOR_SEARCH_TOP_K, step=1,
|
566 |
+
# label="向量匹配 top k", interactive=True)
|
567 |
+
# load_model_button = gr.Button("重新加载模型")
|
568 |
+
# load_model_button.click(reinit_model, show_progress=True,
|
569 |
+
# inputs=[llm_model, embedding_model, llm_history_len, no_remote_model, use_ptuning_v2,
|
570 |
+
# use_lora, top_k, chatbot], outputs=chatbot)
|
571 |
+
# load_knowlege_button = gr.Button("重新构建知识库")
|
572 |
+
# load_knowlege_button.click(reinit_vector_store, show_progress=True,
|
573 |
+
# inputs=[select_vs, chatbot], outputs=chatbot)
|
574 |
+
|
575 |
+
def gradio_callback(inputs, outputs):
    """Print the ``'username'`` entry of ``inputs``; ``outputs`` is not read."""
    print("Current username:", inputs['username'])
|
580 |
+
|
581 |
+
def student(roster_dir="/home/wsy/Langchain-chat/Langchain-Chatchat/stuendt",
            roster_files=("hy_student1.xlsx", "hy_student2.xlsx", "lhj_student.xlsx",
                          "ygc_student.xlsx", "yl_student.xlsx",
                          "zsg_student1.xlsx", "zsg_student2.xlsx")):
    """Load the login credentials from the class roster spreadsheets.

    Each workbook must have the columns ``姓名`` (name, used as the username)
    and ``学号`` (student number, used as the password after conversion to
    ``str``).

    Args:
        roster_dir: Directory that holds the roster workbooks. The default
            is the deployment's hard-coded location.
        roster_files: File names inside ``roster_dir``, read in order.

    Returns:
        A list of ``(username, password)`` tuples in file order and then row
        order. An empty workbook contributes no entries.

    Raises:
        Whatever ``pandas.read_excel`` raises for a missing or unreadable
        workbook, and ``KeyError`` when a required column is absent.
    """
    credentials = []
    for file_name in roster_files:
        sheet = pd.read_excel(os.path.join(roster_dir, file_name))
        credentials.extend(
            (name, str(student_id))
            for name, student_id in zip(sheet['姓名'], sheet['学号'])
        )
    return credentials
|
612 |
+
|
613 |
+
def login(x, y):
    """Authentication callback for Gradio: check a username/password pair.

    Args:
        x: Username typed into the login form.
        y: Password typed into the login form.

    Returns:
        ``True`` when ``(x, y)`` matches an entry from ``student()``,
        otherwise ``False``. The previous code returned the tuple ``(x, y)``,
        which is always truthy, so the result could not signal a rejected
        login reliably.

    On success the module-level ``user`` is set to the username so that
    later log entries carry it.

    NOTE(review): ``user`` is a single global shared by all sessions, so with
    concurrent users it holds whoever logged in last.
    """
    global user
    # The roster is re-read on every attempt, so spreadsheet edits take
    # effect without a restart.
    for username, password in student():
        if username == x and password == y:
            user = username
            return True
    return False
|
620 |
+
|
621 |
+
# demo.load(
|
622 |
+
# fn=refresh_vs_list,
|
623 |
+
# inputs=None,
|
624 |
+
# outputs=[select_vs],
|
625 |
+
# queue=True,
|
626 |
+
# show_progress=False,
|
627 |
+
# )
|
628 |
+
|
629 |
+
# Enable request queuing, then serve on all interfaces at port 7860 with
# logins checked by login().
queued_demo = demo.queue(concurrency_count=30)  # test
queued_demo.launch(
    server_name='0.0.0.0',
    server_port=7860,
    show_api=False,
    share=False,
    inbrowser=False,
    auth=login,
)
|
638 |
+
|
639 |
+
|
640 |
+
|