Spaces:
Runtime error
Runtime error
JohnSmith9982
committed on
Commit
•
8971a40
1
Parent(s):
7a87049
Upload 58 files
Browse files
- ChuanhuChatbot.py +4 -9
- modules/__pycache__/config.cpython-311.pyc +0 -0
- modules/__pycache__/config.cpython-39.pyc +0 -0
- modules/__pycache__/index_func.cpython-311.pyc +0 -0
- modules/__pycache__/index_func.cpython-39.pyc +0 -0
- modules/__pycache__/overwrites.cpython-311.pyc +0 -0
- modules/__pycache__/overwrites.cpython-39.pyc +0 -0
- modules/__pycache__/presets.cpython-311.pyc +0 -0
- modules/__pycache__/presets.cpython-39.pyc +0 -0
- modules/__pycache__/shared.cpython-311.pyc +0 -0
- modules/__pycache__/utils.cpython-311.pyc +0 -0
- modules/__pycache__/utils.cpython-39.pyc +0 -0
- modules/config.py +6 -14
- modules/index_func.py +6 -6
- modules/models/ChuanhuAgent.py +11 -2
- modules/models/__pycache__/base_model.cpython-311.pyc +0 -0
- modules/models/__pycache__/base_model.cpython-39.pyc +0 -0
- modules/models/__pycache__/models.cpython-311.pyc +0 -0
- modules/models/__pycache__/models.cpython-39.pyc +0 -0
- modules/models/base_model.py +11 -7
- modules/models/models.py +2 -2
- modules/overwrites.py +19 -17
- modules/presets.py +11 -12
- modules/shared.py +3 -0
- modules/utils.py +72 -11
ChuanhuChatbot.py
CHANGED
@@ -12,6 +12,7 @@ from modules.presets import *
|
|
12 |
from modules.overwrites import *
|
13 |
from modules.models.models import get_model
|
14 |
|
|
|
15 |
|
16 |
gr.Chatbot._postprocess_chat_messages = postprocess_chat_messages
|
17 |
gr.Chatbot.postprocess = postprocess
|
@@ -88,7 +89,6 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|
88 |
with gr.Row():
|
89 |
single_turn_checkbox = gr.Checkbox(label=i18n("单轮对话"), value=False)
|
90 |
use_websearch_checkbox = gr.Checkbox(label=i18n("使用在线搜索"), value=False)
|
91 |
-
# render_latex_checkbox = gr.Checkbox(label=i18n("渲染LaTeX公式"), value=render_latex, interactive=True, elem_id="render_latex_checkbox")
|
92 |
language_select_dropdown = gr.Dropdown(
|
93 |
label=i18n("选择回复语言(针对搜索&索引功能)"),
|
94 |
choices=REPLY_LANGUAGES,
|
@@ -161,7 +161,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|
161 |
|
162 |
with gr.Tab(label=i18n("高级")):
|
163 |
gr.Markdown(i18n("# ⚠️ 务必谨慎更改 ⚠️\n\n如果无法使用请恢复默认设置"))
|
164 |
-
gr.HTML(
|
165 |
use_streaming_checkbox = gr.Checkbox(
|
166 |
label=i18n("实时传输回答"), value=True, visible=ENABLE_STREAMING_OPTION
|
167 |
)
|
@@ -265,7 +265,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|
265 |
default_btn = gr.Button(i18n("🔙 恢复默认设置"))
|
266 |
|
267 |
gr.Markdown(CHUANHU_DESCRIPTION, elem_id="description")
|
268 |
-
gr.HTML(
|
269 |
|
270 |
# https://github.com/gradio-app/gradio/pull/3296
|
271 |
def create_greeting(request: gr.Request):
|
@@ -469,10 +469,5 @@ if __name__ == "__main__":
|
|
469 |
reload_javascript()
|
470 |
demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
|
471 |
blocked_paths=["config.json"],
|
472 |
-
|
473 |
-
favicon_path="./assets/favicon.ico",
|
474 |
-
inbrowser=not dockerflag, # 禁止在docker下开启inbrowser
|
475 |
)
|
476 |
-
# demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=7860, share=False) # 可自定义端口
|
477 |
-
# demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=7860,auth=("在这里填写用户名", "在这里填写密码")) # 可设置用户名与密码
|
478 |
-
# demo.queue(concurrency_count=CONCURRENT_COUNT).launch(auth=("在这里填写用户名", "在这里填写密码")) # 适合Nginx反向代理
|
|
|
12 |
from modules.overwrites import *
|
13 |
from modules.models.models import get_model
|
14 |
|
15 |
+
logging.getLogger("httpx").setLevel(logging.WARNING)
|
16 |
|
17 |
gr.Chatbot._postprocess_chat_messages = postprocess_chat_messages
|
18 |
gr.Chatbot.postprocess = postprocess
|
|
|
89 |
with gr.Row():
|
90 |
single_turn_checkbox = gr.Checkbox(label=i18n("单轮对话"), value=False)
|
91 |
use_websearch_checkbox = gr.Checkbox(label=i18n("使用在线搜索"), value=False)
|
|
|
92 |
language_select_dropdown = gr.Dropdown(
|
93 |
label=i18n("选择回复语言(针对搜索&索引功能)"),
|
94 |
choices=REPLY_LANGUAGES,
|
|
|
161 |
|
162 |
with gr.Tab(label=i18n("高级")):
|
163 |
gr.Markdown(i18n("# ⚠️ 务必谨慎更改 ⚠️\n\n如果无法使用请恢复默认设置"))
|
164 |
+
gr.HTML(get_html("appearance_switcher.html").format(label=i18n("切换亮暗色主题")), elem_classes="insert_block")
|
165 |
use_streaming_checkbox = gr.Checkbox(
|
166 |
label=i18n("实时传输回答"), value=True, visible=ENABLE_STREAMING_OPTION
|
167 |
)
|
|
|
265 |
default_btn = gr.Button(i18n("🔙 恢复默认设置"))
|
266 |
|
267 |
gr.Markdown(CHUANHU_DESCRIPTION, elem_id="description")
|
268 |
+
gr.HTML(get_html("footer.html").format(versions=versions_html()), elem_id="footer")
|
269 |
|
270 |
# https://github.com/gradio-app/gradio/pull/3296
|
271 |
def create_greeting(request: gr.Request):
|
|
|
469 |
reload_javascript()
|
470 |
demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
|
471 |
blocked_paths=["config.json"],
|
472 |
+
favicon_path="./assets/favicon.ico"
|
|
|
|
|
473 |
)
|
|
|
|
|
|
modules/__pycache__/config.cpython-311.pyc
CHANGED
Binary files a/modules/__pycache__/config.cpython-311.pyc and b/modules/__pycache__/config.cpython-311.pyc differ
|
|
modules/__pycache__/config.cpython-39.pyc
CHANGED
Binary files a/modules/__pycache__/config.cpython-39.pyc and b/modules/__pycache__/config.cpython-39.pyc differ
|
|
modules/__pycache__/index_func.cpython-311.pyc
CHANGED
Binary files a/modules/__pycache__/index_func.cpython-311.pyc and b/modules/__pycache__/index_func.cpython-311.pyc differ
|
|
modules/__pycache__/index_func.cpython-39.pyc
CHANGED
Binary files a/modules/__pycache__/index_func.cpython-39.pyc and b/modules/__pycache__/index_func.cpython-39.pyc differ
|
|
modules/__pycache__/overwrites.cpython-311.pyc
CHANGED
Binary files a/modules/__pycache__/overwrites.cpython-311.pyc and b/modules/__pycache__/overwrites.cpython-311.pyc differ
|
|
modules/__pycache__/overwrites.cpython-39.pyc
CHANGED
Binary files a/modules/__pycache__/overwrites.cpython-39.pyc and b/modules/__pycache__/overwrites.cpython-39.pyc differ
|
|
modules/__pycache__/presets.cpython-311.pyc
CHANGED
Binary files a/modules/__pycache__/presets.cpython-311.pyc and b/modules/__pycache__/presets.cpython-311.pyc differ
|
|
modules/__pycache__/presets.cpython-39.pyc
CHANGED
Binary files a/modules/__pycache__/presets.cpython-39.pyc and b/modules/__pycache__/presets.cpython-39.pyc differ
|
|
modules/__pycache__/shared.cpython-311.pyc
CHANGED
Binary files a/modules/__pycache__/shared.cpython-311.pyc and b/modules/__pycache__/shared.cpython-311.pyc differ
|
|
modules/__pycache__/utils.cpython-311.pyc
CHANGED
Binary files a/modules/__pycache__/utils.cpython-311.pyc and b/modules/__pycache__/utils.cpython-311.pyc differ
|
|
modules/__pycache__/utils.cpython-39.pyc
CHANGED
Binary files a/modules/__pycache__/utils.cpython-39.pyc and b/modules/__pycache__/utils.cpython-39.pyc differ
|
|
modules/config.py
CHANGED
@@ -18,7 +18,6 @@ __all__ = [
|
|
18 |
"log_level",
|
19 |
"advance_docs",
|
20 |
"update_doc_config",
|
21 |
-
"render_latex",
|
22 |
"usage_limit",
|
23 |
"multi_api_key",
|
24 |
"server_name",
|
@@ -43,11 +42,11 @@ hide_history_when_not_logged_in = config.get("hide_history_when_not_logged_in",
|
|
43 |
|
44 |
if os.path.exists("api_key.txt"):
|
45 |
logging.info("检测到api_key.txt文件,正在进行迁移...")
|
46 |
-
with open("api_key.txt", "r") as f:
|
47 |
config["openai_api_key"] = f.read().strip()
|
48 |
os.rename("api_key.txt", "api_key(deprecated).txt")
|
49 |
with open("config.json", "w", encoding='utf-8') as f:
|
50 |
-
json.dump(config, f, indent=4)
|
51 |
|
52 |
if os.path.exists("auth.json"):
|
53 |
logging.info("检测到auth.json文件,正在进行迁移...")
|
@@ -63,7 +62,7 @@ if os.path.exists("auth.json"):
|
|
63 |
config["users"] = auth_list
|
64 |
os.rename("auth.json", "auth(deprecated).json")
|
65 |
with open("config.json", "w", encoding='utf-8') as f:
|
66 |
-
json.dump(config, f, indent=4)
|
67 |
|
68 |
## 处理docker if we are running in Docker
|
69 |
dockerflag = config.get("dockerflag", False)
|
@@ -82,12 +81,6 @@ os.environ["MINIMAX_API_KEY"] = minimax_api_key
|
|
82 |
minimax_group_id = config.get("minimax_group_id", "")
|
83 |
os.environ["MINIMAX_GROUP_ID"] = minimax_group_id
|
84 |
|
85 |
-
render_latex = config.get("render_latex", True)
|
86 |
-
|
87 |
-
if render_latex:
|
88 |
-
os.environ["RENDER_LATEX"] = "yes"
|
89 |
-
else:
|
90 |
-
os.environ["RENDER_LATEX"] = "no"
|
91 |
|
92 |
usage_limit = os.environ.get("USAGE_LIMIT", config.get("usage_limit", 120))
|
93 |
|
@@ -109,10 +102,9 @@ if api_host is not None:
|
|
109 |
shared.state.set_api_host(api_host)
|
110 |
|
111 |
default_chuanhu_assistant_model = config.get("default_chuanhu_assistant_model", "gpt-3.5-turbo")
|
112 |
-
|
113 |
-
|
114 |
-
os.environ[
|
115 |
-
os.environ["SERPAPI_API_KEY"] = config.get("SERPAPI_API_KEY", "")
|
116 |
|
117 |
@contextmanager
|
118 |
def retrieve_openai_api(api_key = None):
|
|
|
18 |
"log_level",
|
19 |
"advance_docs",
|
20 |
"update_doc_config",
|
|
|
21 |
"usage_limit",
|
22 |
"multi_api_key",
|
23 |
"server_name",
|
|
|
42 |
|
43 |
if os.path.exists("api_key.txt"):
|
44 |
logging.info("检测到api_key.txt文件,正在进行迁移...")
|
45 |
+
with open("api_key.txt", "r", encoding="utf-8") as f:
|
46 |
config["openai_api_key"] = f.read().strip()
|
47 |
os.rename("api_key.txt", "api_key(deprecated).txt")
|
48 |
with open("config.json", "w", encoding='utf-8') as f:
|
49 |
+
json.dump(config, f, indent=4, ensure_ascii=False)
|
50 |
|
51 |
if os.path.exists("auth.json"):
|
52 |
logging.info("检测到auth.json文件,正在进行迁移...")
|
|
|
62 |
config["users"] = auth_list
|
63 |
os.rename("auth.json", "auth(deprecated).json")
|
64 |
with open("config.json", "w", encoding='utf-8') as f:
|
65 |
+
json.dump(config, f, indent=4, ensure_ascii=False)
|
66 |
|
67 |
## 处理docker if we are running in Docker
|
68 |
dockerflag = config.get("dockerflag", False)
|
|
|
81 |
minimax_group_id = config.get("minimax_group_id", "")
|
82 |
os.environ["MINIMAX_GROUP_ID"] = minimax_group_id
|
83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
usage_limit = os.environ.get("USAGE_LIMIT", config.get("usage_limit", 120))
|
86 |
|
|
|
102 |
shared.state.set_api_host(api_host)
|
103 |
|
104 |
default_chuanhu_assistant_model = config.get("default_chuanhu_assistant_model", "gpt-3.5-turbo")
|
105 |
+
for x in ["GOOGLE_CSE_ID", "GOOGLE_API_KEY", "WOLFRAM_ALPHA_APPID", "SERPAPI_API_KEY"]:
|
106 |
+
if config.get(x, None) is not None:
|
107 |
+
os.environ[x] = config[x]
|
|
|
108 |
|
109 |
@contextmanager
|
110 |
def retrieve_openai_api(api_key = None):
|
modules/index_func.py
CHANGED
@@ -16,7 +16,7 @@ def get_index_name(file_src):
|
|
16 |
|
17 |
md5_hash = hashlib.md5()
|
18 |
for file_path in file_paths:
|
19 |
-
with open(file_path, "rb") as f:
|
20 |
while chunk := f.read(8192):
|
21 |
md5_hash.update(chunk)
|
22 |
|
@@ -47,11 +47,11 @@ def get_documents(file_src):
|
|
47 |
pdftext = parse_pdf(filepath, two_column).text
|
48 |
except:
|
49 |
pdftext = ""
|
50 |
-
with open(filepath, "rb") as pdfFileObj:
|
51 |
pdfReader = PyPDF2.PdfReader(pdfFileObj)
|
52 |
for page in tqdm(pdfReader.pages):
|
53 |
pdftext += page.extract_text()
|
54 |
-
texts = Document(page_content=pdftext, metadata={"source": filepath})
|
55 |
elif file_type == ".docx":
|
56 |
logging.debug("Loading Word...")
|
57 |
from langchain.document_loaders import UnstructuredWordDocumentLoader
|
@@ -70,9 +70,9 @@ def get_documents(file_src):
|
|
70 |
elif file_type == ".xlsx":
|
71 |
logging.debug("Loading Excel...")
|
72 |
text_list = excel_to_string(filepath)
|
|
|
73 |
for elem in text_list:
|
74 |
-
|
75 |
-
continue
|
76 |
else:
|
77 |
logging.debug("Loading text file...")
|
78 |
from langchain.document_loaders import TextLoader
|
@@ -83,7 +83,7 @@ def get_documents(file_src):
|
|
83 |
logging.error(f"Error loading file: {filename}")
|
84 |
traceback.print_exc()
|
85 |
|
86 |
-
texts = text_splitter.split_documents(
|
87 |
documents.extend(texts)
|
88 |
logging.debug("Documents loaded.")
|
89 |
return documents
|
|
|
16 |
|
17 |
md5_hash = hashlib.md5()
|
18 |
for file_path in file_paths:
|
19 |
+
with open(file_path, "rb", encoding="utf-8") as f:
|
20 |
while chunk := f.read(8192):
|
21 |
md5_hash.update(chunk)
|
22 |
|
|
|
47 |
pdftext = parse_pdf(filepath, two_column).text
|
48 |
except:
|
49 |
pdftext = ""
|
50 |
+
with open(filepath, "rb", encoding="utf-8") as pdfFileObj:
|
51 |
pdfReader = PyPDF2.PdfReader(pdfFileObj)
|
52 |
for page in tqdm(pdfReader.pages):
|
53 |
pdftext += page.extract_text()
|
54 |
+
texts = [Document(page_content=pdftext, metadata={"source": filepath})]
|
55 |
elif file_type == ".docx":
|
56 |
logging.debug("Loading Word...")
|
57 |
from langchain.document_loaders import UnstructuredWordDocumentLoader
|
|
|
70 |
elif file_type == ".xlsx":
|
71 |
logging.debug("Loading Excel...")
|
72 |
text_list = excel_to_string(filepath)
|
73 |
+
texts = []
|
74 |
for elem in text_list:
|
75 |
+
texts.append(Document(page_content=elem, metadata={"source": filepath}))
|
|
|
76 |
else:
|
77 |
logging.debug("Loading text file...")
|
78 |
from langchain.document_loaders import TextLoader
|
|
|
83 |
logging.error(f"Error loading file: {filename}")
|
84 |
traceback.print_exc()
|
85 |
|
86 |
+
texts = text_splitter.split_documents(texts)
|
87 |
documents.extend(texts)
|
88 |
logging.debug("Documents loaded.")
|
89 |
return documents
|
modules/models/ChuanhuAgent.py
CHANGED
@@ -14,7 +14,8 @@ from langchain.tools import BaseTool, StructuredTool, Tool, tool
|
|
14 |
from langchain.callbacks.stdout import StdOutCallbackHandler
|
15 |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
16 |
from langchain.callbacks.manager import BaseCallbackManager
|
17 |
-
from
|
|
|
18 |
|
19 |
from typing import Any, Dict, List, Optional, Union
|
20 |
|
@@ -93,7 +94,15 @@ class ChuanhuAgent_Client(BaseLLMModel):
|
|
93 |
)
|
94 |
|
95 |
def google_search_simple(self, query):
|
96 |
-
results = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
return str(results)
|
98 |
|
99 |
def handle_file_upload(self, files, chatbot, language):
|
|
|
14 |
from langchain.callbacks.stdout import StdOutCallbackHandler
|
15 |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
16 |
from langchain.callbacks.manager import BaseCallbackManager
|
17 |
+
from duckduckgo_search import DDGS
|
18 |
+
from itertools import islice
|
19 |
|
20 |
from typing import Any, Dict, List, Optional, Union
|
21 |
|
|
|
94 |
)
|
95 |
|
96 |
def google_search_simple(self, query):
|
97 |
+
results = []
|
98 |
+
with DDGS() as ddgs:
|
99 |
+
ddgs_gen = ddgs.text("notes from a dead house", backend="lite")
|
100 |
+
for r in islice(ddgs_gen, 10):
|
101 |
+
results.append({
|
102 |
+
"title": r["title"],
|
103 |
+
"link": r["href"],
|
104 |
+
"snippet": r["body"]
|
105 |
+
})
|
106 |
return str(results)
|
107 |
|
108 |
def handle_file_upload(self, files, chatbot, language):
|
modules/models/__pycache__/base_model.cpython-311.pyc
CHANGED
Binary files a/modules/models/__pycache__/base_model.cpython-311.pyc and b/modules/models/__pycache__/base_model.cpython-311.pyc differ
|
|
modules/models/__pycache__/base_model.cpython-39.pyc
CHANGED
Binary files a/modules/models/__pycache__/base_model.cpython-39.pyc and b/modules/models/__pycache__/base_model.cpython-39.pyc differ
|
|
modules/models/__pycache__/models.cpython-311.pyc
CHANGED
Binary files a/modules/models/__pycache__/models.cpython-311.pyc and b/modules/models/__pycache__/models.cpython-311.pyc differ
|
|
modules/models/__pycache__/models.cpython-39.pyc
CHANGED
Binary files a/modules/models/__pycache__/models.cpython-39.pyc and b/modules/models/__pycache__/models.cpython-39.pyc differ
|
|
modules/models/base_model.py
CHANGED
@@ -13,7 +13,8 @@ import pathlib
|
|
13 |
|
14 |
from tqdm import tqdm
|
15 |
import colorama
|
16 |
-
from
|
|
|
17 |
import asyncio
|
18 |
import aiohttp
|
19 |
from enum import Enum
|
@@ -335,16 +336,19 @@ class BaseLLMModel:
|
|
335 |
.replace("{reply_language}", reply_language)
|
336 |
)
|
337 |
elif use_websearch:
|
338 |
-
|
339 |
-
|
|
|
|
|
|
|
340 |
reference_results = []
|
341 |
for idx, result in enumerate(search_results):
|
342 |
logging.debug(f"搜索结果{idx + 1}:{result}")
|
343 |
-
domain_name = urllib3.util.parse_url(result
|
344 |
-
reference_results.append([result
|
345 |
display_append.append(
|
346 |
# f"{idx+1}. [{domain_name}]({result['href']})\n"
|
347 |
-
f"<li><a href=\"{result
|
348 |
)
|
349 |
reference_results = add_source_numbers(reference_results)
|
350 |
display_append = "<ol>\n\n" + "".join(display_append) + "</ol>"
|
@@ -637,7 +641,7 @@ class BaseLLMModel:
|
|
637 |
history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
|
638 |
else:
|
639 |
history_file_path = filename
|
640 |
-
with open(history_file_path, "r") as f:
|
641 |
json_s = json.load(f)
|
642 |
try:
|
643 |
if type(json_s["history"][0]) == str:
|
|
|
13 |
|
14 |
from tqdm import tqdm
|
15 |
import colorama
|
16 |
+
from duckduckgo_search import DDGS
|
17 |
+
from itertools import islice
|
18 |
import asyncio
|
19 |
import aiohttp
|
20 |
from enum import Enum
|
|
|
336 |
.replace("{reply_language}", reply_language)
|
337 |
)
|
338 |
elif use_websearch:
|
339 |
+
search_results = []
|
340 |
+
with DDGS() as ddgs:
|
341 |
+
ddgs_gen = ddgs.text(real_inputs, backend="lite")
|
342 |
+
for r in islice(ddgs_gen, 10):
|
343 |
+
search_results.append(r)
|
344 |
reference_results = []
|
345 |
for idx, result in enumerate(search_results):
|
346 |
logging.debug(f"搜索结果{idx + 1}:{result}")
|
347 |
+
domain_name = urllib3.util.parse_url(result['href']).host
|
348 |
+
reference_results.append([result['body'], result['href']])
|
349 |
display_append.append(
|
350 |
# f"{idx+1}. [{domain_name}]({result['href']})\n"
|
351 |
+
f"<li><a href=\"{result['href']}\" target=\"_blank\">{result['title']}</a></li>\n"
|
352 |
)
|
353 |
reference_results = add_source_numbers(reference_results)
|
354 |
display_append = "<ol>\n\n" + "".join(display_append) + "</ol>"
|
|
|
641 |
history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
|
642 |
else:
|
643 |
history_file_path = filename
|
644 |
+
with open(history_file_path, "r", encoding="utf-8") as f:
|
645 |
json_s = json.load(f)
|
646 |
try:
|
647 |
if type(json_s["history"][0]) == str:
|
modules/models/models.py
CHANGED
@@ -338,7 +338,7 @@ class LLaMA_Client(BaseLLMModel):
|
|
338 |
pipeline_args = InferencerArguments(
|
339 |
local_rank=0, random_seed=1, deepspeed='configs/ds_config_chatbot.json', mixed_precision='bf16')
|
340 |
|
341 |
-
with open(pipeline_args.deepspeed, "r") as f:
|
342 |
ds_config = json.load(f)
|
343 |
LLAMA_MODEL = AutoModel.get_model(
|
344 |
model_args,
|
@@ -623,7 +623,7 @@ def get_model(
|
|
623 |
|
624 |
|
625 |
if __name__ == "__main__":
|
626 |
-
with open("config.json", "r") as f:
|
627 |
openai_api_key = cjson.load(f)["openai_api_key"]
|
628 |
# set logging level to debug
|
629 |
logging.basicConfig(level=logging.DEBUG)
|
|
|
338 |
pipeline_args = InferencerArguments(
|
339 |
local_rank=0, random_seed=1, deepspeed='configs/ds_config_chatbot.json', mixed_precision='bf16')
|
340 |
|
341 |
+
with open(pipeline_args.deepspeed, "r", encoding="utf-8") as f:
|
342 |
ds_config = json.load(f)
|
343 |
LLAMA_MODEL = AutoModel.get_model(
|
344 |
model_args,
|
|
|
623 |
|
624 |
|
625 |
if __name__ == "__main__":
|
626 |
+
with open("config.json", "r", encoding="utf-8") as f:
|
627 |
openai_api_key = cjson.load(f)["openai_api_key"]
|
628 |
# set logging level to debug
|
629 |
logging.basicConfig(level=logging.DEBUG)
|
modules/overwrites.py
CHANGED
@@ -2,12 +2,12 @@ from __future__ import annotations
|
|
2 |
import logging
|
3 |
|
4 |
from typing import List, Tuple
|
5 |
-
import mdtex2html
|
6 |
from gradio_client import utils as client_utils
|
|
|
|
|
7 |
|
8 |
from modules.presets import *
|
9 |
from modules.index_func import *
|
10 |
-
from modules.config import render_latex
|
11 |
|
12 |
|
13 |
def postprocess(
|
@@ -40,14 +40,18 @@ def postprocess(
|
|
40 |
return processed_messages
|
41 |
|
42 |
def postprocess_chat_messages(
|
43 |
-
self, chat_message: str |
|
44 |
-
) -> str |
|
45 |
if chat_message is None:
|
46 |
return None
|
47 |
elif isinstance(chat_message, (tuple, list)):
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
49 |
mime_type = client_utils.get_mimetype(filepath)
|
50 |
-
filepath = self.make_temp_copy_if_needed(filepath)
|
51 |
return {
|
52 |
"name": filepath,
|
53 |
"mime_type": mime_type,
|
@@ -56,12 +60,13 @@ def postprocess_chat_messages(
|
|
56 |
"is_file": True,
|
57 |
}
|
58 |
elif isinstance(chat_message, str):
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
65 |
return chat_message
|
66 |
else:
|
67 |
raise ValueError(f"Invalid message for Chatbot component: {chat_message}")
|
@@ -75,11 +80,8 @@ with open("./assets/custom.js", "r", encoding="utf-8") as f, \
|
|
75 |
def reload_javascript():
|
76 |
print("Reloading javascript...")
|
77 |
js = f'<script>{customJS}</script><script async>{externalScripts}</script>'
|
78 |
-
if render_latex:
|
79 |
-
|
80 |
-
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-MML-AM_CHTML"></script>
|
81 |
-
<script type="text/x-mathjax-config">MathJax.Hub.Config({skipStartupTypeset: false, tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']],displayMath: [['$$','$$'], ['\\[','\\]']]}});</script>
|
82 |
-
"""
|
83 |
def template_response(*args, **kwargs):
|
84 |
res = GradioTemplateResponseOriginal(*args, **kwargs)
|
85 |
res.body = res.body.replace(b'</html>', f'{js}</html>'.encode("utf8"))
|
|
|
2 |
import logging
|
3 |
|
4 |
from typing import List, Tuple
|
|
|
5 |
from gradio_client import utils as client_utils
|
6 |
+
from gradio import utils
|
7 |
+
import inspect
|
8 |
|
9 |
from modules.presets import *
|
10 |
from modules.index_func import *
|
|
|
11 |
|
12 |
|
13 |
def postprocess(
|
|
|
40 |
return processed_messages
|
41 |
|
42 |
def postprocess_chat_messages(
|
43 |
+
self, chat_message: str | tuple | list | None, role: str
|
44 |
+
) -> str | dict | None:
|
45 |
if chat_message is None:
|
46 |
return None
|
47 |
elif isinstance(chat_message, (tuple, list)):
|
48 |
+
file_uri = chat_message[0]
|
49 |
+
if utils.validate_url(file_uri):
|
50 |
+
filepath = file_uri
|
51 |
+
else:
|
52 |
+
filepath = self.make_temp_copy_if_needed(file_uri)
|
53 |
+
|
54 |
mime_type = client_utils.get_mimetype(filepath)
|
|
|
55 |
return {
|
56 |
"name": filepath,
|
57 |
"mime_type": mime_type,
|
|
|
60 |
"is_file": True,
|
61 |
}
|
62 |
elif isinstance(chat_message, str):
|
63 |
+
# chat_message = inspect.cleandoc(chat_message)
|
64 |
+
# escape html spaces
|
65 |
+
# chat_message = chat_message.replace(" ", " ")
|
66 |
+
if role == "bot":
|
67 |
+
chat_message = convert_bot_before_marked(chat_message)
|
68 |
+
elif role == "user":
|
69 |
+
chat_message = convert_user_before_marked(chat_message)
|
70 |
return chat_message
|
71 |
else:
|
72 |
raise ValueError(f"Invalid message for Chatbot component: {chat_message}")
|
|
|
80 |
def reload_javascript():
|
81 |
print("Reloading javascript...")
|
82 |
js = f'<script>{customJS}</script><script async>{externalScripts}</script>'
|
83 |
+
# if render_latex:
|
84 |
+
# js += """\"""
|
|
|
|
|
|
|
85 |
def template_response(*args, **kwargs):
|
86 |
res = GradioTemplateResponseOriginal(*args, **kwargs)
|
87 |
res.body = res.body.replace(b'</html>', f'{js}</html>'.encode("utf8"))
|
modules/presets.py
CHANGED
@@ -46,25 +46,18 @@ CHUANHU_TITLE = i18n("川虎Chat 🚀")
|
|
46 |
|
47 |
CHUANHU_DESCRIPTION = i18n("由Bilibili [土川虎虎虎](https://space.bilibili.com/29125536)、[明昭MZhao](https://space.bilibili.com/24807452) 和 [Keldos](https://github.com/Keldos-Li) 开发<br />访问川虎Chat的 [GitHub项目](https://github.com/GaiZhenbiao/ChuanhuChatGPT) 下载最新版脚本")
|
48 |
|
49 |
-
FOOTER = """<div class="versions">{versions}</div>"""
|
50 |
-
|
51 |
-
APPEARANCE_SWITCHER = """
|
52 |
-
<div style="display: flex; justify-content: space-between;">
|
53 |
-
<span style="margin-top: 4px !important;">"""+ i18n("切换亮暗色主题") + """</span>
|
54 |
-
<span><label class="apSwitch" for="checkbox">
|
55 |
-
<input type="checkbox" id="checkbox">
|
56 |
-
<div class="apSlider"></div>
|
57 |
-
</label></span>
|
58 |
-
</div>
|
59 |
-
"""
|
60 |
|
61 |
ONLINE_MODELS = [
|
62 |
"gpt-3.5-turbo",
|
|
|
63 |
"gpt-3.5-turbo-0301",
|
|
|
64 |
"gpt-4",
|
65 |
"gpt-4-0314",
|
|
|
66 |
"gpt-4-32k",
|
67 |
"gpt-4-32k-0314",
|
|
|
68 |
"川虎助理",
|
69 |
"川虎助理 Pro",
|
70 |
"xmchat",
|
@@ -105,11 +98,15 @@ for dir_name in os.listdir("models"):
|
|
105 |
|
106 |
MODEL_TOKEN_LIMIT = {
|
107 |
"gpt-3.5-turbo": 4096,
|
|
|
108 |
"gpt-3.5-turbo-0301": 4096,
|
|
|
109 |
"gpt-4": 8192,
|
110 |
"gpt-4-0314": 8192,
|
|
|
111 |
"gpt-4-32k": 32768,
|
112 |
-
"gpt-4-32k-0314": 32768
|
|
|
113 |
}
|
114 |
|
115 |
TOKEN_OFFSET = 1000 # 模型的token上限减去这个值,得到软上限。到达软上限之后,自动尝试减少token占用。
|
@@ -238,4 +235,6 @@ small_and_beautiful_theme = gr.themes.Soft(
|
|
238 |
block_title_background_fill_dark="*primary_900",
|
239 |
block_label_background_fill_dark="*primary_900",
|
240 |
input_background_fill="#F6F6F6",
|
|
|
|
|
241 |
)
|
|
|
46 |
|
47 |
CHUANHU_DESCRIPTION = i18n("由Bilibili [土川虎虎虎](https://space.bilibili.com/29125536)、[明昭MZhao](https://space.bilibili.com/24807452) 和 [Keldos](https://github.com/Keldos-Li) 开发<br />访问川虎Chat的 [GitHub项目](https://github.com/GaiZhenbiao/ChuanhuChatGPT) 下载最新版脚本")
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
ONLINE_MODELS = [
|
51 |
"gpt-3.5-turbo",
|
52 |
+
"gpt-3.5-turbo-16k",
|
53 |
"gpt-3.5-turbo-0301",
|
54 |
+
"gpt-3.5-turbo-0613",
|
55 |
"gpt-4",
|
56 |
"gpt-4-0314",
|
57 |
+
"gpt-4-0613",
|
58 |
"gpt-4-32k",
|
59 |
"gpt-4-32k-0314",
|
60 |
+
"gpt-4-32k-0613",
|
61 |
"川虎助理",
|
62 |
"川虎助理 Pro",
|
63 |
"xmchat",
|
|
|
98 |
|
99 |
MODEL_TOKEN_LIMIT = {
|
100 |
"gpt-3.5-turbo": 4096,
|
101 |
+
"gpt-3.5-turbo-16k": 16384,
|
102 |
"gpt-3.5-turbo-0301": 4096,
|
103 |
+
"gpt-3.5-turbo-0613": 4096,
|
104 |
"gpt-4": 8192,
|
105 |
"gpt-4-0314": 8192,
|
106 |
+
"gpt-4-0613": 8192,
|
107 |
"gpt-4-32k": 32768,
|
108 |
+
"gpt-4-32k-0314": 32768,
|
109 |
+
"gpt-4-32k-0613": 32768
|
110 |
}
|
111 |
|
112 |
TOKEN_OFFSET = 1000 # 模型的token上限减去这个值,得到软上限。到达软上限之后,自动尝试减少token占用。
|
|
|
235 |
block_title_background_fill_dark="*primary_900",
|
236 |
block_label_background_fill_dark="*primary_900",
|
237 |
input_background_fill="#F6F6F6",
|
238 |
+
chatbot_code_background_color="*neutral_950",
|
239 |
+
chatbot_code_background_color_dark="*neutral_950",
|
240 |
)
|
modules/shared.py
CHANGED
@@ -59,3 +59,6 @@ class State:
|
|
59 |
|
60 |
|
61 |
state = State()
|
|
|
|
|
|
|
|
59 |
|
60 |
|
61 |
state = State()
|
62 |
+
|
63 |
+
modules_path = os.path.dirname(os.path.realpath(__file__))
|
64 |
+
chuanhu_path = os.path.dirname(modules_path)
|
modules/utils.py
CHANGED
@@ -16,7 +16,6 @@ import subprocess
|
|
16 |
import gradio as gr
|
17 |
from pypinyin import lazy_pinyin
|
18 |
import tiktoken
|
19 |
-
import mdtex2html
|
20 |
from markdown import markdown
|
21 |
from pygments import highlight
|
22 |
from pygments.lexers import get_lexer_by_name
|
@@ -133,7 +132,7 @@ def count_token(message):
|
|
133 |
return length
|
134 |
|
135 |
|
136 |
-
def markdown_to_html_with_syntax_highlight(md_str):
|
137 |
def replacer(match):
|
138 |
lang = match.group(1) or "text"
|
139 |
code = match.group(2)
|
@@ -155,7 +154,7 @@ def markdown_to_html_with_syntax_highlight(md_str):
|
|
155 |
return html_str
|
156 |
|
157 |
|
158 |
-
def normalize_markdown(md_text: str) -> str:
|
159 |
lines = md_text.split("\n")
|
160 |
normalized_lines = []
|
161 |
inside_list = False
|
@@ -179,7 +178,7 @@ def normalize_markdown(md_text: str) -> str:
|
|
179 |
return "\n".join(normalized_lines)
|
180 |
|
181 |
|
182 |
-
def convert_mdtext(md_text):
|
183 |
code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
|
184 |
inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
|
185 |
code_blocks = code_block_pattern.findall(md_text)
|
@@ -203,15 +202,70 @@ def convert_mdtext(md_text):
|
|
203 |
output += ALREADY_CONVERTED_MARK
|
204 |
return output
|
205 |
|
206 |
-
|
207 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
return (
|
209 |
f'<p style="white-space:pre-wrap;">{html.escape(userinput)}</p>'
|
210 |
+ ALREADY_CONVERTED_MARK
|
211 |
)
|
212 |
|
213 |
|
214 |
-
def detect_converted_mark(userinput):
|
215 |
try:
|
216 |
if userinput.endswith(ALREADY_CONVERTED_MARK):
|
217 |
return True
|
@@ -221,7 +275,7 @@ def detect_converted_mark(userinput):
|
|
221 |
return True
|
222 |
|
223 |
|
224 |
-
def detect_language(code):
|
225 |
if code.startswith("\n"):
|
226 |
first_line = ""
|
227 |
else:
|
@@ -256,8 +310,8 @@ def save_file(filename, system, history, chatbot, user_name):
|
|
256 |
history_file_path = filename
|
257 |
else:
|
258 |
history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
|
259 |
-
with open(history_file_path, "w") as f:
|
260 |
-
json.dump(json_s, f)
|
261 |
elif filename.endswith(".md"):
|
262 |
md_s = f"system: \n- {system} \n"
|
263 |
for data in history:
|
@@ -497,6 +551,13 @@ def versions_html():
|
|
497 |
<a style="text-decoration:none;color:inherit" href="https://github.com/GaiZhenbiao/ChuanhuChatGPT">ChuanhuChat</a>: {commit_info}
|
498 |
"""
|
499 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
500 |
def add_source_numbers(lst, source_name = "Source", use_source = True):
|
501 |
if use_source:
|
502 |
return [f'[{idx+1}]\t "{item[0]}"\n{source_name}: {item[1]}' for idx, item in enumerate(lst)]
|
@@ -563,7 +624,7 @@ def toggle_like_btn_visibility(selected_model_name):
|
|
563 |
def new_auto_history_filename(dirname):
|
564 |
latest_file = get_latest_filepath(dirname)
|
565 |
if latest_file:
|
566 |
-
with open(os.path.join(dirname, latest_file), 'r') as f:
|
567 |
if len(f.read()) == 0:
|
568 |
return latest_file
|
569 |
now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
|
|
|
16 |
import gradio as gr
|
17 |
from pypinyin import lazy_pinyin
|
18 |
import tiktoken
|
|
|
19 |
from markdown import markdown
|
20 |
from pygments import highlight
|
21 |
from pygments.lexers import get_lexer_by_name
|
|
|
132 |
return length
|
133 |
|
134 |
|
135 |
+
def markdown_to_html_with_syntax_highlight(md_str): # deprecated
|
136 |
def replacer(match):
|
137 |
lang = match.group(1) or "text"
|
138 |
code = match.group(2)
|
|
|
154 |
return html_str
|
155 |
|
156 |
|
157 |
+
def normalize_markdown(md_text: str) -> str: # deprecated
|
158 |
lines = md_text.split("\n")
|
159 |
normalized_lines = []
|
160 |
inside_list = False
|
|
|
178 |
return "\n".join(normalized_lines)
|
179 |
|
180 |
|
181 |
+
def convert_mdtext(md_text): # deprecated
|
182 |
code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
|
183 |
inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
|
184 |
code_blocks = code_block_pattern.findall(md_text)
|
|
|
202 |
output += ALREADY_CONVERTED_MARK
|
203 |
return output
|
204 |
|
205 |
+
def convert_bot_before_marked(chat_message):
|
206 |
+
"""
|
207 |
+
注意不能给输出加缩进, 否则会被marked解析成代码块
|
208 |
+
"""
|
209 |
+
if '<div class="md-message">' in chat_message:
|
210 |
+
return chat_message
|
211 |
+
else:
|
212 |
+
code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
|
213 |
+
code_blocks = code_block_pattern.findall(chat_message)
|
214 |
+
non_code_parts = code_block_pattern.split(chat_message)[::2]
|
215 |
+
result = []
|
216 |
+
|
217 |
+
raw = f'<div class="raw-message hideM">{escape_markdown(chat_message)}</div>'
|
218 |
+
for non_code, code in zip(non_code_parts, code_blocks + [""]):
|
219 |
+
if non_code.strip():
|
220 |
+
result.append(non_code)
|
221 |
+
if code.strip():
|
222 |
+
code = f"\n```{code}\n```"
|
223 |
+
result.append(code)
|
224 |
+
result = "".join(result)
|
225 |
+
md = f'<div class="md-message">{result}\n</div>'
|
226 |
+
return raw + md
|
227 |
+
|
228 |
+
def convert_user_before_marked(chat_message):
|
229 |
+
if '<div class="user-message">' in chat_message:
|
230 |
+
return chat_message
|
231 |
+
else:
|
232 |
+
return f'<div class="user-message">{escape_markdown(chat_message)}</div>'
|
233 |
+
|
234 |
+
def escape_markdown(text):
|
235 |
+
"""
|
236 |
+
Escape Markdown special characters to HTML-safe equivalents.
|
237 |
+
"""
|
238 |
+
escape_chars = {
|
239 |
+
' ': ' ',
|
240 |
+
'_': '_',
|
241 |
+
'*': '*',
|
242 |
+
'[': '[',
|
243 |
+
']': ']',
|
244 |
+
'(': '(',
|
245 |
+
')': ')',
|
246 |
+
'{': '{',
|
247 |
+
'}': '}',
|
248 |
+
'#': '#',
|
249 |
+
'+': '+',
|
250 |
+
'-': '-',
|
251 |
+
'.': '.',
|
252 |
+
'!': '!',
|
253 |
+
'`': '`',
|
254 |
+
'>': '>',
|
255 |
+
'<': '<',
|
256 |
+
'|': '|'
|
257 |
+
}
|
258 |
+
return ''.join(escape_chars.get(c, c) for c in text)
|
259 |
+
|
260 |
+
|
261 |
+
def convert_asis(userinput): # deprecated
|
262 |
return (
|
263 |
f'<p style="white-space:pre-wrap;">{html.escape(userinput)}</p>'
|
264 |
+ ALREADY_CONVERTED_MARK
|
265 |
)
|
266 |
|
267 |
|
268 |
+
def detect_converted_mark(userinput): # deprecated
|
269 |
try:
|
270 |
if userinput.endswith(ALREADY_CONVERTED_MARK):
|
271 |
return True
|
|
|
275 |
return True
|
276 |
|
277 |
|
278 |
+
def detect_language(code): # deprecated
|
279 |
if code.startswith("\n"):
|
280 |
first_line = ""
|
281 |
else:
|
|
|
310 |
history_file_path = filename
|
311 |
else:
|
312 |
history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
|
313 |
+
with open(history_file_path, "w", encoding='utf-8') as f:
|
314 |
+
json.dump(json_s, f, ensure_ascii=False)
|
315 |
elif filename.endswith(".md"):
|
316 |
md_s = f"system: \n- {system} \n"
|
317 |
for data in history:
|
|
|
551 |
<a style="text-decoration:none;color:inherit" href="https://github.com/GaiZhenbiao/ChuanhuChatGPT">ChuanhuChat</a>: {commit_info}
|
552 |
"""
|
553 |
|
554 |
+
def get_html(filename):
|
555 |
+
path = os.path.join(shared.chuanhu_path, "assets", "html", filename)
|
556 |
+
if os.path.exists(path):
|
557 |
+
with open(path, encoding="utf8") as file:
|
558 |
+
return file.read()
|
559 |
+
return ""
|
560 |
+
|
561 |
def add_source_numbers(lst, source_name = "Source", use_source = True):
|
562 |
if use_source:
|
563 |
return [f'[{idx+1}]\t "{item[0]}"\n{source_name}: {item[1]}' for idx, item in enumerate(lst)]
|
|
|
624 |
def new_auto_history_filename(dirname):
|
625 |
latest_file = get_latest_filepath(dirname)
|
626 |
if latest_file:
|
627 |
+
with open(os.path.join(dirname, latest_file), 'r', encoding="utf-8") as f:
|
628 |
if len(f.read()) == 0:
|
629 |
return latest_file
|
630 |
now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
|