Spaces:

JohnSmith9982
/

ChuanhuChatGPT_Beta

Runtime error

App Files Community

JohnSmith9982 committed on Jun 14, 2023

Commit

8971a40

•

1 Parent(s): 7a87049

Upload 58 files

Browse files

Files changed (25) hide show

ChuanhuChatbot.py +4 -9
modules/__pycache__/config.cpython-311.pyc +0 -0
modules/__pycache__/config.cpython-39.pyc +0 -0
modules/__pycache__/index_func.cpython-311.pyc +0 -0
modules/__pycache__/index_func.cpython-39.pyc +0 -0
modules/__pycache__/overwrites.cpython-311.pyc +0 -0
modules/__pycache__/overwrites.cpython-39.pyc +0 -0
modules/__pycache__/presets.cpython-311.pyc +0 -0
modules/__pycache__/presets.cpython-39.pyc +0 -0
modules/__pycache__/shared.cpython-311.pyc +0 -0
modules/__pycache__/utils.cpython-311.pyc +0 -0
modules/__pycache__/utils.cpython-39.pyc +0 -0
modules/config.py +6 -14
modules/index_func.py +6 -6
modules/models/ChuanhuAgent.py +11 -2
modules/models/__pycache__/base_model.cpython-311.pyc +0 -0
modules/models/__pycache__/base_model.cpython-39.pyc +0 -0
modules/models/__pycache__/models.cpython-311.pyc +0 -0
modules/models/__pycache__/models.cpython-39.pyc +0 -0
modules/models/base_model.py +11 -7
modules/models/models.py +2 -2
modules/overwrites.py +19 -17
modules/presets.py +11 -12
modules/shared.py +3 -0
modules/utils.py +72 -11

ChuanhuChatbot.py CHANGED Viewed

@@ -12,6 +12,7 @@ from modules.presets import *
 from modules.overwrites import *
 from modules.models.models import get_model
 gr.Chatbot._postprocess_chat_messages = postprocess_chat_messages
 gr.Chatbot.postprocess = postprocess
@@ -88,7 +89,6 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
                     with gr.Row():
                         single_turn_checkbox = gr.Checkbox(label=i18n("单轮对话"), value=False)
                         use_websearch_checkbox = gr.Checkbox(label=i18n("使用在线搜索"), value=False)
-                        # render_latex_checkbox = gr.Checkbox(label=i18n("渲染LaTeX公式"), value=render_latex, interactive=True, elem_id="render_latex_checkbox")
                     language_select_dropdown = gr.Dropdown(
                         label=i18n("选择回复语言（针对搜索&索引功能）"),
                         choices=REPLY_LANGUAGES,
@@ -161,7 +161,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
                 with gr.Tab(label=i18n("高级")):
                     gr.Markdown(i18n("# ⚠️ 务必谨慎更改 ⚠️\n\n如果无法使用请恢复默认设置"))
-                    gr.HTML(APPEARANCE_SWITCHER, elem_classes="insert_block")
                     use_streaming_checkbox = gr.Checkbox(
                             label=i18n("实时传输回答"), value=True, visible=ENABLE_STREAMING_OPTION
                         )
@@ -265,7 +265,7 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
                         default_btn = gr.Button(i18n("🔙 恢复默认设置"))
     gr.Markdown(CHUANHU_DESCRIPTION, elem_id="description")
-    gr.HTML(FOOTER.format(versions=versions_html()), elem_id="footer")
     # https://github.com/gradio-app/gradio/pull/3296
     def create_greeting(request: gr.Request):
@@ -469,10 +469,5 @@ if __name__ == "__main__":
     reload_javascript()
     demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
         blocked_paths=["config.json"],
-        auth=auth_list if authflag else None,
-        favicon_path="./assets/favicon.ico",
-        inbrowser=not dockerflag, # 禁止在docker下开启inbrowser
     )
-    # demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=7860, share=False) # 可自定义端口
-    # demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", server_port=7860,auth=("在这里填写用户名", "在这里填写密码")) # 可设置用户名与密码
-    # demo.queue(concurrency_count=CONCURRENT_COUNT).launch(auth=("在这里填写用户名", "在这里填写密码")) # 适合Nginx反向代理

 from modules.overwrites import *
 from modules.models.models import get_model
+logging.getLogger("httpx").setLevel(logging.WARNING)
 gr.Chatbot._postprocess_chat_messages = postprocess_chat_messages
 gr.Chatbot.postprocess = postprocess
                     with gr.Row():
                         single_turn_checkbox = gr.Checkbox(label=i18n("单轮对话"), value=False)
                         use_websearch_checkbox = gr.Checkbox(label=i18n("使用在线搜索"), value=False)
                     language_select_dropdown = gr.Dropdown(
                         label=i18n("选择回复语言（针对搜索&索引功能）"),
                         choices=REPLY_LANGUAGES,
                 with gr.Tab(label=i18n("高级")):
                     gr.Markdown(i18n("# ⚠️ 务必谨慎更改 ⚠️\n\n如果无法使用请恢复默认设置"))
+                    gr.HTML(get_html("appearance_switcher.html").format(label=i18n("切换亮暗色主题")), elem_classes="insert_block")
                     use_streaming_checkbox = gr.Checkbox(
                             label=i18n("实时传输回答"), value=True, visible=ENABLE_STREAMING_OPTION
                         )
                         default_btn = gr.Button(i18n("🔙 恢复默认设置"))
     gr.Markdown(CHUANHU_DESCRIPTION, elem_id="description")
+    gr.HTML(get_html("footer.html").format(versions=versions_html()), elem_id="footer")
     # https://github.com/gradio-app/gradio/pull/3296
     def create_greeting(request: gr.Request):
     reload_javascript()
     demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
         blocked_paths=["config.json"],
+        favicon_path="./assets/favicon.ico"
     )

modules/__pycache__/config.cpython-311.pyc CHANGED Viewed

Binary files a/modules/__pycache__/config.cpython-311.pyc and b/modules/__pycache__/config.cpython-311.pyc differ

modules/__pycache__/config.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/config.cpython-39.pyc and b/modules/__pycache__/config.cpython-39.pyc differ

modules/__pycache__/index_func.cpython-311.pyc CHANGED Viewed

Binary files a/modules/__pycache__/index_func.cpython-311.pyc and b/modules/__pycache__/index_func.cpython-311.pyc differ

modules/__pycache__/index_func.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/index_func.cpython-39.pyc and b/modules/__pycache__/index_func.cpython-39.pyc differ

modules/__pycache__/overwrites.cpython-311.pyc CHANGED Viewed

Binary files a/modules/__pycache__/overwrites.cpython-311.pyc and b/modules/__pycache__/overwrites.cpython-311.pyc differ

modules/__pycache__/overwrites.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/overwrites.cpython-39.pyc and b/modules/__pycache__/overwrites.cpython-39.pyc differ

modules/__pycache__/presets.cpython-311.pyc CHANGED Viewed

Binary files a/modules/__pycache__/presets.cpython-311.pyc and b/modules/__pycache__/presets.cpython-311.pyc differ

modules/__pycache__/presets.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/presets.cpython-39.pyc and b/modules/__pycache__/presets.cpython-39.pyc differ

modules/__pycache__/shared.cpython-311.pyc CHANGED Viewed

Binary files a/modules/__pycache__/shared.cpython-311.pyc and b/modules/__pycache__/shared.cpython-311.pyc differ

modules/__pycache__/utils.cpython-311.pyc CHANGED Viewed

Binary files a/modules/__pycache__/utils.cpython-311.pyc and b/modules/__pycache__/utils.cpython-311.pyc differ

modules/__pycache__/utils.cpython-39.pyc CHANGED Viewed

Binary files a/modules/__pycache__/utils.cpython-39.pyc and b/modules/__pycache__/utils.cpython-39.pyc differ

modules/config.py CHANGED Viewed

@@ -18,7 +18,6 @@ __all__ = [
     "log_level",
     "advance_docs",
     "update_doc_config",
-    "render_latex",
     "usage_limit",
     "multi_api_key",
     "server_name",
@@ -43,11 +42,11 @@ hide_history_when_not_logged_in = config.get("hide_history_when_not_logged_in",
 if os.path.exists("api_key.txt"):
     logging.info("检测到api_key.txt文件，正在进行迁移...")
-    with open("api_key.txt", "r") as f:
         config["openai_api_key"] = f.read().strip()
     os.rename("api_key.txt", "api_key(deprecated).txt")
     with open("config.json", "w", encoding='utf-8') as f:
-        json.dump(config, f, indent=4)
 if os.path.exists("auth.json"):
     logging.info("检测到auth.json文件，正在进行迁移...")
@@ -63,7 +62,7 @@ if os.path.exists("auth.json"):
     config["users"] = auth_list
     os.rename("auth.json", "auth(deprecated).json")
     with open("config.json", "w", encoding='utf-8') as f:
-        json.dump(config, f, indent=4)
 ## 处理docker if we are running in Docker
 dockerflag = config.get("dockerflag", False)
@@ -82,12 +81,6 @@ os.environ["MINIMAX_API_KEY"] = minimax_api_key
 minimax_group_id = config.get("minimax_group_id", "")
 os.environ["MINIMAX_GROUP_ID"] = minimax_group_id
-render_latex = config.get("render_latex", True)
-if render_latex:
-    os.environ["RENDER_LATEX"] = "yes"
-else:
-    os.environ["RENDER_LATEX"] = "no"
 usage_limit = os.environ.get("USAGE_LIMIT", config.get("usage_limit", 120))
@@ -109,10 +102,9 @@ if api_host is not None:
     shared.state.set_api_host(api_host)
 default_chuanhu_assistant_model = config.get("default_chuanhu_assistant_model", "gpt-3.5-turbo")
-os.environ["GOOGLE_CSE_ID"] = config.get("GOOGLE_CSE_ID", "")
-os.environ["GOOGLE_API_KEY"] = config.get("GOOGLE_API_KEY", "")
-os.environ["WOLFRAM_ALPHA_APPID"] = config.get("WOLFRAM_ALPHA_APPID", "")
-os.environ["SERPAPI_API_KEY"] = config.get("SERPAPI_API_KEY", "")
 @contextmanager
 def retrieve_openai_api(api_key = None):

     "log_level",
     "advance_docs",
     "update_doc_config",
     "usage_limit",
     "multi_api_key",
     "server_name",
 if os.path.exists("api_key.txt"):
     logging.info("检测到api_key.txt文件，正在进行迁移...")
+    with open("api_key.txt", "r", encoding="utf-8") as f:
         config["openai_api_key"] = f.read().strip()
     os.rename("api_key.txt", "api_key(deprecated).txt")
     with open("config.json", "w", encoding='utf-8') as f:
+        json.dump(config, f, indent=4, ensure_ascii=False)
 if os.path.exists("auth.json"):
     logging.info("检测到auth.json文件，正在进行迁移...")
     config["users"] = auth_list
     os.rename("auth.json", "auth(deprecated).json")
     with open("config.json", "w", encoding='utf-8') as f:
+        json.dump(config, f, indent=4, ensure_ascii=False)
 ## 处理docker if we are running in Docker
 dockerflag = config.get("dockerflag", False)
 minimax_group_id = config.get("minimax_group_id", "")
 os.environ["MINIMAX_GROUP_ID"] = minimax_group_id
 usage_limit = os.environ.get("USAGE_LIMIT", config.get("usage_limit", 120))
     shared.state.set_api_host(api_host)
 default_chuanhu_assistant_model = config.get("default_chuanhu_assistant_model", "gpt-3.5-turbo")
+for x in ["GOOGLE_CSE_ID", "GOOGLE_API_KEY", "WOLFRAM_ALPHA_APPID", "SERPAPI_API_KEY"]:
+    if config.get(x, None) is not None:
+        os.environ[x] = config[x]
 @contextmanager
 def retrieve_openai_api(api_key = None):

modules/index_func.py CHANGED Viewed

@@ -16,7 +16,7 @@ def get_index_name(file_src):
     md5_hash = hashlib.md5()
     for file_path in file_paths:
-        with open(file_path, "rb") as f:
             while chunk := f.read(8192):
                 md5_hash.update(chunk)
@@ -47,11 +47,11 @@ def get_documents(file_src):
                     pdftext = parse_pdf(filepath, two_column).text
                 except:
                     pdftext = ""
-                    with open(filepath, "rb") as pdfFileObj:
                         pdfReader = PyPDF2.PdfReader(pdfFileObj)
                         for page in tqdm(pdfReader.pages):
                             pdftext += page.extract_text()
-                texts = Document(page_content=pdftext, metadata={"source": filepath})
             elif file_type == ".docx":
                 logging.debug("Loading Word...")
                 from langchain.document_loaders import UnstructuredWordDocumentLoader
@@ -70,9 +70,9 @@ def get_documents(file_src):
             elif file_type == ".xlsx":
                 logging.debug("Loading Excel...")
                 text_list = excel_to_string(filepath)
                 for elem in text_list:
-                    documents.append(Document(page_content=elem, metadata={"source": filepath}))
-                continue
             else:
                 logging.debug("Loading text file...")
                 from langchain.document_loaders import TextLoader
@@ -83,7 +83,7 @@ def get_documents(file_src):
             logging.error(f"Error loading file: {filename}")
             traceback.print_exc()
-        texts = text_splitter.split_documents([texts])
         documents.extend(texts)
     logging.debug("Documents loaded.")
     return documents

     md5_hash = hashlib.md5()
     for file_path in file_paths:
+        with open(file_path, "rb", encoding="utf-8") as f:
             while chunk := f.read(8192):
                 md5_hash.update(chunk)
                     pdftext = parse_pdf(filepath, two_column).text
                 except:
                     pdftext = ""
+                    with open(filepath, "rb", encoding="utf-8") as pdfFileObj:
                         pdfReader = PyPDF2.PdfReader(pdfFileObj)
                         for page in tqdm(pdfReader.pages):
                             pdftext += page.extract_text()
+                texts = [Document(page_content=pdftext, metadata={"source": filepath})]
             elif file_type == ".docx":
                 logging.debug("Loading Word...")
                 from langchain.document_loaders import UnstructuredWordDocumentLoader
             elif file_type == ".xlsx":
                 logging.debug("Loading Excel...")
                 text_list = excel_to_string(filepath)
+                texts = []
                 for elem in text_list:
+                    texts.append(Document(page_content=elem, metadata={"source": filepath}))
             else:
                 logging.debug("Loading text file...")
                 from langchain.document_loaders import TextLoader
             logging.error(f"Error loading file: {filename}")
             traceback.print_exc()
+        texts = text_splitter.split_documents(texts)
         documents.extend(texts)
     logging.debug("Documents loaded.")
     return documents

modules/models/ChuanhuAgent.py CHANGED Viewed

@@ -14,7 +14,8 @@ from langchain.tools import BaseTool, StructuredTool, Tool, tool
 from langchain.callbacks.stdout import StdOutCallbackHandler
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain.callbacks.manager import BaseCallbackManager
-from googlesearch import search
 from typing import Any, Dict, List, Optional, Union
@@ -93,7 +94,15 @@ class ChuanhuAgent_Client(BaseLLMModel):
         )
     def google_search_simple(self, query):
-        results = [{"title": i.title, "link": i.url, "snippet": i.description} for i in search(query, advanced=True)]
         return str(results)
     def handle_file_upload(self, files, chatbot, language):

 from langchain.callbacks.stdout import StdOutCallbackHandler
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain.callbacks.manager import BaseCallbackManager
+from duckduckgo_search import DDGS
+from itertools import islice
 from typing import Any, Dict, List, Optional, Union
         )
     def google_search_simple(self, query):
+        results = []
+        with DDGS() as ddgs:
+            ddgs_gen = ddgs.text("notes from a dead house", backend="lite")
+            for r in islice(ddgs_gen, 10):
+                results.append({
+                    "title": r["title"],
+                    "link": r["href"],
+                    "snippet": r["body"]
+                })
         return str(results)
     def handle_file_upload(self, files, chatbot, language):

modules/models/__pycache__/base_model.cpython-311.pyc CHANGED Viewed

Binary files a/modules/models/__pycache__/base_model.cpython-311.pyc and b/modules/models/__pycache__/base_model.cpython-311.pyc differ

modules/models/__pycache__/base_model.cpython-39.pyc CHANGED Viewed

Binary files a/modules/models/__pycache__/base_model.cpython-39.pyc and b/modules/models/__pycache__/base_model.cpython-39.pyc differ

modules/models/__pycache__/models.cpython-311.pyc CHANGED Viewed

Binary files a/modules/models/__pycache__/models.cpython-311.pyc and b/modules/models/__pycache__/models.cpython-311.pyc differ

modules/models/__pycache__/models.cpython-39.pyc CHANGED Viewed

Binary files a/modules/models/__pycache__/models.cpython-39.pyc and b/modules/models/__pycache__/models.cpython-39.pyc differ

modules/models/base_model.py CHANGED Viewed

@@ -13,7 +13,8 @@ import pathlib
 from tqdm import tqdm
 import colorama
-from googlesearch import search
 import asyncio
 import aiohttp
 from enum import Enum
@@ -335,16 +336,19 @@ class BaseLLMModel:
                 .replace("{reply_language}", reply_language)
             )
         elif use_websearch:
-            limited_context = True
-            search_results = [i for i in search(real_inputs, advanced=True)]
             reference_results = []
             for idx, result in enumerate(search_results):
                 logging.debug(f"搜索结果{idx + 1}：{result}")
-                domain_name = urllib3.util.parse_url(result.url).host
-                reference_results.append([result.description, result.url])
                 display_append.append(
                     # f"{idx+1}. [{domain_name}]({result['href']})\n"
-                    f"<li><a href=\"{result.url}\" target=\"_blank\">{domain_name}</a></li>\n"
                 )
             reference_results = add_source_numbers(reference_results)
             display_append = "<ol>\n\n" + "".join(display_append) + "</ol>"
@@ -637,7 +641,7 @@ class BaseLLMModel:
                 history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
             else:
                 history_file_path = filename
-            with open(history_file_path, "r") as f:
                 json_s = json.load(f)
             try:
                 if type(json_s["history"][0]) == str:

 from tqdm import tqdm
 import colorama
+from duckduckgo_search import DDGS
+from itertools import islice
 import asyncio
 import aiohttp
 from enum import Enum
                 .replace("{reply_language}", reply_language)
             )
         elif use_websearch:
+            search_results = []
+            with DDGS() as ddgs:
+                ddgs_gen = ddgs.text(real_inputs, backend="lite")
+                for r in islice(ddgs_gen, 10):
+                    search_results.append(r)
             reference_results = []
             for idx, result in enumerate(search_results):
                 logging.debug(f"搜索结果{idx + 1}：{result}")
+                domain_name = urllib3.util.parse_url(result['href']).host
+                reference_results.append([result['body'], result['href']])
                 display_append.append(
                     # f"{idx+1}. [{domain_name}]({result['href']})\n"
+                    f"<li><a href=\"{result['href']}\" target=\"_blank\">{result['title']}</a></li>\n"
                 )
             reference_results = add_source_numbers(reference_results)
             display_append = "<ol>\n\n" + "".join(display_append) + "</ol>"
                 history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
             else:
                 history_file_path = filename
+            with open(history_file_path, "r", encoding="utf-8") as f:
                 json_s = json.load(f)
             try:
                 if type(json_s["history"][0]) == str:

modules/models/models.py CHANGED Viewed

@@ -338,7 +338,7 @@ class LLaMA_Client(BaseLLMModel):
             pipeline_args = InferencerArguments(
                 local_rank=0, random_seed=1, deepspeed='configs/ds_config_chatbot.json', mixed_precision='bf16')
-            with open(pipeline_args.deepspeed, "r") as f:
                 ds_config = json.load(f)
             LLAMA_MODEL = AutoModel.get_model(
                 model_args,
@@ -623,7 +623,7 @@ def get_model(
 if __name__ == "__main__":
-    with open("config.json", "r") as f:
         openai_api_key = cjson.load(f)["openai_api_key"]
     # set logging level to debug
     logging.basicConfig(level=logging.DEBUG)

             pipeline_args = InferencerArguments(
                 local_rank=0, random_seed=1, deepspeed='configs/ds_config_chatbot.json', mixed_precision='bf16')
+            with open(pipeline_args.deepspeed, "r", encoding="utf-8") as f:
                 ds_config = json.load(f)
             LLAMA_MODEL = AutoModel.get_model(
                 model_args,
 if __name__ == "__main__":
+    with open("config.json", "r", encoding="utf-8") as f:
         openai_api_key = cjson.load(f)["openai_api_key"]
     # set logging level to debug
     logging.basicConfig(level=logging.DEBUG)

modules/overwrites.py CHANGED Viewed

@@ -2,12 +2,12 @@ from __future__ import annotations
 import logging
 from typing import List, Tuple
-import mdtex2html
 from gradio_client import utils as client_utils
 from modules.presets import *
 from modules.index_func import *
-from modules.config import render_latex
 def postprocess(
@@ -40,14 +40,18 @@ def postprocess(
         return processed_messages
 def postprocess_chat_messages(
-        self, chat_message: str | Tuple | List | None, message_type: str
-    ) -> str | Dict | None:
         if chat_message is None:
             return None
         elif isinstance(chat_message, (tuple, list)):
-            filepath = chat_message[0]
             mime_type = client_utils.get_mimetype(filepath)
-            filepath = self.make_temp_copy_if_needed(filepath)
             return {
                 "name": filepath,
                 "mime_type": mime_type,
@@ -56,12 +60,13 @@ def postprocess_chat_messages(
                 "is_file": True,
             }
         elif isinstance(chat_message, str):
-            if message_type == "bot":
-                if not detect_converted_mark(chat_message):
-                    chat_message = convert_mdtext(chat_message)
-            elif message_type == "user":
-                if not detect_converted_mark(chat_message):
-                    chat_message = convert_asis(chat_message)
             return chat_message
         else:
             raise ValueError(f"Invalid message for Chatbot component: {chat_message}")
@@ -75,11 +80,8 @@ with open("./assets/custom.js", "r", encoding="utf-8") as f, \
 def reload_javascript():
     print("Reloading javascript...")
     js = f'<script>{customJS}</script><script async>{externalScripts}</script>'
-    if render_latex:
-        js += """\
-            <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-MML-AM_CHTML"></script>
-            <script type="text/x-mathjax-config">MathJax.Hub.Config({skipStartupTypeset: false, tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']],displayMath: [['$$','$$'], ['\\[','\\]']]}});</script>
-        """
     def template_response(*args, **kwargs):
         res = GradioTemplateResponseOriginal(*args, **kwargs)
         res.body = res.body.replace(b'</html>', f'{js}</html>'.encode("utf8"))

 import logging
 from typing import List, Tuple
 from gradio_client import utils as client_utils
+from gradio import utils
+import inspect
 from modules.presets import *
 from modules.index_func import *
 def postprocess(
         return processed_messages
 def postprocess_chat_messages(
+        self, chat_message: str | tuple | list | None, role: str
+    ) -> str | dict | None:
         if chat_message is None:
             return None
         elif isinstance(chat_message, (tuple, list)):
+            file_uri = chat_message[0]
+            if utils.validate_url(file_uri):
+                filepath = file_uri
+            else:
+                filepath = self.make_temp_copy_if_needed(file_uri)
             mime_type = client_utils.get_mimetype(filepath)
             return {
                 "name": filepath,
                 "mime_type": mime_type,
                 "is_file": True,
             }
         elif isinstance(chat_message, str):
+            # chat_message = inspect.cleandoc(chat_message)
+            # escape html spaces
+            # chat_message = chat_message.replace(" ", "&nbsp;")
+            if role == "bot":
+                chat_message = convert_bot_before_marked(chat_message)
+            elif role == "user":
+                chat_message = convert_user_before_marked(chat_message)
             return chat_message
         else:
             raise ValueError(f"Invalid message for Chatbot component: {chat_message}")
 def reload_javascript():
     print("Reloading javascript...")
     js = f'<script>{customJS}</script><script async>{externalScripts}</script>'
+    # if render_latex:
+    #     js += """\"""
     def template_response(*args, **kwargs):
         res = GradioTemplateResponseOriginal(*args, **kwargs)
         res.body = res.body.replace(b'</html>', f'{js}</html>'.encode("utf8"))

modules/presets.py CHANGED Viewed

@@ -46,25 +46,18 @@ CHUANHU_TITLE = i18n("川虎Chat 🚀")
 CHUANHU_DESCRIPTION = i18n("由Bilibili [土川虎虎虎](https://space.bilibili.com/29125536)、[明昭MZhao](https://space.bilibili.com/24807452) 和 [Keldos](https://github.com/Keldos-Li) 开发<br />访问川虎Chat的 [GitHub项目](https://github.com/GaiZhenbiao/ChuanhuChatGPT) 下载最新版脚本")
-FOOTER = """<div class="versions">{versions}</div>"""
-APPEARANCE_SWITCHER = """
-<div style="display: flex; justify-content: space-between;">
-<span style="margin-top: 4px !important;">"""+ i18n("切换亮暗色主题")  + """</span>
-<span><label class="apSwitch" for="checkbox">
-    <input type="checkbox" id="checkbox">
-    <div class="apSlider"></div>
-</label></span>
-</div>
-"""
 ONLINE_MODELS = [
     "gpt-3.5-turbo",
     "gpt-3.5-turbo-0301",
     "gpt-4",
     "gpt-4-0314",
     "gpt-4-32k",
     "gpt-4-32k-0314",
     "川虎助理",
     "川虎助理 Pro",
     "xmchat",
@@ -105,11 +98,15 @@ for dir_name in os.listdir("models"):
 MODEL_TOKEN_LIMIT = {
     "gpt-3.5-turbo": 4096,
     "gpt-3.5-turbo-0301": 4096,
     "gpt-4": 8192,
     "gpt-4-0314": 8192,
     "gpt-4-32k": 32768,
-    "gpt-4-32k-0314": 32768
 }
 TOKEN_OFFSET = 1000 # 模型的token上限减去这个值，得到软上限。到达软上限之后，自动尝试减少token占用。
@@ -238,4 +235,6 @@ small_and_beautiful_theme = gr.themes.Soft(
         block_title_background_fill_dark="*primary_900",
         block_label_background_fill_dark="*primary_900",
         input_background_fill="#F6F6F6",
     )

 CHUANHU_DESCRIPTION = i18n("由Bilibili [土川虎虎虎](https://space.bilibili.com/29125536)、[明昭MZhao](https://space.bilibili.com/24807452) 和 [Keldos](https://github.com/Keldos-Li) 开发<br />访问川虎Chat的 [GitHub项目](https://github.com/GaiZhenbiao/ChuanhuChatGPT) 下载最新版脚本")
 ONLINE_MODELS = [
     "gpt-3.5-turbo",
+    "gpt-3.5-turbo-16k",
     "gpt-3.5-turbo-0301",
+    "gpt-3.5-turbo-0613",
     "gpt-4",
     "gpt-4-0314",
+    "gpt-4-0613",
     "gpt-4-32k",
     "gpt-4-32k-0314",
+    "gpt-4-32k-0613",
     "川虎助理",
     "川虎助理 Pro",
     "xmchat",
 MODEL_TOKEN_LIMIT = {
     "gpt-3.5-turbo": 4096,
+    "gpt-3.5-turbo-16k": 16384,
     "gpt-3.5-turbo-0301": 4096,
+    "gpt-3.5-turbo-0613": 4096,
     "gpt-4": 8192,
     "gpt-4-0314": 8192,
+    "gpt-4-0613": 8192,
     "gpt-4-32k": 32768,
+    "gpt-4-32k-0314": 32768,
+    "gpt-4-32k-0613": 32768
 }
 TOKEN_OFFSET = 1000 # 模型的token上限减去这个值，得到软上限。到达软上限之后，自动尝试减少token占用。
         block_title_background_fill_dark="*primary_900",
         block_label_background_fill_dark="*primary_900",
         input_background_fill="#F6F6F6",
+        chatbot_code_background_color="*neutral_950",
+        chatbot_code_background_color_dark="*neutral_950",
     )

modules/shared.py CHANGED Viewed

@@ -59,3 +59,6 @@ class State:
 state = State()

 state = State()
+modules_path = os.path.dirname(os.path.realpath(__file__))
+chuanhu_path = os.path.dirname(modules_path)

modules/utils.py CHANGED Viewed

@@ -16,7 +16,6 @@ import subprocess
 import gradio as gr
 from pypinyin import lazy_pinyin
 import tiktoken
-import mdtex2html
 from markdown import markdown
 from pygments import highlight
 from pygments.lexers import get_lexer_by_name
@@ -133,7 +132,7 @@ def count_token(message):
     return length
-def markdown_to_html_with_syntax_highlight(md_str):
     def replacer(match):
         lang = match.group(1) or "text"
         code = match.group(2)
@@ -155,7 +154,7 @@ def markdown_to_html_with_syntax_highlight(md_str):
     return html_str
-def normalize_markdown(md_text: str) -> str:
     lines = md_text.split("\n")
     normalized_lines = []
     inside_list = False
@@ -179,7 +178,7 @@ def normalize_markdown(md_text: str) -> str:
     return "\n".join(normalized_lines)
-def convert_mdtext(md_text):
     code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
     inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
     code_blocks = code_block_pattern.findall(md_text)
@@ -203,15 +202,70 @@ def convert_mdtext(md_text):
     output += ALREADY_CONVERTED_MARK
     return output
-def convert_asis(userinput):
     return (
         f'<p style="white-space:pre-wrap;">{html.escape(userinput)}</p>'
         + ALREADY_CONVERTED_MARK
     )
-def detect_converted_mark(userinput):
     try:
         if userinput.endswith(ALREADY_CONVERTED_MARK):
             return True
@@ -221,7 +275,7 @@ def detect_converted_mark(userinput):
         return True
-def detect_language(code):
     if code.startswith("\n"):
         first_line = ""
     else:
@@ -256,8 +310,8 @@ def save_file(filename, system, history, chatbot, user_name):
             history_file_path = filename
         else:
             history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
-        with open(history_file_path, "w") as f:
-            json.dump(json_s, f)
     elif filename.endswith(".md"):
         md_s = f"system: \n- {system} \n"
         for data in history:
@@ -497,6 +551,13 @@ def versions_html():
         <a style="text-decoration:none;color:inherit" href="https://github.com/GaiZhenbiao/ChuanhuChatGPT">ChuanhuChat</a>: {commit_info}
         """
 def add_source_numbers(lst, source_name = "Source", use_source = True):
     if use_source:
         return [f'[{idx+1}]\t "{item[0]}"\n{source_name}: {item[1]}' for idx, item in enumerate(lst)]
@@ -563,7 +624,7 @@ def toggle_like_btn_visibility(selected_model_name):
 def new_auto_history_filename(dirname):
     latest_file = get_latest_filepath(dirname)
     if latest_file:
-        with open(os.path.join(dirname, latest_file), 'r') as f:
             if len(f.read()) == 0:
                 return latest_file
     now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

 import gradio as gr
 from pypinyin import lazy_pinyin
 import tiktoken
 from markdown import markdown
 from pygments import highlight
 from pygments.lexers import get_lexer_by_name
     return length
+def markdown_to_html_with_syntax_highlight(md_str): # deprecated
     def replacer(match):
         lang = match.group(1) or "text"
         code = match.group(2)
     return html_str
+def normalize_markdown(md_text: str) -> str: # deprecated
     lines = md_text.split("\n")
     normalized_lines = []
     inside_list = False
     return "\n".join(normalized_lines)
+def convert_mdtext(md_text): # deprecated
     code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
     inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
     code_blocks = code_block_pattern.findall(md_text)
     output += ALREADY_CONVERTED_MARK
     return output
+def convert_bot_before_marked(chat_message):
+    """
+    注意不能给输出加缩进, 否则会被marked解析成代码块
+    """
+    if '<div class="md-message">' in chat_message:
+        return chat_message
+    else:
+        code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
+        code_blocks = code_block_pattern.findall(chat_message)
+        non_code_parts = code_block_pattern.split(chat_message)[::2]
+        result = []
+        raw = f'<div class="raw-message hideM">{escape_markdown(chat_message)}</div>'
+        for non_code, code in zip(non_code_parts, code_blocks + [""]):
+            if non_code.strip():
+                result.append(non_code)
+            if code.strip():
+                code = f"\n```{code}\n```"
+                result.append(code)
+        result = "".join(result)
+        md = f'<div class="md-message">{result}\n</div>'
+        return raw + md
+def convert_user_before_marked(chat_message):
+    if '<div class="user-message">' in chat_message:
+        return chat_message
+    else:
+        return f'<div class="user-message">{escape_markdown(chat_message)}</div>'
+def escape_markdown(text):
+    """
+    Escape Markdown special characters to HTML-safe equivalents.
+    """
+    escape_chars = {
+        ' ': '&nbsp;',
+        '_': '&#95;',
+        '*': '&#42;',
+        '[': '&#91;',
+        ']': '&#93;',
+        '(': '&#40;',
+        ')': '&#41;',
+        '{': '&#123;',
+        '}': '&#125;',
+        '#': '&#35;',
+        '+': '&#43;',
+        '-': '&#45;',
+        '.': '&#46;',
+        '!': '&#33;',
+        '`': '&#96;',
+        '>': '&#62;',
+        '<': '&#60;',
+        '|': '&#124;'
+    }
+    return ''.join(escape_chars.get(c, c) for c in text)
+def convert_asis(userinput): # deprecated
     return (
         f'<p style="white-space:pre-wrap;">{html.escape(userinput)}</p>'
         + ALREADY_CONVERTED_MARK
     )
+def detect_converted_mark(userinput): # deprecated
     try:
         if userinput.endswith(ALREADY_CONVERTED_MARK):
             return True
         return True
+def detect_language(code): # deprecated
     if code.startswith("\n"):
         first_line = ""
     else:
             history_file_path = filename
         else:
             history_file_path = os.path.join(HISTORY_DIR, user_name, filename)
+        with open(history_file_path, "w", encoding='utf-8') as f:
+            json.dump(json_s, f, ensure_ascii=False)
     elif filename.endswith(".md"):
         md_s = f"system: \n- {system} \n"
         for data in history:
         <a style="text-decoration:none;color:inherit" href="https://github.com/GaiZhenbiao/ChuanhuChatGPT">ChuanhuChat</a>: {commit_info}
         """
+def get_html(filename):
+    path = os.path.join(shared.chuanhu_path, "assets", "html", filename)
+    if os.path.exists(path):
+        with open(path, encoding="utf8") as file:
+            return file.read()
+    return ""
 def add_source_numbers(lst, source_name = "Source", use_source = True):
     if use_source:
         return [f'[{idx+1}]\t "{item[0]}"\n{source_name}: {item[1]}' for idx, item in enumerate(lst)]
 def new_auto_history_filename(dirname):
     latest_file = get_latest_filepath(dirname)
     if latest_file:
+        with open(os.path.join(dirname, latest_file), 'r', encoding="utf-8") as f:
             if len(f.read()) == 0:
                 return latest_file
     now = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')