from __future__ import annotations

import asyncio
import json
import logging
import os
import pathlib
import re
import shutil
import sys
import traceback
from collections import deque
from enum import Enum
from itertools import islice
from threading import Condition, Thread
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

import aiohttp
import colorama
import commentjson as cjson
import requests
import urllib3
from duckduckgo_search import DDGS
from huggingface_hub import hf_hub_download
from langchain.callbacks.base import BaseCallbackHandler, BaseCallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chat_models.base import BaseChatModel
from langchain.input import print_text
from langchain.schema import (AgentAction, AgentFinish, AIMessage, BaseMessage,
                              HumanMessage, LLMResult, SystemMessage)
from tqdm import tqdm

from .. import shared
from ..config import retrieve_proxy
from ..index_func import *
from ..presets import *
from ..utils import *


class CallbackToIterator:
    def __init__(self):
        self.queue = deque()
        self.cond = Condition()
        self.finished = False

    def callback(self, result):
        with self.cond:
            self.queue.append(result)
            self.cond.notify()  # Wake up the generator.

    def __iter__(self):
        return self

    def __next__(self):
        with self.cond:
            # Wait until a value is queued or finish() is called.
            while not self.queue and not self.finished:
                self.cond.wait()
            if not self.queue:
                raise StopIteration()
            return self.queue.popleft()

    def finish(self):
        with self.cond:
            self.finished = True
            self.cond.notify()  # Wake up the generator if it's waiting.


def get_action_description(text):
    match = re.search("```(.*?)```", text, re.S)
    json_text = match.group(1)
    # Parse the JSON into a Python dict.
    json_dict = json.loads(json_text)
    # Extract the 'action' and 'action_input' values.
    action_name = json_dict["action"]
    action_input = json_dict["action_input"]
    if action_name != "Final Answer":
        return f'{action_name}: {action_input}\n'
    else:
        return ""
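

# Minimal usage sketch (illustrative only, not part of the original module):
# a producer thread pushes tokens through callback() while a consumer simply
# iterates; iteration blocks until finish() is called and the queue drains.
# The helper name below is hypothetical.
def _example_callback_to_iterator():
    it = CallbackToIterator()

    def produce():
        for token in ("Hello", ", ", "world"):
            it.callback(token)
        it.finish()

    Thread(target=produce, daemon=True).start()
    return "".join(it)  # Blocks until finish(); returns "Hello, world".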


class ChuanhuCallbackHandler(BaseCallbackHandler):
    def __init__(self, callback) -> None:
        """Initialize callback handler."""
        self.callback = callback

    def on_agent_action(
        self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
    ) -> Any:
        self.callback(get_action_description(action.log))

    def on_tool_end(
        self,
        output: str,
        color: Optional[str] = None,
        observation_prefix: Optional[str] = None,
        llm_prefix: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """If not the final action, print out observation."""
        # if observation_prefix is not None:
        #     self.callback(f"\n\n{observation_prefix}")
        # self.callback(output)
        # if llm_prefix is not None:
        #     self.callback(f"\n\n{llm_prefix}")
        if observation_prefix is not None:
            logging.info(observation_prefix)
        self.callback(output)
        if llm_prefix is not None:
            logging.info(llm_prefix)

    def on_agent_finish(
        self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
    ) -> None:
        # self.callback(f"{finish.log}\n\n")
        logging.info(finish.log)

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Run on new LLM token. Only available when streaming is enabled."""
        self.callback(token)

    def on_chat_model_start(
        self,
        serialized: Dict[str, Any],
        messages: List[List[BaseMessage]],
        **kwargs: Any,
    ) -> Any:
        """Run when a chat model starts running."""
        pass


class ModelType(Enum):
    Unknown = -1
    OpenAI = 0
    ChatGLM = 1
    LLaMA = 2
    XMChat = 3
    StableLM = 4
    MOSS = 5
    YuanAI = 6
    Minimax = 7
    ChuanhuAgent = 8
    GooglePaLM = 9
    LangchainChat = 10
    Midjourney = 11
    Spark = 12
    OpenAIInstruct = 13
    Claude = 14
    Qwen = 15
    OpenAIVision = 16
    ERNIE = 17
    DALLE3 = 18
    GoogleGemini = 19
    GoogleGemma = 20
    Ollama = 21

    @classmethod
    def get_type(cls, model_name: str):
        model_type = None
        model_name_lower = model_name.lower()
        if "gpt" in model_name_lower:
            if "instruct" in model_name_lower:
                model_type = ModelType.OpenAIInstruct
            elif "vision" in model_name_lower:
                model_type = ModelType.OpenAIVision
            else:
                model_type = ModelType.OpenAI
        elif "chatglm" in model_name_lower:
            model_type = ModelType.ChatGLM
        elif "ollama" in model_name_lower:
            model_type = ModelType.Ollama
        elif "llama" in model_name_lower or "alpaca" in model_name_lower:
            model_type = ModelType.LLaMA
        elif "xmchat" in model_name_lower:
            model_type = ModelType.XMChat
        elif "stablelm" in model_name_lower:
            model_type = ModelType.StableLM
        elif "moss" in model_name_lower:
            model_type = ModelType.MOSS
        elif "yuanai" in model_name_lower:
            model_type = ModelType.YuanAI
        elif "minimax" in model_name_lower:
            model_type = ModelType.Minimax
        elif "川虎助理" in model_name_lower:
            model_type = ModelType.ChuanhuAgent
        elif "palm" in model_name_lower:
            model_type = ModelType.GooglePaLM
        elif "gemini" in model_name_lower:
            model_type = ModelType.GoogleGemini
        elif "midjourney" in model_name_lower:
            model_type = ModelType.Midjourney
        elif "azure" in model_name_lower or "api" in model_name_lower:
            model_type = ModelType.LangchainChat
        elif "星火大模型" in model_name_lower:
            model_type = ModelType.Spark
        elif "claude" in model_name_lower:
            model_type = ModelType.Claude
        elif "qwen" in model_name_lower:
            model_type = ModelType.Qwen
        elif "ernie" in model_name_lower:
            model_type = ModelType.ERNIE
        elif "dall" in model_name_lower:
            model_type = ModelType.DALLE3
        elif "gemma" in model_name_lower:
            model_type = ModelType.GoogleGemma
        else:
            model_type = ModelType.LLaMA
        return model_type
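

# Illustrative routing examples (the model names here are assumptions, chosen
# only to show how the substring matching above resolves):
#   ModelType.get_type("gpt-3.5-turbo")        -> ModelType.OpenAI
#   ModelType.get_type("gpt-4-vision-preview") -> ModelType.OpenAIVision
#   ModelType.get_type("Azure OpenAI")         -> ModelType.LangchainChat
#   ModelType.get_type("totally-unknown")      -> ModelType.LLaMA (default branch)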


def download(repo_id, filename, retry=10):
    if os.path.exists("./models/downloaded_models.json"):
        with open("./models/downloaded_models.json", "r") as f:
            downloaded_models = json.load(f)
        if repo_id in downloaded_models:
            return downloaded_models[repo_id]["path"]
    else:
        downloaded_models = {}
    while retry > 0:
        try:
            model_path = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                cache_dir="models",
                resume_download=True,
            )
            downloaded_models[repo_id] = {"path": model_path}
            with open("./models/downloaded_models.json", "w") as f:
                json.dump(downloaded_models, f)
            break
        except Exception:
            print("Error downloading model, retrying...")
            retry -= 1
    if retry == 0:
        raise Exception("Error downloading model, please try again later.")
    return model_path


class BaseLLMModel:
    def __init__(
        self,
        model_name,
        system_prompt=INITIAL_SYSTEM_PROMPT,
        temperature=1.0,
        top_p=1.0,
        n_choices=1,
        stop="",
        max_generation_token=None,
        presence_penalty=0,
        frequency_penalty=0,
        logit_bias=None,
        user="",
        single_turn=False,
    ) -> None:
        self.history = []
        self.all_token_counts = []
        try:
            self.model_name = MODEL_METADATA[model_name]["model_name"]
        except KeyError:
            self.model_name = model_name
        self.model_type = ModelType.get_type(model_name)
        try:
            self.token_upper_limit = MODEL_METADATA[model_name]["token_limit"]
        except KeyError:
            self.token_upper_limit = DEFAULT_TOKEN_LIMIT
        self.interrupted = False
        self.system_prompt = system_prompt
        self.api_key = None
        self.need_api_key = False
        self.history_file_path = get_first_history_name(user)
        self.user_name = user
        self.chatbot = []

        self.default_single_turn = single_turn
        self.default_temperature = temperature
        self.default_top_p = top_p
        self.default_n_choices = n_choices
        self.default_stop_sequence = stop
        self.default_max_generation_token = max_generation_token
        self.default_presence_penalty = presence_penalty
        self.default_frequency_penalty = frequency_penalty
        self.default_logit_bias = logit_bias
        self.default_user_identifier = user

        self.single_turn = single_turn
        self.temperature = temperature
        self.top_p = top_p
        self.n_choices = n_choices
        self.stop_sequence = stop
        self.max_generation_token = max_generation_token
        self.presence_penalty = presence_penalty
        self.frequency_penalty = frequency_penalty
        self.logit_bias = logit_bias
        self.user_identifier = user

        self.metadata = {}

    def get_answer_stream_iter(self):
        """Implement stream prediction.

        Conversations are stored in self.history, with the most recent question
        in OpenAI format. Should return a generator that yields the next word
        (str) of the answer.
        """
        logging.warning(
            "Stream prediction is not implemented. Using at-once prediction instead."
        )
        response, _ = self.get_answer_at_once()
        yield response
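
    # Subclassing sketch (hypothetical backend, for illustration only): a
    # concrete model overrides just one of the two answer methods; each
    # default implementation falls back to the other.
    #
    #   class MyChatModel(BaseLLMModel):
    #       def get_answer_stream_iter(self):
    #           for chunk in my_backend.stream(self.history):  # assumed API
    #               yield chunk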

    def get_answer_at_once(self):
        """Implement at-once prediction.

        Conversations are stored in self.history, with the most recent question
        in OpenAI format. Should return:
            the answer (str)
            total token count (int)
        """
        logging.warning(
            "At-once prediction is not implemented. Using stream prediction instead."
        )
        response_iter = self.get_answer_stream_iter()
        count = 0
        for response in response_iter:
            count += 1
        return response, sum(self.all_token_counts) + count

    def billing_info(self):
        """Get billing information; implement if needed."""
        # logging.warning("billing info not implemented, using default")
        return BILLING_NOT_APPLICABLE_MSG

    def count_token(self, user_input):
        """Get the token count of the input; implement if needed."""
        # logging.warning("token count not implemented, using default")
        return len(user_input)

    def stream_next_chatbot(self, inputs, chatbot, fake_input=None, display_append=""):
        def get_return_value():
            return chatbot, status_text

        status_text = i18n("开始实时传输回答……")
        if fake_input:
            chatbot.append((fake_input, ""))
        else:
            chatbot.append((inputs, ""))

        user_token_count = self.count_token(inputs)
        self.all_token_counts.append(user_token_count)
        logging.debug(f"Input token count: {user_token_count}")

        stream_iter = self.get_answer_stream_iter()

        if display_append:
            display_append = (
                '\n\n