Spaces: Running on Zero
try:
    import spaces

    def maybe_spaces_gpu(fn):
        # Wrap the function with spaces.GPU so each call gets a ZeroGPU worker on HF Spaces.
        fn = spaces.GPU(fn)
        return fn
except ModuleNotFoundError:
    print('Cannot import hf `spaces` with `import spaces`; running without ZeroGPU.')

    def maybe_spaces_gpu(fn):
        # No-op fallback when the `spaces` package is unavailable (e.g. local runs).
        return fn
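# Usage sketch: on a ZeroGPU Space, `spaces.GPU` allocates a GPU for the duration of
# each decorated call; locally the fallback above is a no-op. The function below is a
# hypothetical illustration, not part of this demo:
#
#   @maybe_spaces_gpu
#   def generate(prompt: str) -> str:
#       ...  # GPU-bound inference when ZeroGPU is available, a plain call otherwise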
import argparse
import glob
import inspect
import json
import os
import time
from typing import (
    Any,
    AsyncGenerator,
    Callable,
    Dict,
    Generator,
    Iterator,
    List,
    Literal,
    Optional,
    Tuple,
    Union,
    cast,
)

import anyio
import filelock
import gradio as gr
import numpy as np
from gradio.blocks import Blocks
from gradio.components import (
    Button,
    Chatbot,
    Component,
    Markdown,
    State,
    Textbox,
    get_component_instance,
)
from gradio.events import Dependency, EventListenerMethod, on
from gradio.helpers import create_examples as Examples  # noqa: N812
from gradio.helpers import special_args
from gradio.layouts import Accordion, Group, Row
from gradio.routes import Request
from gradio.themes import ThemeClass as Theme
from gradio.utils import SyncToAsyncIterator, async_iteration
from gradio_client import utils as client_utils
from gradio_client.documentation import document, set_documentation_group
from huggingface_hub import snapshot_download
from tqdm.auto import tqdm
from .base_demo import register_demo, get_demo_class, BaseDemo
from ..configs import (
    SYSTEM_PROMPT,
    MODEL_NAME,
    MAX_TOKENS,
    TEMPERATURE,
    USE_PANEL,
    CHATBOT_HEIGHT,
)
from ..globals import MODEL_ENGINE
from .chat_interface import (
    CHAT_EXAMPLES,
    DATETIME_FORMAT,
    gradio_history_to_conversation_prompt,
    gradio_history_to_openai_conversations,
    get_datetime_string,
    format_conversation,
    chat_response_stream_multiturn_engine,
    CustomizedChatInterface,
    ChatInterfaceDemo,
)
from .langchain_web_search import (
    AnyEnginePipeline,
    ChatAnyEnginePipeline,
    create_web_search_engine,
)
# Web-search components, initialized lazily on the first request.
web_search_llm = None
web_search_chat_model = None
web_search_engine = None
web_search_agent = None
@maybe_spaces_gpu
def chat_web_search_response_stream_multiturn_engine(
    message: str,
    history: List[Tuple[str, str]],
    temperature: float,
    max_tokens: int,
    system_prompt: Optional[str] = SYSTEM_PROMPT,
):
    global MODEL_ENGINE
    # Build the web-search pipeline around the shared model engine.
    web_search_llm, web_search_chat_model, agent_executor = create_web_search_engine(model_engine=MODEL_ENGINE)
    temperature = float(temperature)
    max_tokens = int(max_tokens)
    message = message.strip()
    if len(message) == 0:
        raise gr.Error("The message cannot be empty!")
    print('Begin agent_invoke.')
    # Single-turn agent call: the agent decides when to query the web.
    response_output = agent_executor.invoke({"input": message})
    response = response_output['output']
    # Count prompt tokens so the UI can report them alongside the response.
    full_prompt = gradio_history_to_conversation_prompt(message.strip(), history=history, system_prompt=system_prompt)
    num_tokens = len(MODEL_ENGINE.tokenizer.encode(full_prompt))
    yield response, num_tokens
    # NOTE: the previous non-agent path (datetime substitution in the system prompt,
    # a prompt-length check against MODEL_ENGINE.max_position_embeddings, and
    # token-by-token streaming via MODEL_ENGINE.generate_yield_string) is replaced
    # by the single agent_executor.invoke call above.
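# Consumption sketch (an assumption about how CustomizedChatInterface drives this
# generator): each yielded (response, num_tokens) pair updates the chatbot, e.g.:
#
#   for response, num_tokens in chat_web_search_response_stream_multiturn_engine(
#           "What is Langchain?", history=[], temperature=0.7, max_tokens=512):
#       print(num_tokens, response[:80])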
# Assumption: demos register themselves so app.py can look them up by class name.
@register_demo
class WebSearchChatInterfaceDemo(BaseDemo):
    def tab_name(self):
        return "Web Search"

    def create_demo(
        self,
        title: str | None = None,
        description: str | None = None,
        **kwargs,
    ) -> gr.Blocks:
        system_prompt = kwargs.get("system_prompt", SYSTEM_PROMPT)
        max_tokens = kwargs.get("max_tokens", MAX_TOKENS)
        temperature = kwargs.get("temperature", TEMPERATURE)
        model_name = kwargs.get("model_name", MODEL_NAME)
        description = description or (
            "At the moment, web search is **SINGLE TURN** only, works well only in "
            "**English**, and may respond unnaturally!"
        )
        demo_chat = CustomizedChatInterface(
            chat_web_search_response_stream_multiturn_engine,
            chatbot=gr.Chatbot(
                label=model_name,
                bubble_full_width=False,
                latex_delimiters=[
                    {"left": "$$", "right": "$$", "display": True},
                ],
                show_copy_button=True,
                layout="panel" if USE_PANEL else "bubble",
                height=CHATBOT_HEIGHT,
            ),
            textbox=gr.Textbox(placeholder='Type message', lines=1, max_lines=128, min_width=200, scale=8),
            submit_btn=gr.Button(value='Submit', variant="primary", scale=0),
            title=title,
            description=description,
            additional_inputs=[
                gr.Number(value=temperature, label='Temperature (higher -> more random)'),
                gr.Number(value=max_tokens, label='Max generated tokens (increase for longer responses)'),
                gr.Textbox(value=system_prompt, label='System prompt', lines=4, interactive=False),
            ],
            examples=[
                ["What is Langchain?"],
                ["Give me the latest news about Lawrence Wong."],
                ['What did Jerome Powell say today?'],
                ['What is the best model on the LMSys leaderboard?'],
                ['Where does Messi play right now?'],
            ],
            cache_examples=False,
        )
        return demo_chat
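# Launch sketch (hypothetical wiring; the actual entry point is app.py, which reads
# the DEMOS environment variable):
#
#   demo_cls = get_demo_class(os.environ.get("DEMOS", "WebSearchChatInterfaceDemo"))
#   demo = demo_cls().create_demo(title="Web Search")
#   demo.queue().launch()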
""" | |
run | |
export BACKEND=mlx | |
export DEMOS=WebSearchChatInterfaceDemo | |
python app.py | |
""" | |